Prometheus job config.
This commit is contained in:
166
services/prometheus.hcl
Normal file
166
services/prometheus.hcl
Normal file
@@ -0,0 +1,166 @@
|
||||
# Nomad service job running a single Prometheus server in Docker.
#
# The task renders two files into its local/ directory (the main Prometheus
# configuration and one alert rule file), bind-mounts them into the container
# and registers a "prometheus" service with an HTTP health check.
job "prometheus" {
  datacenters = ["alo"]
  type        = "service"

  meta {
    version = "2"
  }

  group "monitoring" {
    count = 1

    network {
      port "http" {
        #host_network = "tailscale"
        # Numeric literal: the static port is a number, not a string.
        static = 9090
      }
    }

    task "prometheus" {
      driver = "docker"

      service {
        name = "prometheus"
        port = "http"
        tags = [
          "traefik.enable=true",
          "traefik.http.routers.prometheus.entryPoints=websecure",
        ]

        # HTTP check against the /-/healthy path on the "http" port.
        check {
          type     = "http"
          path     = "/-/healthy"
          name     = "http"
          interval = "5s"
          timeout  = "2s"
        }
      }

      # main configuration file
      #
      # The heredoc below is processed twice: first by the HCL parser, then by
      # the template engine (default curly-brace delimiters). Regex capture
      # groups in relabel rules are therefore written as $1 and never in the
      # dollar-brace form, which HCL2 would treat as an interpolation and
      # evaluate before Prometheus ever sees it.
      template {
        data = <<EOH
#alerting:
#  alertmanagers:
#  - static_configs:
#    - targets:
#      - alertmanager.service.home:9093

rule_files:
  - "alerts.yml"

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Every Consul service carrying the 'metrics' tag.
  - job_name: 'metrics'
    scrape_interval: 5s
    metrics_path: /metrics
    consul_sd_configs:
      - server: '{{ env "NOMAD_IP_http" }}:8500'
        tags: ['metrics']
        scheme: http
    relabel_configs:
      - source_labels: ['__meta_consul_dc']
        target_label: 'dc'
      - source_labels: [__meta_consul_service]
        target_label: 'job'
      - source_labels: ['__meta_consul_node']
        target_label: 'host'
      - source_labels: ['__meta_consul_tags']
        target_label: 'tags'
      # Copy the text that follows "job-" in the tag list (up to the next
      # comma) into the job_name label.
      - source_labels: ['__meta_consul_tags']
        regex: '.*job-(.*?)(,.*)'
        replacement: '$1'
        target_label: 'job_name'

  # Discovers the nodes registered as 'nomad-client', then rewrites each
  # target address to port 8500 and scrapes /v1/agent/metrics there.
  - job_name: 'consul-server'
    scrape_interval: 10s
    metrics_path: /v1/agent/metrics
    honor_labels: true
    params:
      format: ['prometheus']
    consul_sd_configs:
      - server: '{{ env "NOMAD_IP_http" }}:8500'
        services: ['nomad-client']
        scheme: http
    relabel_configs:
      - source_labels: ['__meta_consul_dc']
        target_label: 'dc'
      - source_labels: ['__meta_consul_node']
        target_label: 'host'
      - source_labels: ['__meta_consul_tags']
        target_label: 'tags'
      - source_labels: [__address__]
        action: replace
        regex: ([^:]+):.*
        replacement: $1:8500
        target_label: __address__

  # 'nomad-client' service instances tagged 'http', scraped on /v1/metrics.
  - job_name: 'nomad'
    consul_sd_configs:
      - server: '{{ env "NOMAD_IP_http" }}:8500'
        services: ['nomad-client']
        tags: ['http']
        scheme: http
    scrape_interval: 10s
    metrics_path: /v1/metrics
    params:
      format: ['prometheus']
    relabel_configs:
      - source_labels: ['__meta_consul_dc']
        target_label: 'dc'
      - source_labels: [__meta_consul_service]
        target_label: 'job'
      - source_labels: ['__meta_consul_node']
        target_label: 'host'

EOH

        destination   = "local/prometheus.yml"
        # Send SIGHUP to the task whenever the rendered file changes.
        change_mode   = "signal"
        change_signal = "SIGHUP"
        env           = false
      }

      # Alert rule file, referenced from rule_files in the main configuration.
      # Square-bracket delimiters are used for this template so that any
      # curly-brace expressions in the rules are written out untouched.
      # NOTE(review): change_mode is "noop", so the task is neither signalled
      # nor restarted when this file is re-rendered - confirm that is intended.
      template {
        change_mode     = "noop"
        destination     = "local/alerts.yml"
        left_delimiter  = "[["
        right_delimiter = "]]"
        data            = <<EOH
---
groups:
- name: prometheus_alerts
  rules:
  - alert: Traefik Down
    expr: absent(nomad_client_allocs_cpu_user{task="traefik"})
    for: 2m
    labels:
      severity: page
    annotations:
      description: "Traefik is down."
EOH
      }

      config {
        image        = "prom/prometheus:v2.44.0"
        network_mode = "host"
        # Supplying args replaces the image's default command line, so the
        # configuration file path is passed explicitly rather than relying on
        # the default relative "prometheus.yml" and the container's working
        # directory. It points at the file mounted in the volumes list below.
        args = [
          "--config.file", "/prometheus/prometheus.yml",
          "--storage.tsdb.path", "/opt/prometheus",
          "--web.listen-address", "0.0.0.0:9090",
          "--storage.tsdb.retention.time", "900d",
        ]
        force_pull = true
        ports      = ["http"]
        volumes = [
          "local/alerts.yml:/prometheus/alerts.yml",
          "local/prometheus.yml:/prometheus/prometheus.yml",
          "/data/compute/appdata/prometheus:/opt/prometheus",
        ]
      }

      resources {
        cpu    = 1000
        memory = 512
      }
    }
  }
}
|
||||
Reference in New Issue
Block a user