diff --git a/modules/swarm/prometheus/cfg/prometheus.yml b/modules/swarm/prometheus/cfg/prometheus.yml
new file mode 100644
index 0000000..fc0abef
--- /dev/null
+++ b/modules/swarm/prometheus/cfg/prometheus.yml
@@ -0,0 +1,78 @@
+---
+# Prometheus scrape configuration: fixed external targets plus Docker Swarm
+# service discovery through the socket proxy reachable as `docker`.
+global:
+  scrape_interval: 5s
+  evaluation_interval: 15s
+
+scrape_configs:
+  - job_name: "prometheus"
+    static_configs:
+      - targets: ["localhost:9090"]
+
+  - job_name: c3voc
+    scrape_interval: 30s
+    scheme: https
+    static_configs:
+      - targets: ["monitoring.c3voc.de:443"]
+
+  - job_name: vector
+    static_configs:
+      - targets: ["ax41-1.fsn.mon2.de:28668"]
+
+  - job_name: node
+    static_configs:
+      - targets:
+          - ingest-fsn.chaoswest.tv:9100
+          - ingest-nbg.chaoswest.tv:9100
+          - oldportier.chaoswest.tv:9100
+
+  - job_name: srs
+    static_configs:
+      - targets:
+          - ingest-fsn.chaoswest.tv:9185
+          - ingest-nbg.chaoswest.tv:9185
+
+  - job_name: ffmpeg
+    scrape_interval: 1s
+    static_configs:
+      - targets:
+          - oldportier.chaoswest.tv:2342
+
+  # Swarm tasks opt in through the service labels `prometheus.job` and
+  # `prometheus.port` (dots become underscores in the meta label names).
+  - job_name: "dockerswarm"
+    dockerswarm_sd_configs:
+      - host: tcp://docker:2375
+        role: tasks
+    relabel_configs:
+      # Only keep containers that should be running.
+      - source_labels: [__meta_dockerswarm_task_desired_state]
+        regex: running
+        action: keep
+      # Only keep containers that have a `prometheus.job` label.
+      - source_labels: [__meta_dockerswarm_service_label_prometheus_job]
+        regex: .+
+        action: keep
+      # Use the `prometheus.job` Swarm label as the Prometheus job label.
+      - source_labels: [__meta_dockerswarm_service_label_prometheus_job]
+        target_label: job
+      # Only scrape within the metrics network.
+      - source_labels: [__meta_dockerswarm_network_name]
+        regex: metrics
+        action: keep
+      # Rewrite `host:discovered-port` to `host:prometheus.port-label`.
+      - source_labels:
+          [__address__, __meta_dockerswarm_service_label_prometheus_port]
+        separator: ":"
+        regex: "(.*):.*:(.*)"
+        target_label: __address__
+        replacement: "${1}:${2}"
+      # For the node exporter, use the node hostname as the instance label.
+      # The regex must match the Swarm service name `prometheus_node_exporter`.
+      - source_labels:
+          [__meta_dockerswarm_service_name, __meta_dockerswarm_node_hostname]
+        separator: ":"
+        regex: "prometheus_node_exporter:(.*)"
+        target_label: instance
+        replacement: "${1}"
diff --git a/modules/swarm/prometheus/docker-socket-proxy.tf b/modules/swarm/prometheus/docker-socket-proxy.tf
new file mode 100644
index 0000000..717917a
--- /dev/null
+++ b/modules/swarm/prometheus/docker-socket-proxy.tf
@@ -0,0 +1,54 @@
+# Prometheus has switched to running with 'nobody' user, which doesn't have
+# access to the docker socket. This service runs a proxy that allows prometheus
+# to access the docker socket.
+
+# Since it internally uses HAProxy, it can also restrict access to parts of the
+# docker API. By default, everything but the parts allowed in the envs below
+# is restricted.
+
+# Nice bonus: it also would allow us to run prometheus on a non-manager node.
+
+
+data "docker_registry_image" "docker_socket_proxy" {
+  name = "ghcr.io/tecnativa/docker-socket-proxy:0.1"
+}
+
+resource "docker_service" "docker_socket_proxy" {
+  name = "prometheus_docker_socket_proxy"
+
+  mode {
+    global = true
+  }
+
+  task_spec {
+    placement {
+      constraints = [
+        "node.role==manager",
+      ]
+    }
+
+    networks_advanced {
+      name = docker_network.docker_socket_proxy.id
+      aliases = [
+        "docker",
+      ]
+    }
+
+    container_spec {
+      image = "${data.docker_registry_image.docker_socket_proxy.name}@${data.docker_registry_image.docker_socket_proxy.sha256_digest}"
+
+      env = {
+        NODES    = "1"
+        NETWORKS = "1"
+        SERVICES = "1"
+        TASKS    = "1"
+      }
+
+      mounts {
+        target = "/var/run/docker.sock"
+        source = "/var/run/docker.sock"
+        type   = "bind"
+      }
+    }
+  }
+}
diff --git a/modules/swarm/prometheus/main.tf b/modules/swarm/prometheus/main.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/swarm/prometheus/network.tf b/modules/swarm/prometheus/network.tf
new file mode 100644
index 0000000..3c6c122
--- /dev/null
+++ b/modules/swarm/prometheus/network.tf
@@ -0,0 +1,26 @@
+# Existing Traefik network; looked up as a data source, not managed here.
+data "docker_network" "traefik" {
+  name = "traefik"
+}
+
+# Overlay network on which Prometheus scrapes Swarm-discovered targets.
+resource "docker_network" "metrics" {
+  name       = "metrics"
+  attachable = true
+  driver     = "overlay"
+
+  lifecycle {
+    ignore_changes = [labels]
+  }
+}
+
+# Overlay network connecting Prometheus to the Docker socket proxy.
+resource "docker_network" "docker_socket_proxy" {
+  name       = "prometheus_docker_socket_proxy"
+  attachable = true
+  driver     = "overlay"
+
+  lifecycle {
+    ignore_changes = [labels]
+  }
+}
diff --git a/modules/swarm/prometheus/node-exporter.tf b/modules/swarm/prometheus/node-exporter.tf
new file mode 100644
index 0000000..15941a9
--- /dev/null
+++ b/modules/swarm/prometheus/node-exporter.tf
@@ -0,0 +1,71 @@
+# Resolves the current digest of the node exporter image (no tag given).
+data "docker_registry_image" "node_exporter" {
+  name = "prom/node-exporter"
+}
+
+locals {
+  # Service labels: `prometheus.job` / `prometheus.port` are read by the
+  # dockerswarm relabeling rules in cfg/prometheus.yml.
+  labels_node_exporter = {
+    "shepherd.auto-update" = "true",
+    "prometheus.job"       = "node",
+    "prometheus.port"      = "9100",
+  }
+}
+
+# Global-mode node exporter service attached to the metrics network.
+resource "docker_service" "node_exporter" {
+  name = "prometheus_node_exporter"
+
+  mode {
+    global = true
+  }
+
+  dynamic "labels" {
+    for_each = local.labels_node_exporter
+    content {
+      label = labels.key
+      value = labels.value
+    }
+  }
+
+  task_spec {
+    networks_advanced {
+      name = docker_network.metrics.id
+    }
+
+    container_spec {
+      image = "${data.docker_registry_image.node_exporter.name}@${data.docker_registry_image.node_exporter.sha256_digest}"
+
+      # Host filesystems are bind-mounted read-only under /host (see below).
+      # `mount-points-exclude` replaces the deprecated `ignored-mount-points`.
+      args = [
+        "--path.rootfs=/host/root",
+        "--path.procfs=/host/proc",
+        "--path.sysfs=/host/sys",
+        "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($|/)"
+      ]
+
+      mounts {
+        target    = "/host/root"
+        source    = "/"
+        type      = "bind"
+        read_only = true
+      }
+
+      mounts {
+        target    = "/host/proc"
+        source    = "/proc"
+        type      = "bind"
+        read_only = true
+      }
+
+      mounts {
+        target    = "/host/sys"
+        source    = "/sys"
+        type      = "bind"
+        read_only = true
+      }
+    }
+  }
+}
diff --git a/modules/swarm/prometheus/outputs.tf b/modules/swarm/prometheus/outputs.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/swarm/prometheus/prometheus.tf b/modules/swarm/prometheus/prometheus.tf
new file mode 100644
index 0000000..21b0767
--- /dev/null
+++ b/modules/swarm/prometheus/prometheus.tf
@@ -0,0 +1,101 @@
+# Resolves the current digest of the Prometheus image (no tag given).
+data "docker_registry_image" "prometheus" {
+  name = "prom/prometheus"
+}
+
+locals {
+  # Service labels: Shepherd auto-update plus Traefik routing for
+  # prometheus.chaoswest.tv (TLS, basic auth) to container port 9090.
+  # NOTE(review): the basic-auth bcrypt hash is committed in plain text;
+  # consider moving it to the encrypted secrets.
+  labels_prometheus = {
+    "shepherd.auto-update"                                      = "true",
+    "traefik.enable"                                            = "true",
+    "traefik.http.services.prometheus.loadbalancer.server.port" = "9090",
+    "traefik.http.routers.prometheus.rule"                      = "Host(`prometheus.chaoswest.tv`)",
+    "traefik.http.routers.prometheus.tls"                       = "true",
+    "traefik.http.routers.prometheus.tls.certresolver"          = "default",
+    "traefik.http.routers.prometheus.middlewares"               = "prometheus-auth",
+    "traefik.http.middlewares.prometheus-auth.basicauth.users"  = "prometheus:$2y$10$XK9vcKzVol9ZWJLiSbKruuFP2jBsVrFY8Vc4ANtm6JnhsXgbnfLYm",
+  }
+}
+
+# Prometheus configuration shipped as a Swarm config. The name embeds the
+# apply timestamp; `ignore_changes = [name]` keeps that from forcing a
+# replacement on every plan, and `create_before_destroy` creates the new
+# config before the old one is removed.
+resource "docker_config" "prometheus" {
+  name = "prometheus-yml-${replace(timestamp(), ":", ".")}"
+  data = base64encode(file("${path.module}/cfg/prometheus.yml"))
+
+  lifecycle {
+    ignore_changes        = [name]
+    create_before_destroy = true
+  }
+}
+
+# Prometheus service: labelled for Traefik routing and attached to the
+# traefik, metrics and socket-proxy networks.
+resource "docker_service" "prometheus" {
+  name = "prometheus"
+
+  dynamic "labels" {
+    for_each = local.labels_prometheus
+    content {
+      label = labels.key
+      value = labels.value
+    }
+  }
+
+  task_spec {
+    networks_advanced {
+      name = data.docker_network.traefik.id
+    }
+
+    networks_advanced {
+      name = docker_network.metrics.id
+    }
+
+    networks_advanced {
+      name = docker_network.docker_socket_proxy.id
+    }
+
+    container_spec {
+      image = "${data.docker_registry_image.prometheus.name}@${data.docker_registry_image.prometheus.sha256_digest}"
+
+      # NOTE(review): the provider documents `file_mode` as a number; confirm
+      # the string "0444" is not converted to decimal 444 - TODO verify.
+      configs {
+        config_id   = docker_config.prometheus.id
+        config_name = docker_config.prometheus.name
+        file_name   = "/etc/prometheus/prometheus.yml"
+        file_uid    = "0"
+        file_gid    = "0"
+        file_mode   = "0444"
+      }
+
+      # Host directory backing /prometheus, so data outlives the container.
+      mounts {
+        target = "/prometheus"
+        source = "/mnt/data/prometheus/"
+        type   = "bind"
+      }
+
+      # /var/run/docker.sock is deliberately NOT mounted here: service
+      # discovery uses tcp://docker:2375 (the socket proxy), which only
+      # exposes the API sections enabled in its env.
+    }
+  }
+}
+
+# DNS: prometheus.chaoswest.tv is a CNAME for ax41-1.fsn.mon2.de.
+data "hetznerdns_zone" "primary" {
+  name = "chaoswest.tv"
+}
+
+resource "hetznerdns_record" "primary" {
+  zone_id = data.hetznerdns_zone.primary.id
+  name    = "prometheus"
+  value   = "ax41-1.fsn.mon2.de."
+  type    = "CNAME"
+}
diff --git a/modules/swarm/prometheus/variables.tf b/modules/swarm/prometheus/variables.tf
new file mode 100644
index 0000000..e69de29
diff --git a/modules/swarm/prometheus/version.tf b/modules/swarm/prometheus/version.tf
new file mode 100644
index 0000000..be1d72e
--- /dev/null
+++ b/modules/swarm/prometheus/version.tf
@@ -0,0 +1,14 @@
+# Terraform and provider version constraints for this module.
+terraform {
+  required_version = "1.5.5"
+  required_providers {
+    hetznerdns = {
+      source  = "timohirt/hetznerdns"
+      version = "~>2.2"
+    }
+    docker = {
+      source  = "kreuzwerker/docker"
+      version = "~>3.0"
+    }
+  }
+}
diff --git a/stacks/ax41-1/.terraform.lock.hcl b/stacks/ax41-1/.terraform.lock.hcl
index 6d832dd..5f3f22e 100644
--- a/stacks/ax41-1/.terraform.lock.hcl
+++ b/stacks/ax41-1/.terraform.lock.hcl
@@ -20,6 +20,7 @@ provider "registry.terraform.io/kreuzwerker/docker" {
   version     = "3.0.2"
   constraints = "~> 3.0"
   hashes = [
+    "h1:cT2ccWOtlfKYBUE60/v2/4Q6Stk1KYTNnhxSck+VPlU=",
     "h1:tryCE8s9BiT6VyfnGgU1mUt9s0HcCKlRERdLd2fr010=",
     "zh:15b0a2b2b563d8d40f62f83057d91acb02cd0096f207488d8b4298a59203d64f",
     "zh:23d919de139f7cd5ebfd2ff1b94e6d9913f0977fcfc2ca02e1573be53e269f95",
diff --git a/stacks/ax41-1/README.md b/stacks/ax41-1/README.md
index 2267f45..b716e56 100644
--- a/stacks/ax41-1/README.md
+++ b/stacks/ax41-1/README.md
@@ -23,6 +23,7 @@
 | [grafana](#module\_grafana) | ../../modules/swarm/grafana | n/a |
 | [hedgedoc](#module\_hedgedoc) | ../../modules/swarm/hedgedoc | n/a |
 | [jitsi](#module\_jitsi) | ../../modules/swarm/jitsi | n/a |
+| [prometheus](#module\_prometheus) | ../../modules/swarm/prometheus | n/a |
 | [shepherd](#module\_shepherd) | ../../modules/swarm/shepherd | n/a |
 | [shit](#module\_shit) | ../../modules/swarm/shit | n/a |
 | [spaceapi](#module\_spaceapi) | ../../modules/swarm/spaceapi | n/a |
diff --git a/stacks/ax41-1/main.tf b/stacks/ax41-1/main.tf
index 9affe5a..fafaf99 100644
--- a/stacks/ax41-1/main.tf
+++ b/stacks/ax41-1/main.tf
@@ -37,3 +37,7 @@ module "spaceapi" {
 module "forgejo" {
   source = "../../modules/swarm/forgejo"
 }
+
+module "prometheus" {
+  source = "../../modules/swarm/prometheus"
+}
diff --git a/stacks/ax41-1/secrets.enc.yaml b/stacks/ax41-1/secrets.enc.yaml
index 22e7fba..c8b8bc5 100644
--- a/stacks/ax41-1/secrets.enc.yaml
+++ b/stacks/ax41-1/secrets.enc.yaml
@@ -32,8 +32,8 @@ sops:
             WmlRUnowa2lMNWpDT0xEU0htV0w3U00K1f/SO/FBvC9lIBzveBEwhopj5ryMVCmD
             jw8AdxvmMwsCSfIROKkzMqiUs2zsj6FOMlYFI1Rb07mItSO2Yd7TsA==
             -----END AGE ENCRYPTED FILE-----
-    lastmodified: "2024-01-25T17:33:50Z"
-    mac: ENC[AES256_GCM,data:VUSGKproAmU286+mf9nM/IzddWcT18/6tk73guFUB6C3Turfv9DXDW1wSXu6vTxGlOinChxlcBCnsGfk7gfFisjAEZHJq9IJ0P2myYp+lKbybolm0fbTZ4jda7DUnvN3n4I0EqFoUa/vPN/DSkdt0hKj1Ayz5AdFIAvOtkphMPA=,iv:dHhMBT3T3bWPSUadDgo+h2KSf2qC32q+nM26eK9ivDo=,tag:30dNpRvB6vet5UoWy/DZtg==,type:str]
+    lastmodified: "2024-01-30T17:00:41Z"
+    mac: ENC[AES256_GCM,data:GvVUAo5Qtp0Dcnffh42jGkpT3khDRYXf6ws6Q3n2dWk+q39+xDQ3oxCGKMsYwbxrQ5s1oSI7dENSfxQA1Rwk3+Z0wmrRry9fxlYEnDeYLiR2Jxp0+7zDWUcusfSnjC/ASmwCYSFBcQM4jhD4uyVmhluS0E5KrjOD223Z6vtjxck=,iv:5OFMGJatztWTUR2Xb49CYl0Z42UsieYPTR6YoBn9UmM=,tag:yibooLYgoAN+IeiHVyexKg==,type:str]
     pgp: []
     unencrypted_suffix: _unencrypted
     version: 3.8.1