From 873ae2c1357d9e54deb3604896f65b844ef4580b Mon Sep 17 00:00:00 2001
From: Jens Nolte <git@queezle.net>
Date: Sun, 29 Jan 2023 18:52:43 +0100
Subject: [PATCH] WIP monitoring

---
 modules/monitoring/grafana-agent.nix     | 122 ++++++++++++++++++++++
 modules/monitoring/idle-load.patch       |  17 ++++
 modules/monitoring/monitoring-server.nix | 123 +++++++++++++++++++++++
 3 files changed, 262 insertions(+)
 create mode 100644 modules/monitoring/grafana-agent.nix
 create mode 100644 modules/monitoring/idle-load.patch
 create mode 100644 modules/monitoring/monitoring-server.nix

diff --git a/modules/monitoring/grafana-agent.nix b/modules/monitoring/grafana-agent.nix
new file mode 100644
index 0000000..2f6e6ab
--- /dev/null
+++ b/modules/monitoring/grafana-agent.nix
@@ -0,0 +1,122 @@
+# NixOS module: runs a patched grafana-agent that collects metrics about the
+# local host (the agent itself and the integrated node_exporter) and sends
+# them to a Prometheus remote-write endpoint.
+{ config, lib, pkgs, ... }:
+with lib;
+
+let
+  cfg = config.queezle.monitoring.grafana-agent;
+  settingsFormat = pkgs.formats.yaml { };
+  # YAML config file generated from the freeform `settings` option.
+  configFile = settingsFormat.generate "grafana-agent.yaml" cfg.settings;
+  # grafana-agent built with idle-load.patch, which raises the polling
+  # periods of the vendored Prometheus WAL watcher.
+  patched-grafana-agent = pkgs.grafana-agent.overrideAttrs (old: {
+    # Keep whatever preBuild the package already defines instead of replacing
+    # it; overriding the attribute outright would silently drop it.
+    # The chmod calls make the vendored file and its directory writable
+    # before patch edits them.
+    preBuild = ''
+      ${old.preBuild or ""}
+      chmod +w vendor/github.com/prometheus/prometheus/tsdb/wlog/watcher.go
+      chmod +w vendor/github.com/prometheus/prometheus/tsdb/wlog/
+      patch -p1 -i ${./idle-load.patch}
+    '';
+  });
+in {
+  options.queezle.monitoring.grafana-agent = {
+    enable = lib.mkEnableOption "prometheus agent config";
+
+    remoteWriteUrl = mkOption {
+      type = types.str;
+      default = "http://prometheus:99/api/v1/write";
+      description = lib.mdDoc ''
+        URL of the Prometheus remote-write endpoint that metrics are sent to.
+      '';
+    };
+
+    settings = mkOption {
+      description = lib.mdDoc ''
+        Configuration for `grafana-agent`.
+        See https://grafana.com/docs/agent/latest/configuration/
+      '';
+
+      type = types.submodule {
+        freeformType = settingsFormat.type;
+      };
+    };
+  };
+
+  config = mkIf cfg.enable {
+
+    queezle.monitoring.grafana-agent.settings = {
+      metrics = {
+        # Escaped so Nix emits a literal ${STATE_DIRECTORY}; the agent expands
+        # it at startup because ExecStart passes -config.expand-env.
+        wal_directory = "\${STATE_DIRECTORY}";
+
+        global.remote_write = [{
+          url = cfg.remoteWriteUrl;
+        }];
+
+        #configs = [{
+        #  scrape_configs = ...
+        #}];
+      };
+      integrations = {
+        # Scrape metrics about the agent itself
+        agent = {
+          enabled = true;
+          scrape_integration = true;
+          instance = config.networking.hostName;
+        };
+
+        # Set up integrated node exporter
+        node_exporter = {
+          enabled = true;
+          scrape_integration = true;
+          instance = config.networking.hostName;
+          enable_collectors = [
+            "systemd"
+          ];
+        };
+      };
+    };
+
+    systemd.services.grafana-agent = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        ExecStart = "${patched-grafana-agent}/bin/agent -disable-reporting -config.expand-env -config.file ${configFile}";
+        RestartSec = 10;
+        Restart = "always";
+        User = "grafana-agent";
+        Group = "grafana-agent";
+        SupplementaryGroups = [
+          # Allow to read the systemd journal for loki log forwarding
+          "systemd-journal"
+        ];
+        StateDirectory = "grafana-agent";
+        Type = "exec";
+
+        # NOTE: No DynamicUser since that prevents the node_exporter systemd
+        # integration from connecting to the socket (because the dynamic user
+        # has an unknown UID in the root UID namespace, so dbus access control
+        # fails).
+        ProtectSystem = "strict";
+        ProtectHome = "tmpfs";
+        RemoveIPC = true;
+        PrivateTmp = true;
+        NoNewPrivileges = true;
+        RestrictSUIDSGID = true;
+      };
+    };
+
+    # Static system user and group that the service runs as.
+    users.users.grafana-agent = {
+      isSystemUser = true;
+      group = "grafana-agent";
+    };
+    users.groups.grafana-agent = {};
+  };
+}
+
diff --git a/modules/monitoring/idle-load.patch b/modules/monitoring/idle-load.patch
new file mode 100644
index 0000000..435a093
--- /dev/null
+++ b/modules/monitoring/idle-load.patch
@@ -0,0 +1,17 @@
+diff --git a/vendor/github.com/prometheus/prometheus/tsdb/wlog/watcher.go b/vendor/github.com/prometheus/prometheus/tsdb/wlog/watcher.go
+index 5d7c84d3..ef79f016 100644
+--- a/vendor/github.com/prometheus/prometheus/tsdb/wlog/watcher.go
++++ b/vendor/github.com/prometheus/prometheus/tsdb/wlog/watcher.go
+@@ -34,9 +34,9 @@ import (
+ )
+ 
+ const (
+-	readPeriod         = 10 * time.Millisecond
+-	checkpointPeriod   = 5 * time.Second
+-	segmentCheckPeriod = 100 * time.Millisecond
++	readPeriod         = 1 * time.Second
++	checkpointPeriod   = 10 * time.Second
++	segmentCheckPeriod = 5 * time.Second
+ 	consumer           = "consumer"
+ )
+ 
diff --git a/modules/monitoring/monitoring-server.nix b/modules/monitoring/monitoring-server.nix
new file mode 100644
index 0000000..03f28f3
--- /dev/null
+++ b/modules/monitoring/monitoring-server.nix
@@ -0,0 +1,123 @@
+# NixOS module: monitoring server with Grafana, Prometheus and an nginx
+# virtual host that exposes only the Prometheus remote-write endpoint.
+{ config, lib, pkgs, ... }:
+with lib;
+
+let
+  cfg = config.queezle.monitoring.server;
+in {
+  options.queezle.monitoring.server.enable = lib.mkEnableOption "prometheus and grafana server";
+
+  config = mkIf cfg.enable {
+
+    # Local agent simplifies and unifies node scraping
+    queezle.monitoring.grafana-agent.enable = true;
+
+    services.grafana = {
+      enable = true;
+      settings = {
+        analytics = {
+          reporting_enabled = false;
+          check_for_updates = false;
+        };
+        server = {
+          domain = "grafana.queezle.xyz";
+          root_url = "https://grafana.queezle.xyz/";
+
+          #protocol = "http";
+          #http_addr = "127.0.0.1";
+          #http_port = 3000;
+          protocol = "socket";
+          socket = "/run/nginx-grafana/grafana.sock";
+          socket_mode = "0777";
+        };
+      };
+      # Currently using sqlite database
+      #database = {
+      #  type = "postgres";
+      #  user = "grafana";
+      #  host = "/var/run/postgresql/";
+      #};
+      provision.enable = true;
+      provision.datasources.settings.datasources = [
+        {
+          name = "prometheus";
+          type = "prometheus";
+          url = "http://localhost:9090";
+        }
+      ];
+      #provision.dashboards = [
+      #  {
+      #    name = "yaner dashboards";
+      #    options.path = ./dashboards;
+      #    options.foldersFromFilesStructure = true;
+      #    updateIntervalSeconds = 999999999;
+      #  }
+      #];
+    };
+    # Directory holding the Grafana socket: owner grafana, group nginx, mode 0750.
+    systemd.tmpfiles.rules = [ "d /run/nginx-grafana 0750 grafana nginx" ];
+
+    services.prometheus = {
+      enable = true;
+      stateDir = "prometheus";
+      #listenAddress = "127.0.0.1"; # port 9090
+      extraFlags = [
+        "--storage.tsdb.retention.size=32GB"
+        # Accept metrics pushed by grafana-agent via remote write
+        "--web.enable-remote-write-receiver"
+      ];
+      scrapeConfigs = [
+        {
+          job_name = "prometheus";
+          static_configs = [
+            {
+              targets = [ "127.0.0.1:9090" ];
+              labels.instance = config.networking.hostName;
+            }
+          ];
+        }
+        {
+          # FIXME(review): Grafana is configured with protocol = "socket"
+          # above, so nothing is expected to listen on TCP port 3000 and this
+          # target will presumably stay down until that is reconciled.
+          job_name = "grafana";
+          static_configs = [
+            {
+              targets = [ "127.0.0.1:3000" ];
+              labels.instance = config.networking.hostName;
+            }
+          ];
+        }
+      ];
+    };
+
+    # Reverse proxy for remote write endpoint
+    # Encrypted/authenticated by using wireguard and firewall rules
+    # (i.e. no HTTPS to isolate metrics from potential letsencrypt problems)
+    services.nginx = {
+      virtualHosts = {
+        "prometheus" = {
+          listen = [
+            {
+              # TODO limit to vpn-only ip?
+              addr = "[::]";
+              port = 99;
+            }
+          ];
+          forceSSL = false;
+          locations = {
+            # Everything except the exact remote-write path is rejected.
+            "/" = {
+              return = "404";
+            };
+            "= /api/v1/write" = {
+              proxyPass = "http://127.0.0.1:9090";
+              proxyWebsockets = true;
+            };
+          };
+        };
+      };
+    };
+  };
+}
-- 
GitLab