diff --git a/nixos/modules/services/monitoring/thanos.nix b/nixos/modules/services/monitoring/thanos.nix index e6d8afc66624..db8641aa6146 100644 --- a/nixos/modules/services/monitoring/thanos.nix +++ b/nixos/modules/services/monitoring/thanos.nix @@ -1,14 +1,37 @@ { config, lib, pkgs, ... }: -with lib; - let + inherit (lib) + collect + concatLists + concatStringsSep + flip + getAttrFromPath + hasPrefix + isList + length + literalExpression + literalMD + mapAttrsRecursiveCond + mapAttrsToList + mdDoc + mkEnableOption + mkIf + mkMerge + mkOption + mkPackageOptionMD + optional + optionalAttrs + optionalString + types + ; + cfg = config.services.thanos; nullOpt = type: description: mkOption { type = types.nullOr type; default = null; - description = lib.mdDoc description; + description = mdDoc description; }; optionToArgs = opt: v : optional (v != null) ''--${opt}="${toString v}"''; @@ -32,7 +55,7 @@ let option = mkOption { type = types.bool; default = false; - description = lib.mdDoc description; + description = mdDoc description; }; }; @@ -41,7 +64,7 @@ let option = mkOption { type = types.listOf types.str; default = []; - description = lib.mdDoc description; + description = mdDoc description; }; }; @@ -50,7 +73,7 @@ let option = mkOption { type = types.attrsOf types.str; default = {}; - description = lib.mdDoc description; + description = mdDoc description; }; }; @@ -59,7 +82,7 @@ let option = mkOption { type = types.str; inherit default; - description = lib.mdDoc description; + description = mdDoc description; }; }; @@ -86,7 +109,7 @@ let defaultText = literalMD '' calculated from `config.services.thanos.${cmd}` ''; - description = lib.mdDoc '' + description = mdDoc '' Arguments to the `thanos ${cmd}` command. 
Defaults to a list of arguments formed by converting the structured @@ -127,10 +150,10 @@ let if config.services.thanos.<cmd>.tracing.config == null then null else toString (toYAML "tracing.yaml" config.services.thanos.<cmd>.tracing.config); ''; - description = lib.mdDoc '' + description = mdDoc '' Path to YAML file that contains tracing configuration. - See format details: + See format details: ''; }; }; @@ -147,7 +170,7 @@ let If {option}`tracing.config-file` is set this option has no effect. - See format details: + See format details: ''; }; }; @@ -192,10 +215,10 @@ let if config.services.thanos.<cmd>.objstore.config == null then null else toString (toYAML "objstore.yaml" config.services.thanos.<cmd>.objstore.config); ''; - description = lib.mdDoc '' + description = mdDoc '' Path to YAML file that contains object store configuration. - See format details: + See format details: ''; }; }; @@ -212,7 +235,7 @@ let If {option}`objstore.config-file` is set this option has no effect. - See format details: + See format details: ''; }; }; @@ -231,7 +254,7 @@ let type = types.str; default = "/var/lib/${config.services.prometheus.stateDir}/data"; defaultText = literalExpression ''"/var/lib/''${config.services.prometheus.stateDir}/data"''; - description = lib.mdDoc '' + description = mdDoc '' Data directory of TSDB. ''; }; @@ -266,14 +289,14 @@ let Maximum size of concurrently allocatable bytes for chunks. ''; - store.grpc.series-sample-limit = mkParamDef types.int 0 '' - Maximum amount of samples returned via a single Series call. + store.limits.request-samples = mkParamDef types.int 0 '' + The maximum samples allowed for a single Series request. + The Series call fails if this limit is exceeded. `0` means no limit. - NOTE: for efficiency we take 120 as the number of samples in chunk (it - cannot be bigger than that), so the actual number of samples might be - lower, even though the maximum could be hit. 
+ NOTE: For efficiency the limit is internally implemented as 'chunks limit' + considering each chunk contains a maximum of 120 samples. ''; store.grpc.series-max-concurrency = mkParamDef types.int 20 '' @@ -371,24 +394,25 @@ let Maximum number of queries processed concurrently by query node. ''; - query.replica-label = mkParam types.str '' - Label to treat as a replica indicator along which data is + query.replica-labels = mkListParam "query.replica-label" '' + Labels to treat as a replica indicator along which data is + deduplicated. Still you will be able to query without deduplication using - `dedup=false` parameter. + 'dedup=false' parameter. Data includes time series, recording + rules, and alerting rules. ''; selector-labels = mkAttrsParam "selector-label" '' Query selector labels that will be exposed in info endpoint. ''; - store.addresses = mkListParam "store" '' - Addresses of statically configured store API servers. + endpoints = mkListParam "endpoint" '' + Addresses of statically configured Thanos API servers (repeatable). - The scheme may be prefixed with `dns+` or - `dnssrv+` to detect store API servers through - respective DNS lookups. + The scheme may be prefixed with 'dns+' or 'dnssrv+' to detect + Thanos API servers through respective DNS lookups. ''; store.sd-files = mkListParam "store.sd-files" '' @@ -430,6 +454,12 @@ let ''; }; + query-frontend = params.common cfg.query-frontend // { + query-frontend.downstream-url = mkParamDef types.str "http://localhost:9090" '' + URL of downstream Prometheus Query compatible API. + ''; + }; + rule = params.common cfg.rule // params.objstore cfg.rule // { labels = mkAttrsParam "label" '' @@ -447,7 +477,7 @@ let Rule files that should be used by rule manager. Can be in glob format. ''; - eval-interval = mkParamDef types.str "30s" '' + eval-interval = mkParamDef types.str "1m" '' The default evaluation interval to use. 
''; @@ -597,10 +627,6 @@ let to render all samples for a human eye anyway ''; - block-sync-concurrency = mkParamDef types.int 20 '' - Number of goroutines to use when syncing block metadata from object storage. - ''; - compact.concurrency = mkParamDef types.int 1 '' Number of goroutines to use when compacting groups. ''; @@ -625,7 +651,7 @@ let Data directory relative to `/var/lib` of TSDB. ''; - labels = mkAttrsParam "labels" '' + labels = mkAttrsParam "label" '' External labels to announce. This flag will be removed in the future when handling multiple tsdb @@ -656,57 +682,56 @@ in { options.services.thanos = { - package = mkOption { - type = types.package; - default = pkgs.thanos; - defaultText = literalExpression "pkgs.thanos"; - description = lib.mdDoc '' - The thanos package that should be used. - ''; - }; + package = mkPackageOptionMD pkgs "thanos" {}; sidecar = paramsToOptions params.sidecar // { enable = mkEnableOption - (lib.mdDoc "the Thanos sidecar for Prometheus server"); + (mdDoc "the Thanos sidecar for Prometheus server"); arguments = mkArgumentsOption "sidecar"; }; store = paramsToOptions params.store // { enable = mkEnableOption - (lib.mdDoc "the Thanos store node giving access to blocks in a bucket provider."); + (mdDoc "the Thanos store node giving access to blocks in a bucket provider."); arguments = mkArgumentsOption "store"; }; query = paramsToOptions params.query // { enable = mkEnableOption - (lib.mdDoc ("the Thanos query node exposing PromQL enabled Query API " + + (mdDoc ("the Thanos query node exposing PromQL enabled Query API " + "with data retrieved from multiple store nodes")); arguments = mkArgumentsOption "query"; }; + query-frontend = paramsToOptions params.query-frontend // { + enable = mkEnableOption + (mdDoc ("the Thanos query frontend, a service deployed in front of queriers to + improve query parallelization and caching")); + arguments = mkArgumentsOption "query-frontend"; + }; + rule = paramsToOptions params.rule // 
{ enable = mkEnableOption - (lib.mdDoc ("the Thanos ruler service which evaluates Prometheus rules against" + + (mdDoc ("the Thanos ruler service which evaluates Prometheus rules against" + " given Query nodes, exposing Store API and storing old blocks in bucket")); arguments = mkArgumentsOption "rule"; }; compact = paramsToOptions params.compact // { enable = mkEnableOption - (lib.mdDoc "the Thanos compactor which continuously compacts blocks in an object store bucket"); + (mdDoc "the Thanos compactor which continuously compacts blocks in an object store bucket"); arguments = mkArgumentsOption "compact"; }; downsample = paramsToOptions params.downsample // { enable = mkEnableOption - (lib.mdDoc "the Thanos downsampler which continuously downsamples blocks in an object store bucket"); + (mdDoc "the Thanos downsampler which continuously downsamples blocks in an object store bucket"); arguments = mkArgumentsOption "downsample"; }; receive = paramsToOptions params.receive // { enable = mkEnableOption - (lib.mdDoc ("the Thanos receiver which accept Prometheus remote write API requests " + - "and write to local tsdb (EXPERIMENTAL, this may change drastically without notice)")); + (mdDoc ("the Thanos receiver which accept Prometheus remote write API requests and write to local tsdb")); arguments = mkArgumentsOption "receive"; }; }; @@ -736,6 +761,7 @@ in { User = "prometheus"; Restart = "always"; ExecStart = thanos "sidecar"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; }; }; }) @@ -751,6 +777,7 @@ in { StateDirectory = cfg.store.stateDir; Restart = "always"; ExecStart = thanos "store"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; }; }; } @@ -764,6 +791,20 @@ in { DynamicUser = true; Restart = "always"; ExecStart = thanos "query"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; + }; + }; + }) + + (mkIf cfg.query-frontend.enable { + systemd.services.thanos-query-frontend = { + wantedBy = [ "multi-user.target" ]; + after = [ 
"network.target" ]; + serviceConfig = { + DynamicUser = true; + Restart = "always"; + ExecStart = thanos "query-frontend"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; }; }; }) @@ -779,6 +820,7 @@ in { StateDirectory = cfg.rule.stateDir; Restart = "always"; ExecStart = thanos "rule"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; }; }; } @@ -797,6 +839,7 @@ in { DynamicUser = true; StateDirectory = cfg.compact.stateDir; ExecStart = thanos "compact"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; }; } // optionalAttrs (!wait) { inherit (cfg.compact) startAt; }; } @@ -813,6 +856,7 @@ in { StateDirectory = cfg.downsample.stateDir; Restart = "always"; ExecStart = thanos "downsample"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; }; }; } @@ -829,6 +873,7 @@ in { StateDirectory = cfg.receive.stateDir; Restart = "always"; ExecStart = thanos "receive"; + ExecReload = "${pkgs.coreutils}/bin/kill -HUP $MAINPID"; }; }; } diff --git a/nixos/tests/prometheus.nix b/nixos/tests/prometheus.nix index a075cfc1f1b7..011127389377 100644 --- a/nixos/tests/prometheus.nix +++ b/nixos/tests/prometheus.nix @@ -3,6 +3,7 @@ let queryPort = 9090; minioPort = 9000; pushgwPort = 9091; + frontPort = 9092; s3 = { accessKey = "BKIKJAA5BMMU2RHO6IBB"; @@ -152,10 +153,15 @@ in import ./make-test-python.nix { services.thanos.query = { enable = true; http-address = "0.0.0.0:${toString queryPort}"; - store.addresses = [ + endpoints = [ "prometheus:${toString grpcPort}" ]; }; + services.thanos.query-frontend = { + enable = true; + http-address = "0.0.0.0:${toString frontPort}"; + query-frontend.downstream-url = "http://127.0.0.1:${toString queryPort}"; + }; }; store = { pkgs, ... 
}: { @@ -178,7 +184,7 @@ in import ./make-test-python.nix { services.thanos.query = { enable = true; http-address = "0.0.0.0:${toString queryPort}"; - store.addresses = [ + endpoints = [ "localhost:${toString grpcPort}" ]; }; @@ -262,6 +268,10 @@ in import ./make-test-python.nix { query.wait_for_unit("thanos-query.service") wait_for_metric(query) + # Test Thanos query frontend service + query.wait_for_unit("thanos-query-frontend.service") + query.succeed("curl -sS http://localhost:${toString frontPort}/-/healthy") + # Test if the Thanos sidecar has correctly uploaded its TSDB to S3, if the # Thanos storage service has correctly downloaded it from S3 and if the Thanos # query service running on $store can correctly retrieve the metric: diff --git a/pkgs/servers/monitoring/thanos/default.nix b/pkgs/servers/monitoring/thanos/default.nix index 37814a417491..156bd4a6ec23 100644 --- a/pkgs/servers/monitoring/thanos/default.nix +++ b/pkgs/servers/monitoring/thanos/default.nix @@ -1,24 +1,25 @@ -{ lib, buildGoModule, fetchFromGitHub, fetchpatch }: +{ lib +, buildGoModule +, fetchFromGitHub +, go +, nix-update-script +, nixosTests +, testers +, thanos +}: + buildGoModule rec { pname = "thanos"; - version = "0.31.0"; + version = "0.32.5"; src = fetchFromGitHub { - rev = "v${version}"; owner = "thanos-io"; repo = "thanos"; - sha256 = "sha256-EJZGc4thu0WhVSSRolIRYg39S81Cgm+JHwpW5eE7mDc="; + rev = "refs/tags/v${version}"; + hash = "sha256-A4bDCyvctHmDBYzvWpeEO4u6KhoICN7BbRQK4aZCbIA="; }; - patches = [ - # https://github.com/thanos-io/thanos/pull/6126 - (fetchpatch { - url = "https://github.com/thanos-io/thanos/commit/a4c218bd690259fc0c78fe67e0739bd33d38541e.patch"; - hash = "sha256-Hxc1s5IXAyw01/o4JvOXuyYuOFy0+cBUv3OkRv4DCXs="; - }) - ]; - - vendorHash = "sha256-8+MUMux6v/O2syVyTx758yUBfJkertzibz6yFB05nWk="; + vendorHash = "sha256-ZjkMvbWq96Rte9WoxAWzeouVA/6mBqanvY9yHr9F5MM="; doCheck = true; @@ -30,12 +31,26 @@ buildGoModule rec { "-X ${t}.Branch=unknown" "-X 
${t}.BuildUser=nix@nixpkgs" "-X ${t}.BuildDate=unknown" + "-X ${t}.GoVersion=${lib.getVersion go}" ]; + passthru = { + updateScript = nix-update-script { }; + tests = { + inherit (nixosTests) prometheus; + version = testers.testVersion { + command = "thanos --version"; + package = thanos; + }; + }; + }; + meta = with lib; { description = "Highly available Prometheus setup with long term storage capabilities"; homepage = "https://github.com/thanos-io/thanos"; + changelog = "https://github.com/thanos-io/thanos/releases/tag/v${version}"; license = licenses.asl20; - maintainers = with maintainers; [ basvandijk ]; + mainProgram = "thanos"; + maintainers = with maintainers; [ basvandijk anthonyroussel ]; }; }