From fb6fbcb85cd0167c9759b82f16013777327c07e5 Mon Sep 17 00:00:00 2001 From: Ivan Kozik Date: Thu, 5 Aug 2021 05:07:25 +0000 Subject: [PATCH] nixos/victoriametrics: set LimitNOFILE=1048576 to fix panic and restart loop This fixes: ``` systemd[1]: Started VictoriaMetrics time series database. victoria-metrics[379550]: 2021-08-04T19:33:39.833Z panic VictoriaMetrics/lib/storage/partition.go:954 FATAL: unrecoverable error when merging small parts in the partition "/var/lib/victoriametrics/data/small/2021_08": cannot open source part for merging: cannot open metaindex file in stream mode: cannot open file "/var/lib/victoriametrics/data/small/2021_08/1228_1228_20210804184120.712_20210804184121.899_16982E83CD7A763A/metaindex.bin": open /var/lib/victoriametrics/data/small/2021_08/1228_1228_20210804184120.712_20210804184121.899_16982E83CD7A763A/metaindex.bin: too many open files victoria-metrics[379550]: panic: FATAL: unrecoverable error when merging small parts in the partition "/var/lib/victoriametrics/data/small/2021_08": cannot open source part for merging: cannot open metaindex file in stream mode: cannot open file "/var/lib/victoriametrics/data/small/2021_08/1228_1228_20210804184120.712_20210804184121.899_16982E83CD7A763A/metaindex.bin": open /var/lib/victoriametrics/data/small/2021_08/1228_1228_20210804184120.712_20210804184121.899_16982E83CD7A763A/metaindex.bin: too many open files victoria-metrics[379550]: goroutine 629 [running]: victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger.logMessage(0xbb3ea1, 0x5, 0xc001113800, 0x1e7, 0x4) victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger/logger.go:270 +0xc69 victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger.logLevelSkipframes(0x1, 0xbb3ea1, 0x5, 0xbe3f8b, 0x4b, 0xc000bb3f88, 0x2, 0x2) victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger/logger.go:138 +0xd1 victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger.logLevel(...) victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger/logger.go:130 victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger.Panicf(...) victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/logger/logger.go:126 victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*partition).smallPartsMerger(0xc0014d7980) victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/partition.go:954 +0x145 victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*partition).startMergeWorkers.func1(0xc0014d7980) victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/partition.go:933 +0x2b victoria-metrics[379550]: created by github.com/VictoriaMetrics/VictoriaMetrics/lib/storage.(*partition).startMergeWorkers victoria-metrics[379550]: github.com/VictoriaMetrics/VictoriaMetrics/lib/storage/partition.go:932 +0x6c systemd[1]: victoriametrics.service: Main process exited, code=exited, status=2/INVALIDARGUMENT systemd[1]: victoriametrics.service: Failed with result 'exit-code'. systemd[1]: victoriametrics.service: Consumed 587ms CPU time, received 6.5K IP traffic, sent 1.7K IP traffic. systemd[1]: victoriametrics.service: Scheduled restart job, restart counter is at 2064. systemd[1]: Stopped VictoriaMetrics time series database. systemd[1]: victoriametrics.service: Consumed 587ms CPU time, received 6.5K IP traffic, sent 1.7K IP traffic. systemd[1]: Starting VictoriaMetrics time series database... ``` --- nixos/modules/services/databases/victoriametrics.nix | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/nixos/modules/services/databases/victoriametrics.nix b/nixos/modules/services/databases/victoriametrics.nix index 5b09115bb2fb..9e2c79e61a39 100644 --- a/nixos/modules/services/databases/victoriametrics.nix +++ b/nixos/modules/services/databases/victoriametrics.nix @@ -53,6 +53,14 @@ let cfg = config.services.victoriametrics; in -retentionPeriod ${toString cfg.retentionPeriod} \ ${lib.escapeShellArgs cfg.extraOptions} ''; + # victoriametrics 1.59 with ~7GB of data seems to eventually panic when merging files and then + # begins restart-looping forever. Set LimitNOFILE= to a large number to work around this issue. + # + # panic: FATAL: unrecoverable error when merging small parts in the partition "/var/lib/victoriametrics/data/small/2021_08": + # cannot open source part for merging: cannot open values file in stream mode: + # cannot open file "/var/lib/victoriametrics/data/small/2021_08/[...]/values.bin": + # open /var/lib/victoriametrics/data/small/2021_08/[...]/values.bin: too many open files + LimitNOFILE = 1048576; }; wantedBy = [ "multi-user.target" ];