From 9ac395366bee383913c157911f76cbbbf09f226b Mon Sep 17 00:00:00 2001 From: Luke Granger-Brown Date: Sun, 14 Feb 2021 17:57:50 +0000 Subject: [PATCH 1/3] ceph: use bundled rocksdb Ceph have some custom patches to make RocksDB play nicely with Bluestore, which means the system RocksDB package doesn't quite work properly. --- pkgs/tools/filesystems/ceph/default.nix | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pkgs/tools/filesystems/ceph/default.nix b/pkgs/tools/filesystems/ceph/default.nix index 61cc4573693d..2d9ecfbcaa93 100644 --- a/pkgs/tools/filesystems/ceph/default.nix +++ b/pkgs/tools/filesystems/ceph/default.nix @@ -9,7 +9,7 @@ , babeltrace, gperf , gtest , cunit, snappy -, rocksdb, makeWrapper +, makeWrapper , leveldb, oathToolkit , libnl, libcap_ng , rdkafka @@ -146,7 +146,7 @@ in rec { buildInputs = cryptoLibsMap.${cryptoStr} ++ [ boost ceph-python-env libxml2 optYasm optLibatomic_ops optLibs3 malloc zlib openldap lttng-ust babeltrace gperf gtest cunit - snappy rocksdb lz4 oathToolkit leveldb libnl libcap_ng rdkafka + snappy lz4 oathToolkit leveldb libnl libcap_ng rdkafka ] ++ lib.optionals stdenv.isLinux [ linuxHeaders util-linux libuuid udev keyutils optLibaio optLibxfs optZfs # ceph 14 @@ -171,12 +171,10 @@ in rec { cmakeFlags = [ "-DWITH_PYTHON3=ON" - "-DWITH_SYSTEM_ROCKSDB=OFF" + "-DWITH_SYSTEM_ROCKSDB=OFF" # breaks Bluestore "-DCMAKE_INSTALL_DATADIR=${placeholder "lib"}/lib" - "-DWITH_SYSTEM_BOOST=ON" - "-DWITH_SYSTEM_ROCKSDB=ON" "-DWITH_SYSTEM_GTEST=ON" "-DMGR_PYTHON_VERSION=${ceph-python-env.python.pythonVersion}" "-DWITH_SYSTEMD=OFF" From bfc11c643d9e0bd2d79c5de86d0bddce589b56d0 Mon Sep 17 00:00:00 2001 From: Luke Granger-Brown Date: Sun, 14 Feb 2021 17:59:29 +0000 Subject: [PATCH 2/3] nixos/tests/ceph-single-node-bluestore: init The current Ceph tests use the old method for OSDs to store data on disks, known as Filestore. 
This means there are no tests for the Bluestore functionality that run on install, which means that things like RocksDB being broken can slip through and break the Bluestore functionality in a subtle and difficult to debug manner. Add a test to check that Bluestore works, at least on a single node. --- nixos/tests/all-tests.nix | 1 + nixos/tests/ceph-single-node-bluestore.nix | 196 +++++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 nixos/tests/ceph-single-node-bluestore.nix diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index 444580bc0bed..530847575bcf 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -55,6 +55,7 @@ in cassandra_3_11 = handleTest ./cassandra.nix { testPackage = pkgs.cassandra_3_11; }; ceph-multi-node = handleTestOn ["x86_64-linux"] ./ceph-multi-node.nix {}; ceph-single-node = handleTestOn ["x86_64-linux"] ./ceph-single-node.nix {}; + ceph-single-node-bluestore = handleTestOn ["x86_64-linux"] ./ceph-single-node-bluestore.nix {}; certmgr = handleTest ./certmgr.nix {}; cfssl = handleTestOn ["x86_64-linux"] ./cfssl.nix {}; charliecloud = handleTest ./charliecloud.nix {}; diff --git a/nixos/tests/ceph-single-node-bluestore.nix b/nixos/tests/ceph-single-node-bluestore.nix new file mode 100644 index 000000000000..cc873e8aee57 --- /dev/null +++ b/nixos/tests/ceph-single-node-bluestore.nix @@ -0,0 +1,196 @@ +import ./make-test-python.nix ({pkgs, lib, ...}: + +let + cfg = { + clusterId = "066ae264-2a5d-4729-8001-6ad265f50b03"; + monA = { + name = "a"; + ip = "192.168.1.1"; + }; + osd0 = { + name = "0"; + key = "AQBCEJNa3s8nHRAANvdsr93KqzBznuIWm2gOGg=="; + uuid = "55ba2294-3e24-478f-bee0-9dca4c231dd9"; + }; + osd1 = { + name = "1"; + key = "AQBEEJNac00kExAAXEgy943BGyOpVH1LLlHafQ=="; + uuid = "5e97a838-85b6-43b0-8950-cb56d554d1e5"; + }; + osd2 = { + name = "2"; + key = "AQAdyhZeIaUlARAAGRoidDAmS6Vkp546UFEf5w=="; + uuid = "ea999274-13d0-4dd5-9af9-ad25a324f72f"; + }; + }; + generateCephConfig = 
{ daemonConfig }: { + enable = true; + global = { + fsid = cfg.clusterId; + monHost = cfg.monA.ip; + monInitialMembers = cfg.monA.name; + }; + } // daemonConfig; + + generateHost = { pkgs, cephConfig, networkConfig, ... }: { + virtualisation = { + memorySize = 512; + emptyDiskImages = [ 20480 20480 20480 ]; + vlans = [ 1 ]; + }; + + networking = networkConfig; + + environment.systemPackages = with pkgs; [ + bash + sudo + ceph + xfsprogs + ]; + + boot.kernelModules = [ "xfs" ]; + + services.ceph = cephConfig; + }; + + networkMonA = { + dhcpcd.enable = false; + interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [ + { address = cfg.monA.ip; prefixLength = 24; } + ]; + }; + cephConfigMonA = generateCephConfig { daemonConfig = { + mon = { + enable = true; + daemons = [ cfg.monA.name ]; + }; + mgr = { + enable = true; + daemons = [ cfg.monA.name ]; + }; + osd = { + enable = true; + daemons = [ cfg.osd0.name cfg.osd1.name cfg.osd2.name ]; + }; + }; }; + + # Following deployment is based on the manual deployment described here: + # https://docs.ceph.com/docs/master/install/manual-deployment/ + # For other ways to deploy a ceph cluster, look at the documentation at + # https://docs.ceph.com/docs/master/ + testscript = { ... }: '' + start_all() + + monA.wait_for_unit("network.target") + + # Bootstrap ceph-mon daemon + monA.succeed( + "sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. 
--cap mon 'allow *'", + "sudo -u ceph ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'", + "sudo -u ceph ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring", + "monmaptool --create --add ${cfg.monA.name} ${cfg.monA.ip} --fsid ${cfg.clusterId} /tmp/monmap", + "sudo -u ceph ceph-mon --mkfs -i ${cfg.monA.name} --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring", + "sudo -u ceph touch /var/lib/ceph/mon/ceph-${cfg.monA.name}/done", + "systemctl start ceph-mon-${cfg.monA.name}", + ) + monA.wait_for_unit("ceph-mon-${cfg.monA.name}") + monA.succeed("ceph mon enable-msgr2") + + # Can't check ceph status until a mon is up + monA.succeed("ceph -s | grep 'mon: 1 daemons'") + + # Start the ceph-mgr daemon, after copying in the keyring + monA.succeed( + "sudo -u ceph mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}/", + "ceph auth get-or-create mgr.${cfg.monA.name} mon 'allow profile mgr' osd 'allow *' mds 'allow *' > /var/lib/ceph/mgr/ceph-${cfg.monA.name}/keyring", + "systemctl start ceph-mgr-${cfg.monA.name}", + ) + monA.wait_for_unit("ceph-mgr-${cfg.monA.name}") + monA.wait_until_succeeds("ceph -s | grep 'quorum ${cfg.monA.name}'") + monA.wait_until_succeeds("ceph -s | grep 'mgr: ${cfg.monA.name}(active,'") + + # Bootstrap OSDs + monA.succeed( + "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}", + "echo bluestore > /var/lib/ceph/osd/ceph-${cfg.osd0.name}/type", + "ln -sf /dev/vdb /var/lib/ceph/osd/ceph-${cfg.osd0.name}/block", + "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}", + "echo bluestore > /var/lib/ceph/osd/ceph-${cfg.osd1.name}/type", + "ln -sf /dev/vdc /var/lib/ceph/osd/ceph-${cfg.osd1.name}/block", + "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd2.name}", + "echo bluestore > /var/lib/ceph/osd/ceph-${cfg.osd2.name}/type", + "ln -sf /dev/vdd /var/lib/ceph/osd/ceph-${cfg.osd2.name}/block", + "ceph-authtool 
--create-keyring /var/lib/ceph/osd/ceph-${cfg.osd0.name}/keyring --name osd.${cfg.osd0.name} --add-key ${cfg.osd0.key}", + "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd1.name}/keyring --name osd.${cfg.osd1.name} --add-key ${cfg.osd1.key}", + "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd2.name}/keyring --name osd.${cfg.osd2.name} --add-key ${cfg.osd2.key}", + 'echo \'{"cephx_secret": "${cfg.osd0.key}"}\' | ceph osd new ${cfg.osd0.uuid} -i -', + 'echo \'{"cephx_secret": "${cfg.osd1.key}"}\' | ceph osd new ${cfg.osd1.uuid} -i -', + 'echo \'{"cephx_secret": "${cfg.osd2.key}"}\' | ceph osd new ${cfg.osd2.uuid} -i -', + ) + + # Initialize the OSDs with bluestore + monA.succeed( + "ceph-osd -i ${cfg.osd0.name} --mkfs --osd-uuid ${cfg.osd0.uuid}", + "ceph-osd -i ${cfg.osd1.name} --mkfs --osd-uuid ${cfg.osd1.uuid}", + "ceph-osd -i ${cfg.osd2.name} --mkfs --osd-uuid ${cfg.osd2.uuid}", + "chown -R ceph:ceph /var/lib/ceph/osd", + "systemctl start ceph-osd-${cfg.osd0.name}", + "systemctl start ceph-osd-${cfg.osd1.name}", + "systemctl start ceph-osd-${cfg.osd2.name}", + ) + monA.wait_until_succeeds("ceph osd stat | grep -e '3 osds: 3 up[^,]*, 3 in'") + monA.wait_until_succeeds("ceph -s | grep 'mgr: ${cfg.monA.name}(active,'") + monA.wait_until_succeeds("ceph -s | grep 'HEALTH_OK'") + + monA.succeed( + "ceph osd pool create single-node-test 32 32", + "ceph osd pool ls | grep 'single-node-test'", + "ceph osd pool rename single-node-test single-node-other-test", + "ceph osd pool ls | grep 'single-node-other-test'", + ) + monA.wait_until_succeeds("ceph -s | grep '2 pools, 33 pgs'") + monA.succeed( + "ceph osd getcrushmap -o crush", + "crushtool -d crush -o decrushed", + "sed 's/step chooseleaf firstn 0 type host/step chooseleaf firstn 0 type osd/' decrushed > modcrush", + "crushtool -c modcrush -o recrushed", + "ceph osd setcrushmap -i recrushed", + "ceph osd pool set single-node-other-test size 2", + ) + monA.wait_until_succeeds("ceph 
-s | grep 'HEALTH_OK'") + monA.wait_until_succeeds("ceph -s | grep '33 active+clean'") + monA.fail( + "ceph osd pool ls | grep 'single-node-test'", + "ceph osd pool delete single-node-other-test single-node-other-test --yes-i-really-really-mean-it", + ) + + # Shut down ceph by stopping ceph.target. + monA.succeed("systemctl stop ceph.target") + + # Start it up + monA.succeed("systemctl start ceph.target") + monA.wait_for_unit("ceph-mon-${cfg.monA.name}") + monA.wait_for_unit("ceph-mgr-${cfg.monA.name}") + monA.wait_for_unit("ceph-osd-${cfg.osd0.name}") + monA.wait_for_unit("ceph-osd-${cfg.osd1.name}") + monA.wait_for_unit("ceph-osd-${cfg.osd2.name}") + + # Ensure the cluster comes back up again + monA.succeed("ceph -s | grep 'mon: 1 daemons'") + monA.wait_until_succeeds("ceph -s | grep 'quorum ${cfg.monA.name}'") + monA.wait_until_succeeds("ceph osd stat | grep -e '3 osds: 3 up[^,]*, 3 in'") + monA.wait_until_succeeds("ceph -s | grep 'mgr: ${cfg.monA.name}(active,'") + monA.wait_until_succeeds("ceph -s | grep 'HEALTH_OK'") + ''; +in { + name = "basic-single-node-ceph-cluster-bluestore"; + meta = with pkgs.lib.maintainers; { + maintainers = [ lukegb ]; + }; + + nodes = { + monA = generateHost { pkgs = pkgs; cephConfig = cephConfigMonA; networkConfig = networkMonA; }; + }; + + testScript = testscript; +}) From 236df9cee7b22e37ce2e98a3ede370abc145dfea Mon Sep 17 00:00:00 2001 From: Luke Granger-Brown Date: Sun, 14 Feb 2021 18:01:16 +0000 Subject: [PATCH 3/3] ceph: add passthru.tests --- pkgs/tools/filesystems/ceph/default.nix | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkgs/tools/filesystems/ceph/default.nix b/pkgs/tools/filesystems/ceph/default.nix index 2d9ecfbcaa93..aaa5806d402a 100644 --- a/pkgs/tools/filesystems/ceph/default.nix +++ b/pkgs/tools/filesystems/ceph/default.nix @@ -13,6 +13,7 @@ , leveldb, oathToolkit , libnl, libcap_ng , rdkafka +, nixosTests # Optional Dependencies , yasm ? null, fcgi ? null, expat ? 
null @@ -199,6 +200,7 @@ in rec { meta = getMeta "Distributed storage system"; passthru.version = version; + passthru.tests = { inherit (nixosTests) ceph-single-node ceph-multi-node ceph-single-node-bluestore; }; }; ceph-client = runCommand "ceph-client-${version}" {