From c025ec185f8b53e57997e1fd0a172a285ec97e67 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Sat, 24 Dec 2022 18:37:47 +0100 Subject: [PATCH 1/2] nixos/lib/make-disk-image: make raitobezarius code owner of this primitive --- .github/CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 5385e4a1e0a7..be881b7f5548 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -48,6 +48,10 @@ # Nixpkgs build-support /pkgs/build-support/writers @lassulus @Profpatsch +# Nixpkgs make-disk-image +/doc/builders/images/makediskimage.section.md @raitobezarius +/nixos/lib/make-disk-image.nix @raitobezarius + # Nixpkgs documentation /maintainers/scripts/db-to-md.sh @jtojnar @ryantm /maintainers/scripts/doc @jtojnar @ryantm From 22adcaa4491dde18442a234252e1d7ed8c098672 Mon Sep 17 00:00:00 2001 From: Raito Bezarius Date: Tue, 20 Dec 2022 21:04:17 +0100 Subject: [PATCH 2/2] nixos/lib/make-disk-image: docs, UEFI vars recording, more determinism - Extensive documentation in NixOS manual - Deterministic mode that fixes various identifiers relative to disk partitions and filesystems in ext4 case - UEFI variable recording --- doc/builders/images.xml | 1 + doc/builders/images/makediskimage.section.md | 107 ++++++++++ .../from_md/release-notes/rl-2305.section.xml | 8 + .../manual/release-notes/rl-2305.section.md | 2 + nixos/lib/make-disk-image.nix | 193 ++++++++++++++++-- 5 files changed, 298 insertions(+), 13 deletions(-) create mode 100644 doc/builders/images/makediskimage.section.md diff --git a/doc/builders/images.xml b/doc/builders/images.xml index f86ebd86bee4..7d06130e3eca 100644 --- a/doc/builders/images.xml +++ b/doc/builders/images.xml @@ -10,4 +10,5 @@ + diff --git a/doc/builders/images/makediskimage.section.md b/doc/builders/images/makediskimage.section.md new file mode 100644 index 000000000000..9798a0be4d46 --- /dev/null +++ b/doc/builders/images/makediskimage.section.md @@ -0,0 +1,107 @@ +# `` 
{#sec-make-disk-image} + +`<nixpkgs/nixos/lib/make-disk-image.nix>` is a function to create _disk images_ in multiple formats: raw, QCOW2 (QEMU), QCOW2-Compressed (compressed version), VDI (VirtualBox), VPC (VirtualPC). + +This function can create images in two ways: + +- using `cptofs` without any virtual machine to create a Nix store disk image, +- using a virtual machine to create a full NixOS installation. + +When testing early-boot or lifecycle parts of NixOS such as a bootloader or multiple generations, it is necessary to opt for a full NixOS system installation. +For many web servers and applications, however, a Nix-store-only disk image is sufficient and is faster to build. + +NixOS tests also use this function when preparing the VM. The `cptofs` method is used when `virtualisation.useBootLoader` is false (the default). Otherwise the second method is used. + +## Features + +For reference, read the function signature source code for documentation on arguments: <https://github.com/NixOS/nixpkgs/blob/master/nixos/lib/make-disk-image.nix>. +Features are separated in various sections depending on whether you opt for a Nix-store only image or a full NixOS image. 
+ +### Common + +- arbitrary NixOS configuration +- automatic or bound disk size: `diskSize` parameter, `additionalSpace` can be set when `diskSize` is `auto` to add a constant amount of disk space +- multiple partition table layouts: EFI, legacy, legacy + GPT, hybrid, none through `partitionTableType` parameter +- OVMF or EFI firmware and variable templates can be customized +- root filesystem `fsType` can be customized to whatever `mkfs.${fsType}` exists during operations +- root filesystem label can be customized, defaults to `nix-store` if it's a Nix store image, otherwise `nixpkgs/nixos` +- arbitrary code can be executed after the disk image has been produced with `postVM` +- the current nixpkgs can be realized as a channel in the disk image, which will change the hash of the image when the sources are updated +- additional store paths can be provided through `additionalPaths` + +### Full NixOS image + +- arbitrary contents with permissions can be placed in the target filesystem using `contents` +- a `/etc/nixpkgs/nixos/configuration.nix` can be provided through `configFile` +- bootloaders are supported +- EFI variables can be mutated during image production and the result is exposed in `$out` +- boot partition size can be set through `bootSize` when the partition table is `efi` or `hybrid` + +### On bit-to-bit reproducibility + +Images are **NOT** deterministic; please do not hesitate to try to fix this. Sources of non-determinism are (not exhaustive): + +- bootloader installation has timestamps +- SQLite Nix store database contains registration times +- `/etc/shadow` is in a non-deterministic order + +A `deterministic` flag is available for best-effort determinism. 
+ +## Usage + +To produce a Nix-store only image: +```nix +let + pkgs = import <nixpkgs> {}; + lib = pkgs.lib; + make-disk-image = import <nixpkgs/nixos/lib/make-disk-image.nix>; +in + make-disk-image { + inherit pkgs lib; + config = {}; + additionalPaths = [ ]; + format = "qcow2"; + onlyNixStore = true; + partitionTableType = "none"; + installBootLoader = false; + touchEFIVars = false; + diskSize = "auto"; + additionalSpace = "0M"; # Defaults to 512M. + copyChannel = false; + } +``` + +Some arguments can be left out; they are shown explicitly for the sake of the example. + +Building this derivation will provide a QCOW2 disk image containing only the Nix store and its registration information. + +To produce a NixOS installation disk image with UEFI and bootloader installed: +```nix +let + pkgs = import <nixpkgs> {}; + lib = pkgs.lib; + make-disk-image = import <nixpkgs/nixos/lib/make-disk-image.nix>; + evalConfig = import <nixpkgs/nixos/lib/eval-config.nix>; +in + make-disk-image { + inherit pkgs lib; + config = evalConfig { + modules = [ + { + fileSystems."/" = { device = "/dev/vda"; fsType = "ext4"; autoFormat = true; }; + boot.loader.grub.device = "/dev/vda"; + } + ]; + }; + format = "qcow2"; + onlyNixStore = false; + partitionTableType = "legacy+gpt"; + installBootLoader = true; + touchEFIVars = true; + diskSize = "auto"; + additionalSpace = "0M"; # Defaults to 512M. + copyChannel = false; + } +``` + + diff --git a/nixos/doc/manual/from_md/release-notes/rl-2305.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2305.section.xml index b410a660c551..e84da9f5b6f8 100644 --- a/nixos/doc/manual/from_md/release-notes/rl-2305.section.xml +++ b/nixos/doc/manual/from_md/release-notes/rl-2305.section.xml @@ -311,6 +311,14 @@ + + + nixos/lib/make-disk-image.nix can now + mutate EFI variables, run user-provided EFI firmware or + variable templates. This is now extensively documented in the + NixOS manual. 
+ + A new virtualisation.rosetta module was diff --git a/nixos/doc/manual/release-notes/rl-2305.section.md b/nixos/doc/manual/release-notes/rl-2305.section.md index 911575d8ab53..fcb7fe932baa 100644 --- a/nixos/doc/manual/release-notes/rl-2305.section.md +++ b/nixos/doc/manual/release-notes/rl-2305.section.md @@ -87,6 +87,8 @@ In addition to numerous new and upgraded packages, this release has the followin [headscale's example configuration](https://github.com/juanfont/headscale/blob/main/config-example.yaml) can be directly written as attribute-set in Nix within this option. +- `nixos/lib/make-disk-image.nix` can now mutate EFI variables, run user-provided EFI firmware or variable templates. This is now extensively documented in the NixOS manual. + - A new `virtualisation.rosetta` module was added to allow running `x86_64` binaries through [Rosetta](https://developer.apple.com/documentation/apple-silicon/about-the-rosetta-translation-environment) inside virtualised NixOS guests on Apple silicon. This feature works by default with the [UTM](https://docs.getutm.app/) virtualisation [package](https://search.nixos.org/packages?channel=unstable&show=utm&from=0&size=1&sort=relevance&type=packages&query=utm). - The new option `users.motdFile` allows configuring a Message Of The Day that can be updated dynamically. diff --git a/nixos/lib/make-disk-image.nix b/nixos/lib/make-disk-image.nix index e784ec9e6778..365fc1f03a5b 100644 --- a/nixos/lib/make-disk-image.nix +++ b/nixos/lib/make-disk-image.nix @@ -1,3 +1,85 @@ +/* Technical details + +`make-disk-image` has a bit of magic to minimize the amount of work to do in a virtual machine. + +It relies on the [LKL (Linux Kernel Library) project](https://github.com/lkl/linux) which provides Linux kernel as userspace library. + +The Nix-store only image only need to run LKL tools to produce an image and will never spawn a virtual machine, whereas full images will always require a virtual machine, but also use LKL. 
+ +### Image preparation phase + +The image preparation phase produces the initial image layout in a folder: + +- devise a root folder based on `$PWD` +- prepare the contents by copying and restoring ACLs in this root folder +- load in the Nix store database all additional paths computed by `pkgs.closureInfo` in a temporary Nix store +- run `nixos-install` in a temporary folder +- transfer from the temporary store the additional paths registered to the installed NixOS +- compute the size of the disk image based on the apparent size of the root folder +- partition the disk image using the corresponding script according to the partition table type +- format the partitions if needed +- use `cptofs` (LKL tool) to copy the root folder inside the disk image + +At this step, the disk image already contains the Nix store; it now only needs to be converted to the desired format to be used. + +### Image conversion phase + +Using `qemu-img`, the disk image is converted from a raw format to the desired format: qcow2(-compressed), vdi, vpc. + +### Image Partitioning + +#### `none` + +No partition table layout is written. The image is a bare filesystem image. + +#### `legacy` + +The image is partitioned using MBR. There is one primary ext4 partition starting at 1 MiB that fills the rest of the disk image. + +This partition layout is unsuitable for UEFI. + +#### `legacy+gpt` + +This partition table type uses GPT and: + +- creates a "no filesystem" partition from 1MiB to 2MiB ; +- sets the `bios_grub` flag on this "no filesystem" partition, which marks it as a [GRUB BIOS partition](https://www.gnu.org/software/parted/manual/html_node/set.html) ; +- creates a primary ext4 partition starting at 2MiB and extending to the full disk image ; +- performs optimal alignment checks on each partition + +This partition layout is unsuitable for UEFI boot, because it has no ESP (EFI System Partition). 
It can work with CSM (Compatibility Support Module) which emulates legacy (BIOS) boot for UEFI. + +#### `efi` + +This partition table type uses GPT and: + +- creates an FAT32 ESP partition from 8MiB to the specified `bootSize` parameter (256MiB by default), and sets it bootable ; +- creates a primary ext4 partition starting after the boot partition and extending to the full disk image + +#### `hybrid` + +This partition table type uses GPT and: + +- creates a "no filesystem" partition from 0 to 1MiB, and sets the `bios_grub` flag on it ; +- creates an FAT32 ESP partition from 8MiB to the specified `bootSize` parameter (256MiB by default), and sets it bootable ; +- creates a primary ext4 partition starting after the boot one and extending to the full disk image + +This partition layout can be booted by a BIOS that understands GPT layouts and recognizes the MBR at the start. + +### How to run determinism analysis on results? + +Build your derivation with `--check` to rebuild it and verify it is the same. + +If it fails, you will be left with two folders, one of which has a `.check` suffix. + +You can use `diffoscope` to see the differences between the folders. + +However, `diffoscope` is currently not able to diff two QCOW2 images; thus, it is advised to use the raw format. + +Even if you use raw disks, `diffoscope` cannot diff the partition table and partitions recursively. + +To solve this, you can run `fdisk -l $image` and generate `dd if=$image of=$image-p$i.raw skip=$start count=$sectors` for each `(start, sectors)` listed in the `fdisk` output. Now, you will have each partition as a separate file and you can compare them in pairs. +*/ { pkgs , lib @@ -47,6 +129,18 @@ , # Whether to invoke `switch-to-configuration boot` during image creation installBootLoader ? true +, # Whether to make the EFI variables available in $out/efi-vars.fd and use them during disk creation + touchEFIVars ? false + +, # OVMF firmware derivation + OVMF ? pkgs.OVMF.fd + +, # EFI firmware + efiFirmware ? 
OVMF.firmware + +, # EFI variables + efiVariables ? OVMF.variables + , # The root file system type. fsType ? "ext4" @@ -70,6 +164,22 @@ , # Disk image format, one of qcow2, qcow2-compressed, vdi, vpc, raw. format ? "raw" + # Whether to fix: + # - GPT Disk Unique Identifier (diskGUID) + # - GPT Partition Unique Identifier: depends on the layout, root partition UUID can be controlled through `rootGPUID` option + # - GPT Partition Type Identifier: fixed according to the layout, e.g. ESP partition, etc. through `parted` invocation. + # - Filesystem Unique Identifier when fsType = ext4 for *root partition*. + # BIOS/MBR support is "best effort" at the moment. + # Boot partitions may not be deterministic. + # Also, to fix last time checked of the ext4 partition if fsType = ext4. +, deterministic ? true + + # GPT Partition Unique Identifier for root partition. +, rootGPUID ? "F222513B-DED1-49FA-B591-20CE86A2FE7F" + # When fsType = ext4, this is the root Filesystem Unique Identifier. + # TODO: support other filesystems someday. +, rootFSUID ? (if fsType == "ext4" then rootGPUID else null) + , # Whether a nix channel based on the current source tree should be # made available inside the image. Useful for interactive use of nix # utils, but changes the hash of the image when the sources are @@ -80,15 +190,18 @@ additionalPaths ? 
[] }: -assert partitionTableType == "legacy" || partitionTableType == "legacy+gpt" || partitionTableType == "efi" || partitionTableType == "hybrid" || partitionTableType == "none"; -# We use -E offset=X below, which is only supported by e2fsprogs -assert partitionTableType != "none" -> fsType == "ext4"; +assert (lib.assertOneOf "partitionTableType" partitionTableType [ "legacy" "legacy+gpt" "efi" "hybrid" "none" ]); +assert (lib.assertMsg (fsType == "ext4" && deterministic -> rootFSUID != null) "In deterministic mode with a ext4 partition, rootFSUID must be non-null, by default, it is equal to rootGPUID."); + # We use -E offset=X below, which is only supported by e2fsprogs +assert (lib.assertMsg (partitionTableType != "none" -> fsType == "ext4") "to produce a partition table, we need to use -E offset flag which is support only for fsType = ext4"); +assert (lib.assertMsg (touchEFIVars -> partitionTableType == "hybrid" || partitionTableType == "efi" || partitionTableType == "legacy+gpt") "EFI variables can be used only with a partition table of type: hybrid, efi or legacy+gpt."); + # If only Nix store image, then: contents must be empty, configFile must be unset, and we should no install bootloader. 
+assert (lib.assertMsg (onlyNixStore -> contents == [] && configFile == null && !installBootLoader) "In a only Nix store image, the contents must be empty, no configuration must be provided and no bootloader should be installed."); # Either both or none of {user,group} need to be set -assert lib.all +assert (lib.assertMsg (lib.all (attrs: ((attrs.user or null) == null) == ((attrs.group or null) == null)) - contents; -assert onlyNixStore -> contents == [] && configFile == null && !installBootLoader; + contents) "Contents of the disk image should set none of {user, group} or both at the same time."); with lib; @@ -127,6 +240,14 @@ let format' = format; in let mkpart primary ext4 2MB -1 \ align-check optimal 2 \ print + ${optionalString deterministic '' + sgdisk \ + --disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C \ + --partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \ + --partition-guid=2:970C694F-AFD0-4B99-B750-CDB7A329AB6F \ + --partition-guid=3:${rootGPUID} \ + $diskImage + ''} ''; efi = '' parted --script $diskImage -- \ @@ -134,6 +255,13 @@ let format' = format; in let mkpart ESP fat32 8MiB ${bootSize} \ set 1 boot on \ mkpart primary ext4 ${bootSize} -1 + ${optionalString deterministic '' + sgdisk \ + --disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C \ + --partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \ + --partition-guid=2:${rootGPUID} \ + $diskImage + ''} ''; hybrid = '' parted --script $diskImage -- \ @@ -143,10 +271,20 @@ let format' = format; in let mkpart no-fs 0 1024KiB \ set 2 bios_grub on \ mkpart primary ext4 ${bootSize} -1 + ${optionalString deterministic '' + sgdisk \ + --disk-guid=97FD5997-D90B-4AA3-8D16-C1723AEA73C \ + --partition-guid=1:1C06F03B-704E-4657-B9CD-681A087A2FDC \ + --partition-guid=2:970C694F-AFD0-4B99-B750-CDB7A329AB6F \ + --partition-guid=3:${rootGPUID} \ + $diskImage + ''} ''; none = ""; }.${partitionTableType}; + useEFIBoot = touchEFIVars; + nixpkgs = cleanSource pkgs.path; # FIXME: merge with channel.nix / 
make-channel.nix. @@ -171,7 +309,9 @@ let format' = format; in let config.system.build.nixos-enter nix systemdMinimal - ] ++ stdenv.initialPath); + ] + ++ lib.optional deterministic gptfdisk + ++ stdenv.initialPath); # I'm preserving the line below because I'm going to search for it across nixpkgs to consolidate # image building logic. The comment right below this now appears in 4 different places in nixpkgs :) @@ -368,20 +508,35 @@ let format' = format; in let diskImage=$out/${filename} ''; + createEFIVars = '' + efiVars=$out/efi-vars.fd + cp ${efiVariables} $efiVars + chmod 0644 $efiVars + ''; + buildImage = pkgs.vmTools.runInLinuxVM ( pkgs.runCommand name { - preVM = prepareImage; + preVM = prepareImage + lib.optionalString touchEFIVars createEFIVars; buildInputs = with pkgs; [ util-linux e2fsprogs dosfstools ]; postVM = moveOrConvertImage + postVM; + QEMU_OPTS = + concatStringsSep " " (lib.optional useEFIBoot "-drive if=pflash,format=raw,unit=0,readonly=on,file=${efiFirmware}" + ++ lib.optionals touchEFIVars [ + "-drive if=pflash,format=raw,unit=1,file=$efiVars" + ] + ); memSize = 1024; } '' export PATH=${binPath}:$PATH rootDisk=${if partitionTableType != "none" then "/dev/vda${rootPartition}" else "/dev/vda"} - # Some tools assume these exist - ln -s vda /dev/xvda - ln -s vda /dev/sda + # It is necessary to set root filesystem unique identifier in advance, otherwise + # bootloader might get the wrong one and fail to boot. + # At the end, we reset again because we want deterministic timestamps. 
+ ${optionalString (fsType == "ext4" && deterministic) '' + tune2fs -T now ${optionalString deterministic "-U ${rootFSUID}"} -c 0 -i 0 $rootDisk + ''} # make systemd-boot find ESP without udev mkdir /dev/block ln -s /dev/vda1 /dev/block/254:1 @@ -396,6 +551,8 @@ let format' = format; in let mkdir -p /mnt/boot mkfs.vfat -n ESP /dev/vda1 mount /dev/vda1 /mnt/boot + + ${optionalString touchEFIVars "mount -t efivarfs efivarfs /sys/firmware/efi/efivars"} ''} # Install a configuration.nix @@ -405,7 +562,13 @@ let format' = format; in let ''} ${lib.optionalString installBootLoader '' - # Set up core system link, GRUB, etc. + # In this throwaway resource, we only have /dev/vda, but the actual VM may refer to another disk for bootloader, e.g. /dev/vdb + # Use this option to create a symlink from vda to any arbitrary device you want. + ${optionalString (config.boot.loader.grub.device != "/dev/vda") '' + ln -s /dev/vda ${config.boot.loader.grub.device} + ''} + + # Set up core system link, bootloader (sd-boot, GRUB, uboot, etc.), etc. NIXOS_INSTALL_BOOTLOADER=1 nixos-enter --root $mountPoint -- /nix/var/nix/profiles/system/bin/switch-to-configuration boot # The above scripts will generate a random machine-id and we don't want to bake a single ID into all our images @@ -432,8 +595,12 @@ let format' = format; in let # Make sure resize2fs works. Note that resize2fs has stricter criteria for resizing than a normal # mount, so the `-c 0` and `-i 0` don't affect it. Setting it to `now` doesn't produce deterministic # output, of course, but we can fix that when/if we start making images deterministic. + # In deterministic mode, this is fixed to 1970-01-01 (UNIX timestamp 0). + # This two-step approach is necessary otherwise `tune2fs` will want a fresher filesystem to perform + # some changes. 
${optionalString (fsType == "ext4") '' - tune2fs -T now -c 0 -i 0 $rootDisk + tune2fs -T now ${optionalString deterministic "-U ${rootFSUID}"} -c 0 -i 0 $rootDisk + ${optionalString deterministic "tune2fs -f -T 19700101 $rootDisk"} ''} '' );