2018-10-15 00:39:26 +02:00
|
|
|
{ config, lib, ... }:
|
|
|
|
|
|
|
|
with lib;
|
|
|
|
|
|
|
|
{
|
|
|
|
# Maintainer list for this module, looked up in lib.maintainers.
meta.maintainers = [ lib.maintainers.joachifm ];
|
|
|
|
|
2021-02-27 21:26:47 +01:00
|
|
|
imports = [
  # Forward definitions made under the old `security.virtualization.*`
  # path to the `security.virtualisation.*` option declared below.
  (lib.mkRenamedOptionModule
    [ "security" "virtualization" "flushL1DataCache" ]
    [ "security" "virtualisation" "flushL1DataCache" ])
];
|
|
|
|
|
2018-10-15 00:39:26 +02:00
|
|
|
options = {
|
|
|
|
# Boolean switch, enabled by default. When set to false, the config
# section of this module sets the `user.max_user_namespaces` sysctl to 0.
security.allowUserNamespaces = lib.mkOption {
  default = true;
  type = lib.types.bool;
  description = ''
    Whether to allow creation of user namespaces.

    The motivation for disabling user namespaces is the potential
    presence of code paths where the kernel's permission checking
    logic fails to account for namespacing, instead permitting a
    namespaced process to act outside the namespace with the same
    privileges as it would have inside it. This is particularly
    damaging in the common case of running as root within the namespace.

    When user namespace creation is disallowed, attempting to create a
    user namespace fails with "no space left on device" (ENOSPC).
    root may re-enable user namespace creation at runtime.
  '';
};
|
2018-12-16 10:37:36 +01:00
|
|
|
|
2020-08-23 12:17:53 +02:00
|
|
|
# Boolean switch, off by default. When enabled, the config section of this
# module sets the `kernel.unprivileged_userns_clone` sysctl.
security.unprivilegedUsernsClone = lib.mkOption {
  default = false;
  type = lib.types.bool;
  description = ''
    When disabled, unprivileged users will not be able to create new namespaces.
    By default unprivileged user namespaces are disabled.
    This option only works in a hardened profile.
  '';
};
|
|
|
|
|
2018-12-16 10:37:36 +01:00
|
|
|
# Boolean switch, off by default. When enabled, the config section of this
# module adds `nohibernate` and sets `kernel.kexec_load_disabled`.
security.protectKernelImage = lib.mkOption {
  default = false;
  type = lib.types.bool;
  description = ''
    Whether to prevent replacing the running kernel image.
  '';
};
|
2018-12-26 22:22:55 +01:00
|
|
|
|
2018-12-26 22:24:04 +01:00
|
|
|
# Boolean switch, on by default. When set to false, the config section of
# this module adds `nosmt` to the kernel command line.
security.allowSimultaneousMultithreading = lib.mkOption {
  default = true;
  type = lib.types.bool;
  description = ''
    Whether to allow SMT/hyperthreading. Disabling SMT means that only
    physical CPU cores will be usable at runtime, potentially at
    significant performance cost.

    The primary motivation for disabling SMT is to mitigate the risk of
    leaking data between threads running on the same CPU core (due to
    e.g., shared caches). This attack vector is unproven.

    Disabling SMT is a supplement to the L1 data cache flushing mitigation
    (see <xref linkend="opt-security.virtualisation.flushL1DataCache"/>)
    versus malicious VM guests (SMT could "bring back" previously flushed
    data).
  '';
};
|
|
|
|
|
2019-07-30 02:24:56 +02:00
|
|
|
# Boolean switch, off by default. When enabled, the config section of this
# module adds `pti=on` to the kernel command line.
security.forcePageTableIsolation = lib.mkOption {
  default = false;
  type = lib.types.bool;
  description = ''
    Whether to force-enable the Page Table Isolation (PTI) Linux kernel
    feature even on CPU models that claim to be safe from Meltdown.

    This hardening feature is most beneficial to systems that run untrusted
    workloads that rely on address space isolation for security.
  '';
};
|
|
|
|
|
2019-07-19 15:49:37 +02:00
|
|
|
# Either null (the default) or one of "never", "cond", "always". A non-null
# value is passed through as `kvm-intel.vmentry_l1d_flush=<value>` by the
# config section of this module.
security.virtualisation.flushL1DataCache = lib.mkOption {
  default = null;
  type = lib.types.nullOr (lib.types.enum [ "never" "cond" "always" ]);
  description = ''
    Whether the hypervisor should flush the L1 data cache before
    entering guests.
    See also <xref linkend="opt-security.allowSimultaneousMultithreading"/>.

    <variablelist>
      <varlistentry>
        <term><literal>null</literal></term>
        <listitem><para>uses the kernel default</para></listitem>
      </varlistentry>
      <varlistentry>
        <term><literal>"never"</literal></term>
        <listitem><para>disables L1 data cache flushing entirely.
        May be appropriate if all guests are trusted.</para></listitem>
      </varlistentry>
      <varlistentry>
        <term><literal>"cond"</literal></term>
        <listitem><para>flushes L1 data cache only for pre-determined
        code paths. May leak information about the host address space
        layout.</para></listitem>
      </varlistentry>
      <varlistentry>
        <term><literal>"always"</literal></term>
        <listitem><para>flushes L1 data cache every time the hypervisor
        enters the guest. May incur significant performance cost.
        </para></listitem>
      </varlistentry>
    </variablelist>
  '';
};
|
2018-10-15 00:39:26 +02:00
|
|
|
};
|
|
|
|
|
2018-11-24 18:37:46 +01:00
|
|
|
config = mkMerge [
|
|
|
|
# Applied only when `security.allowUserNamespaces` is false.
(mkIf (!config.security.allowUserNamespaces) {
  # Setting the number of allowed user namespaces to 0 effectively disables
  # the feature at runtime. Note that root may raise the limit again
  # at any time.
  boot.kernel.sysctl."user.max_user_namespaces" = 0;

  assertions = [
    {
      # `nix.settings.sandbox` is not a pure boolean: besides `true` and
      # `false` it also accepts the string "relaxed". The `->` operator
      # only takes booleans, so the value is first reduced to "is the
      # sandbox enabled in any form"; otherwise a "relaxed" setting would
      # abort evaluation with a type error instead of reporting this
      # conflict.
      assertion = (config.nix.settings.sandbox != false) -> config.security.allowUserNamespaces;
      # toJSON renders `true` as `true` and "relaxed" as `"relaxed"`, so the
      # message names the value that was actually configured.
      message = "`nix.settings.sandbox = ${builtins.toJSON config.nix.settings.sandbox}` conflicts with `!security.allowUserNamespaces`.";
    }
  ];
})
|
2018-12-16 10:37:36 +01:00
|
|
|
|
2020-08-23 12:17:53 +02:00
|
|
|
# Applied only when `security.unprivilegedUsernsClone` is true.
(lib.mkIf config.security.unprivilegedUsernsClone {
  boot.kernel.sysctl = {
    # Set with mkDefault, i.e. at default priority.
    "kernel.unprivileged_userns_clone" = lib.mkDefault true;
  };
})
|
|
|
|
|
2018-12-16 10:37:36 +01:00
|
|
|
# Applied only when `security.protectKernelImage` is true.
(lib.mkIf config.security.protectKernelImage {
  boot = {
    # Hibernation would allow the running kernel to be replaced, so it is
    # switched off on the kernel command line.
    kernelParams = [ "nohibernate" ];

    # Forbid loading a replacement kernel image without a reboot.
    kernel.sysctl."kernel.kexec_load_disabled" = lib.mkDefault true;
  };
})
|
2018-12-26 22:22:55 +01:00
|
|
|
|
2018-12-26 22:24:04 +01:00
|
|
|
# Applied only when `security.allowSimultaneousMultithreading` is false:
# adds `nosmt` to the kernel command line.
(lib.mkIf (!config.security.allowSimultaneousMultithreading) {
  boot = {
    kernelParams = [ "nosmt" ];
  };
})
|
|
|
|
|
2019-07-30 02:24:56 +02:00
|
|
|
# Applied only when `security.forcePageTableIsolation` is true:
# adds `pti=on` to the kernel command line.
(lib.mkIf config.security.forcePageTableIsolation {
  boot = {
    kernelParams = [ "pti=on" ];
  };
})
|
|
|
|
|
2019-07-19 15:49:37 +02:00
|
|
|
# Applied only when `security.virtualisation.flushL1DataCache` is non-null:
# the chosen value is passed verbatim as the `kvm-intel.vmentry_l1d_flush`
# kernel parameter.
(
  let
    flushMode = config.security.virtualisation.flushL1DataCache;
  in
  lib.mkIf (flushMode != null) {
    boot.kernelParams = [ "kvm-intel.vmentry_l1d_flush=${flushMode}" ];
  }
)
|
2018-11-24 18:37:46 +01:00
|
|
|
];
|
2018-10-15 00:39:26 +02:00
|
|
|
}
|