opencv4: respect config.cudaCapabilities

This is needed for faster builds when debugging the opencv derivation, and it's more consistent with other cuda-enabled packages. -DCUDA_GENERATION seems to expect architecture names, so we refactor cudaFlags to facilitate easier extraction of the configured archnames.
2023-02-27 14:58:14 +02:00 · 2023-02-27 14:58:14 +02:00 · d378cc6fb2
commit d378cc6fb2
parent e305011223
6 changed files with 99 additions and 60 deletions
--- a/pkgs/applications/science/math/mxnet/default.nix
+++ b/pkgs/applications/science/math/mxnet/default.nix
@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
      "-DUSE_OLDCMAKECUDA=ON"  # see https://github.com/apache/incubator-mxnet/issues/10743
      "-DCUDA_ARCH_NAME=All"
      "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
-      "-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
+      "-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.realArches}"
    ] else [ "-DUSE_CUDA=OFF" ])
    ++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

--- a/pkgs/development/compilers/cudatoolkit/flags.nix
+++ b/pkgs/development/compilers/cudatoolkit/flags.nix
@ -18,8 +18,15 @@ let
  # from improved performance, reduced file size, or greater hardware suppport by
  # passing a configuration based on your specific GPU environment.
  #
-  # config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
-  # config.cudaForwardCompat: bool for compatibility with future GPU generations
+  # config.cudaCapabilities :: List Capability
+  # List of hardware generations to build
+  # Last item is considered the optional forward-compatibility arch
+  # E.g. [ "8.0" ]
+  #
+  # config.cudaForwardCompat :: Bool
+  # Whether to include the forward compatibility gencode (+PTX)
+  # to support future GPU generations:
+  # E.g. true
  #
  # Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351

@ -39,6 +46,9 @@ let
  # GPUs which are supported by the provided CUDA version.
  supportedGpus = builtins.filter isSupported gpus;

+  # supportedCapabilities :: List Capability
+  supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
+
  # cudaArchNameToVersions :: AttrSet String (List String)
  # Maps the name of a GPU architecture to different versions of that architecture.
  # For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
@ -49,12 +59,6 @@ let
      (gpu: gpu.archName)
      supportedGpus;

-  # cudaArchNames :: List String
-  # NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
-  #   otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them
-  #   from is already sorted, so we'll preserve that order here.
-  cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus);
-
  # cudaComputeCapabilityToName :: AttrSet String String
  # Maps the version of a GPU architecture to the name of that architecture.
  # For example, "8.0" maps to "Ampere".
@ -67,23 +71,6 @@ let
      supportedGpus
  );

-  # cudaComputeCapabilities :: List String
-  # NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
-  #   otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them
-  #   from is already sorted, so we'll preserve that order here.
-  # Use the user-provided list of CUDA capabilities if it's provided.
-  cudaComputeCapabilities = config.cudaCapabilities
-    or (lists.map (gpu: gpu.computeCapability) supportedGpus);
-
-  # cudaForwardComputeCapability :: String
-  cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX";
-
-  # cudaComputeCapabilitiesAndForward :: List String
-  # The list of supported CUDA architectures, including the forward compatibility architecture.
-  # If forward compatibility is disabled, this will be the same as cudaComputeCapabilities.
-  cudaComputeCapabilitiesAndForward = cudaComputeCapabilities
-    ++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability;
-
  # dropDot :: String -> String
  dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;

@ -101,38 +88,79 @@ let
    "-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}"
  );

-  # cudaRealArches :: List String
-  # The real architectures are physical architectures supported by the CUDA version.
-  # For example, "sm_80".
-  cudaRealArches = archMapper "sm" cudaComputeCapabilities;
+  formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec {
+    inherit cudaCapabilities enableForwardCompat;

-  # cudaVirtualArches :: List String
-  # The virtual architectures are typically used for forward compatibility, when trying to support
-  # an architecture newer than the CUDA version allows.
-  # For example, "compute_80".
-  cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;
+    # forwardCapability :: String
+    # Forward "compute" capability, a.k.a PTX
+    # E.g. "8.6+PTX"
+    forwardCapability = (lists.last cudaCapabilities) + "+PTX";

-  # cudaArches :: List String
-  # By default, build for all supported architectures and forward compatibility via a virtual
-  # architecture for the newest supported architecture.
-  cudaArches = cudaRealArches ++
-    lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches);
+    # capabilitiesAndForward :: List String
+    # The list of supported CUDA architectures, including the forward compatibility architecture.
+    # If forward compatibility is disabled, this will be the same as cudaCapabilities.
+    # E.g. [ "7.5" "8.6" "8.6+PTX" ]
+    capabilitiesAndForward = cudaCapabilities ++ lists.optionals enableForwardCompat [ forwardCapability ];

-  # cudaGencode :: List String
-  # A list of CUDA gencode arguments to pass to NVCC.
-  cudaGencode =
-    let
-      base = gencodeMapper "sm" cudaComputeCapabilities;
-      forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ];
-    in
-    base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat;
+    # archNames :: List String
+    # E.g. [ "Turing" "Ampere" ]
+    archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities);
+
+    # realArches :: List String
+    # The real architectures are physical architectures supported by the CUDA version.
+    # E.g. [ "sm_75" "sm_86" ]
+    realArches = archMapper "sm" cudaCapabilities;
+
+    # virtualArches :: List String
+    # The virtual architectures are typically used for forward compatibility, when trying to support
+    # an architecture newer than the CUDA version allows.
+    # E.g. [ "compute_75" "compute_86" ]
+    virtualArches = archMapper "compute" cudaCapabilities;
+
+    # arches :: List String
+    # By default, build for all supported architectures and forward compatibility via a virtual
+    # architecture for the newest supported architecture.
+    # E.g. [ "sm_75" "sm_86" "compute_86" ]
+    arches = realArches ++
+      lists.optional enableForwardCompat (lists.last virtualArches);
+
+    # gencode :: List String
+    # A list of CUDA gencode arguments to pass to NVCC.
+    # E.g. [ "-gencode=arch=compute_75,code=sm_75" ... "-gencode=arch=compute_86,code=compute_86" ]
+    gencode =
+      let
+        base = gencodeMapper "sm" cudaCapabilities;
+        forward = gencodeMapper "compute" [ (lists.last cudaCapabilities) ];
+      in
+      base ++ lib.optionals enableForwardCompat forward;
+  };

 in
+# When changing names or formats: pause, validate, and update the assert
+assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
+  cudaCapabilities = [ "7.5" "8.6" ];
+  enableForwardCompat = true;
+
+  capabilitiesAndForward = [ "7.5" "8.6" "8.6+PTX" ];
+  forwardCapability = "8.6+PTX";
+
+  archNames = [ "Turing" "Ampere" ];
+  realArches = [ "sm_75" "sm_86" ];
+  virtualArches = [ "compute_75" "compute_86" ];
+  arches = [ "sm_75" "sm_86" "compute_86" ];
+
+  gencode = [ "-gencode=arch=compute_75,code=sm_75" "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_86,code=compute_86" ];
+};
 {
-  inherit
-    cudaArchNames
-    cudaArchNameToVersions cudaComputeCapabilityToName
-    cudaRealArches cudaVirtualArches cudaArches
-    cudaGencode;
-  cudaCapabilities = cudaComputeCapabilitiesAndForward;
+  # formatCapabilities :: { cudaCapabilities: List Capability, enableForwardCompat: Boolean } ->  { ... }
+  inherit formatCapabilities;
+
+  # cudaArchNameToVersions :: AttrSet String (List String)
+  inherit cudaArchNameToVersions;
+
+  # cudaComputeCapabilityToName :: AttrSet String String
+  inherit cudaComputeCapabilityToName;
+} // formatCapabilities {
+  cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
+  enableForwardCompat = config.cudaForwardCompat or true;
 }
--- a/pkgs/development/libraries/opencv/4.x.nix
+++ b/pkgs/development/libraries/opencv/4.x.nix
@ -37,7 +37,7 @@
 , enableContrib ? true

 , enableCuda ? (config.cudaSupport or false) && stdenv.hostPlatform.isx86_64
-, cudatoolkit
+, cudaPackages ? { }
 , nvidia-optical-flow-sdk

 , enableUnfree ? false
@ -79,6 +79,9 @@
 }:

 let
+  inherit (cudaPackages) cudatoolkit;
+  inherit (cudaPackages.cudaFlags) cudaCapabilities;
+
  version = "4.7.0";

  src = fetchFromGitHub {
@ -342,6 +345,14 @@ stdenv.mkDerivation {
    "-DCUDA_FAST_MATH=ON"
    "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
    "-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"
+
+    # OpenCV respects at least three variables:
+    # -DCUDA_GENERATION takes a single arch name, e.g. Volta
+    # -DCUDA_ARCH_BIN takes a semicolon-separated list of real arches, e.g. "8.0;8.6"
+    # -DCUDA_ARCH_PTX takes the virtual arch, e.g. "8.6"
+    "-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}"
+    "-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}"
+
    "-DNVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH=${nvidia-optical-flow-sdk}"
  ] ++ lib.optionals stdenv.isDarwin [
    "-DWITH_OPENCL=OFF"
--- a/pkgs/development/libraries/science/math/magma/generic.nix
+++ b/pkgs/development/libraries/science/math/magma/generic.nix
@ -37,13 +37,13 @@ let
  #   lists.subtractLists a b = b - a

  # For CUDA
-  supportedCudaSmArches = lists.intersectLists cudaFlags.cudaRealArches supportedGpuTargets;
+  supportedCudaSmArches = lists.intersectLists cudaFlags.realArches supportedGpuTargets;
  # Subtract the supported SM architectures from the real SM architectures to get the unsupported
  # SM architectures.
-  unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.cudaRealArches;
+  unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.realArches;

  # For ROCm
-  # NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.cudaRealArches.
+  # NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.realArches.
  #   For some reason, Magma's CMakeLists.txt file does not handle the "gfx" prefix, so we must
  #   remove it.
  rocmArches = lists.map (x: strings.removePrefix "gfx" x) hip.gpuTargets;
--- a/pkgs/development/libraries/science/math/nccl/default.nix
+++ b/pkgs/development/libraries/science/math/nccl/default.nix
@ -10,7 +10,7 @@ with cudaPackages;

 let
  # Output looks like "-gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86"
-  gencode = lib.concatStringsSep " " cudaFlags.cudaGencode;
+  gencode = lib.concatStringsSep " " cudaFlags.gencode;
 in
 backendStdenv.mkDerivation rec {
  name = "nccl-${version}-cuda-${cudaPackages.cudaMajorVersion}";
--- a/pkgs/development/python-modules/jaxlib/default.nix
+++ b/pkgs/development/python-modules/jaxlib/default.nix
@ -164,7 +164,7 @@ let
      build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
      build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
      build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
-      build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
+      build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.realArches}"
    '' + ''
      CFG
    '';