opencv4: respect config.cudaCapabilities
This is needed for faster builds when debugging the opencv derivation, and it's more consistent with other cuda-enabled packages. -DCUDA_GENERATION seems to expect architecture names, so we refactor cudaFlags to facilitate easier extraction of the configured archnames.
This commit is contained in:
parent
e305011223
commit
d378cc6fb2
6 changed files with 99 additions and 60 deletions
|
@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
|
|||
"-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
|
||||
"-DCUDA_ARCH_NAME=All"
|
||||
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
|
||||
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
|
||||
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.realArches}"
|
||||
] else [ "-DUSE_CUDA=OFF" ])
|
||||
++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";
|
||||
|
||||
|
|
|
@ -18,8 +18,15 @@ let
|
|||
# from improved performance, reduced file size, or greater hardware support by
|
||||
# passing a configuration based on your specific GPU environment.
|
||||
#
|
||||
# config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
|
||||
# config.cudaForwardCompat: bool for compatibility with future GPU generations
|
||||
# config.cudaCapabilities :: List Capability
|
||||
# List of hardware generations to build
|
||||
# Last item is considered the optional forward-compatibility arch
|
||||
# E.g. [ "8.0" ]
|
||||
#
|
||||
# config.cudaForwardCompat :: Bool
|
||||
# Whether to include the forward compatibility gencode (+PTX)
|
||||
# to support future GPU generations:
|
||||
# E.g. true
|
||||
#
|
||||
# Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351
|
||||
|
||||
|
@ -39,6 +46,9 @@ let
|
|||
# GPUs which are supported by the provided CUDA version.
|
||||
supportedGpus = builtins.filter isSupported gpus;
|
||||
|
||||
# supportedCapabilities :: List Capability
|
||||
supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
|
||||
|
||||
# cudaArchNameToVersions :: AttrSet String (List String)
|
||||
# Maps the name of a GPU architecture to different versions of that architecture.
|
||||
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
|
||||
|
@ -49,12 +59,6 @@ let
|
|||
(gpu: gpu.archName)
|
||||
supportedGpus;
|
||||
|
||||
# cudaArchNames :: List String
|
||||
# NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
|
||||
# otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them
|
||||
# from is already sorted, so we'll preserve that order here.
|
||||
cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus);
|
||||
|
||||
# cudaComputeCapabilityToName :: AttrSet String String
|
||||
# Maps the version of a GPU architecture to the name of that architecture.
|
||||
# For example, "8.0" maps to "Ampere".
|
||||
|
@ -67,23 +71,6 @@ let
|
|||
supportedGpus
|
||||
);
|
||||
|
||||
# cudaComputeCapabilities :: List String
|
||||
# NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
|
||||
# otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them
|
||||
# from is already sorted, so we'll preserve that order here.
|
||||
# Use the user-provided list of CUDA capabilities if it's provided.
|
||||
cudaComputeCapabilities = config.cudaCapabilities
|
||||
or (lists.map (gpu: gpu.computeCapability) supportedGpus);
|
||||
|
||||
# cudaForwardComputeCapability :: String
|
||||
cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX";
|
||||
|
||||
# cudaComputeCapabilitiesAndForward :: List String
|
||||
# The list of supported CUDA architectures, including the forward compatibility architecture.
|
||||
# If forward compatibility is disabled, this will be the same as cudaComputeCapabilities.
|
||||
cudaComputeCapabilitiesAndForward = cudaComputeCapabilities
|
||||
++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability;
|
||||
|
||||
# dropDot :: String -> String
|
||||
dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;
|
||||
|
||||
|
@ -101,38 +88,79 @@ let
|
|||
"-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}"
|
||||
);
|
||||
|
||||
# cudaRealArches :: List String
|
||||
# The real architectures are physical architectures supported by the CUDA version.
|
||||
# For example, "sm_80".
|
||||
cudaRealArches = archMapper "sm" cudaComputeCapabilities;
|
||||
formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec {
|
||||
inherit cudaCapabilities enableForwardCompat;
|
||||
|
||||
# cudaVirtualArches :: List String
|
||||
# The virtual architectures are typically used for forward compatibility, when trying to support
|
||||
# an architecture newer than the CUDA version allows.
|
||||
# For example, "compute_80".
|
||||
cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;
|
||||
# forwardCapability :: String
|
||||
# Forward "compute" capability, a.k.a PTX
|
||||
# E.g. "8.6+PTX"
|
||||
forwardCapability = (lists.last cudaCapabilities) + "+PTX";
|
||||
|
||||
# cudaArches :: List String
|
||||
# By default, build for all supported architectures and forward compatibility via a virtual
|
||||
# architecture for the newest supported architecture.
|
||||
cudaArches = cudaRealArches ++
|
||||
lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches);
|
||||
# capabilitiesAndForward :: List String
|
||||
# The list of supported CUDA architectures, including the forward compatibility architecture.
|
||||
# If forward compatibility is disabled, this will be the same as cudaCapabilities.
|
||||
# E.g. [ "7.5" "8.6" "8.6+PTX" ]
|
||||
capabilitiesAndForward = cudaCapabilities ++ lists.optionals enableForwardCompat [ forwardCapability ];
|
||||
|
||||
# cudaGencode :: List String
|
||||
# A list of CUDA gencode arguments to pass to NVCC.
|
||||
cudaGencode =
|
||||
let
|
||||
base = gencodeMapper "sm" cudaComputeCapabilities;
|
||||
forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ];
|
||||
in
|
||||
base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat;
|
||||
# archNames :: List String
|
||||
# E.g. [ "Turing" "Ampere" ]
|
||||
archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities);
|
||||
|
||||
# realArches :: List String
|
||||
# The real architectures are physical architectures supported by the CUDA version.
|
||||
# E.g. [ "sm_75" "sm_86" ]
|
||||
realArches = archMapper "sm" cudaCapabilities;
|
||||
|
||||
# virtualArches :: List String
|
||||
# The virtual architectures are typically used for forward compatibility, when trying to support
|
||||
# an architecture newer than the CUDA version allows.
|
||||
# E.g. [ "compute_75" "compute_86" ]
|
||||
virtualArches = archMapper "compute" cudaCapabilities;
|
||||
|
||||
# arches :: List String
|
||||
# By default, build for all supported architectures and forward compatibility via a virtual
|
||||
# architecture for the newest supported architecture.
|
||||
# E.g. [ "sm_75" "sm_86" "compute_86" ]
|
||||
arches = realArches ++
|
||||
lists.optional enableForwardCompat (lists.last virtualArches);
|
||||
|
||||
# gencode :: List String
|
||||
# A list of CUDA gencode arguments to pass to NVCC.
|
||||
# E.g. [ "-gencode=arch=compute_75,code=sm_75" ... "-gencode=arch=compute_86,code=compute_86" ]
|
||||
gencode =
|
||||
let
|
||||
base = gencodeMapper "sm" cudaCapabilities;
|
||||
forward = gencodeMapper "compute" [ (lists.last cudaCapabilities) ];
|
||||
in
|
||||
base ++ lib.optionals enableForwardCompat forward;
|
||||
};
|
||||
|
||||
in
|
||||
# When changing names or formats: pause, validate, and update the assert
|
||||
assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
|
||||
cudaCapabilities = [ "7.5" "8.6" ];
|
||||
enableForwardCompat = true;
|
||||
|
||||
capabilitiesAndForward = [ "7.5" "8.6" "8.6+PTX" ];
|
||||
forwardCapability = "8.6+PTX";
|
||||
|
||||
archNames = [ "Turing" "Ampere" ];
|
||||
realArches = [ "sm_75" "sm_86" ];
|
||||
virtualArches = [ "compute_75" "compute_86" ];
|
||||
arches = [ "sm_75" "sm_86" "compute_86" ];
|
||||
|
||||
gencode = [ "-gencode=arch=compute_75,code=sm_75" "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_86,code=compute_86" ];
|
||||
};
|
||||
{
|
||||
inherit
|
||||
cudaArchNames
|
||||
cudaArchNameToVersions cudaComputeCapabilityToName
|
||||
cudaRealArches cudaVirtualArches cudaArches
|
||||
cudaGencode;
|
||||
cudaCapabilities = cudaComputeCapabilitiesAndForward;
|
||||
# formatCapabilities :: { cudaCapabilities: List Capability, enableForwardCompat: Bool } -> { ... }
|
||||
inherit formatCapabilities;
|
||||
|
||||
# cudaArchNameToVersions :: AttrSet String (List String)
|
||||
inherit cudaArchNameToVersions;
|
||||
|
||||
# cudaComputeCapabilityToName :: String => String
|
||||
inherit cudaComputeCapabilityToName;
|
||||
} // formatCapabilities {
|
||||
cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
|
||||
enableForwardCompat = config.cudaForwardCompat or true;
|
||||
}
|
||||
|
|
|
@ -37,7 +37,7 @@
|
|||
, enableContrib ? true
|
||||
|
||||
, enableCuda ? (config.cudaSupport or false) && stdenv.hostPlatform.isx86_64
|
||||
, cudatoolkit
|
||||
, cudaPackages ? { }
|
||||
, nvidia-optical-flow-sdk
|
||||
|
||||
, enableUnfree ? false
|
||||
|
@ -79,6 +79,9 @@
|
|||
}:
|
||||
|
||||
let
|
||||
inherit (cudaPackages) cudatoolkit;
|
||||
inherit (cudaPackages.cudaFlags) cudaCapabilities;
|
||||
|
||||
version = "4.7.0";
|
||||
|
||||
src = fetchFromGitHub {
|
||||
|
@ -342,6 +345,14 @@ stdenv.mkDerivation {
|
|||
"-DCUDA_FAST_MATH=ON"
|
||||
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
|
||||
"-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"
|
||||
|
||||
# OpenCV respects at least three variables:
|
||||
# -DCUDA_GENERATION takes a single arch name, e.g. Volta
|
||||
# -DCUDA_ARCH_BIN takes a semicolon-separated list of real arches, e.g. "8.0;8.6"
|
||||
# -DCUDA_ARCH_PTX takes the virtual arch, e.g. "8.6"
|
||||
"-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}"
|
||||
"-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}"
|
||||
|
||||
"-DNVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH=${nvidia-optical-flow-sdk}"
|
||||
] ++ lib.optionals stdenv.isDarwin [
|
||||
"-DWITH_OPENCL=OFF"
|
||||
|
|
|
@ -37,13 +37,13 @@ let
|
|||
# lists.subtractLists a b = b - a
|
||||
|
||||
# For CUDA
|
||||
supportedCudaSmArches = lists.intersectLists cudaFlags.cudaRealArches supportedGpuTargets;
|
||||
supportedCudaSmArches = lists.intersectLists cudaFlags.realArches supportedGpuTargets;
|
||||
# Subtract the supported SM architectures from the real SM architectures to get the unsupported
|
||||
# SM architectures.
|
||||
unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.cudaRealArches;
|
||||
unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.realArches;
|
||||
|
||||
# For ROCm
|
||||
# NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.cudaRealArches.
|
||||
# NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.realArches.
|
||||
# For some reason, Magma's CMakeLists.txt file does not handle the "gfx" prefix, so we must
|
||||
# remove it.
|
||||
rocmArches = lists.map (x: strings.removePrefix "gfx" x) hip.gpuTargets;
|
||||
|
|
|
@ -10,7 +10,7 @@ with cudaPackages;
|
|||
|
||||
let
|
||||
# Output looks like "-gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86"
|
||||
gencode = lib.concatStringsSep " " cudaFlags.cudaGencode;
|
||||
gencode = lib.concatStringsSep " " cudaFlags.gencode;
|
||||
in
|
||||
backendStdenv.mkDerivation rec {
|
||||
name = "nccl-${version}-cuda-${cudaPackages.cudaMajorVersion}";
|
||||
|
|
|
@ -164,7 +164,7 @@ let
|
|||
build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
|
||||
build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
|
||||
build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
|
||||
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
|
||||
build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.realArches}"
|
||||
'' + ''
|
||||
CFG
|
||||
'';
|
||||
|
|
Loading…
Reference in a new issue