From d924de58be179e38ce352171055c2832c3ca4d97 Mon Sep 17 00:00:00 2001 From: Serge K Date: Tue, 19 Apr 2022 21:52:52 +0300 Subject: [PATCH] cudaPackages.cudnn: migrate to redist cuda, fix missing zlib (#168748) * cudaPackages.cudnn: migrate to redist cudaPackages * cudaPackages.cudnn: fix missing zlib in rpath * cudaPackages.cudnn: remove the propagated cudatoolkit * cudaPackages.cudnn: use autoPatchelfHook instead of custom find ... -exec ldd | grep routine mark libcudnn_cnn_infer.so as needed for libcudnn.so on cudnn>=8.0.5 - a hint for autoPatchelf, as an alternative to manually adding $ORIGIN * cudaPackages.cudnn: use automatic hooks for rpath as a more common way to use addOpenGLRunpath and autoPatchelf with cudaPackages * cudaPackages.cudnn: consume individual cuda packages ...since cudnn is part of the cuda package set - introduces the scary useCudatoolkitRunfile function argument to discourage usage of the runfile-based cudatoolkit - instead of the rather hidden useRedist term in let ... 
in - repeats cudatoolkit_root pattern after cuda_joined in pytorch &c (the "toolchain view") - redist packages are marked optional to support cuda<11.4 where the attributes for redist packages do not exist * cudaPackages.cudnn: update to pname+version Co-authored-by: Sandro Co-authored-by: Sandro --- .../science/math/cudnn/extension.nix | 6 +- .../libraries/science/math/cudnn/generic.nix | 80 +++++++++++-------- pkgs/games/katago/default.nix | 1 + 3 files changed, 54 insertions(+), 33 deletions(-) diff --git a/pkgs/development/libraries/science/math/cudnn/extension.nix b/pkgs/development/libraries/science/math/cudnn/extension.nix index 172935fc9794..f1bdfb9836ed 100644 --- a/pkgs/development/libraries/science/math/cudnn/extension.nix +++ b/pkgs/development/libraries/science/math/cudnn/extension.nix @@ -5,7 +5,11 @@ final: prev: let ### CuDNN - buildCuDnnPackage = args: callPackage ./generic.nix {} args; + buildCuDnnPackage = args: + let + useCudatoolkitRunfile = lib.versionOlder cudaVersion "11.3.999"; + in + callPackage ./generic.nix { inherit useCudatoolkitRunfile; } args; toUnderscore = str: lib.replaceStrings ["."] ["_"] str; diff --git a/pkgs/development/libraries/science/math/cudnn/generic.nix b/pkgs/development/libraries/science/math/cudnn/generic.nix index 2e53bdcdfc4e..1e9bac5efed2 100644 --- a/pkgs/development/libraries/science/math/cudnn/generic.nix +++ b/pkgs/development/libraries/science/math/cudnn/generic.nix @@ -1,8 +1,14 @@ { stdenv , lib -, cudatoolkit +, zlib +, useCudatoolkitRunfile ? false +, cudaVersion +, cudaMajorVersion +, cudatoolkit # if cuda>=11.4: only used for .cc +, libcublas ? null # cuda <11.4 doesn't ship redist packages +, autoPatchelfHook +, autoAddOpenGLRunpathHook , fetchurl -, addOpenGLRunpath , # The distributed version of CUDNN includes both dynamically liked .so files, # as well as statically linked .a files.
However, CUDNN is quite large # (multiple gigabytes), so you can save some space in your nix store by @@ -17,44 +23,53 @@ , url , hash ? null , sha256 ? null -, supportedCudaVersions ? [] +, supportedCudaVersions ? [ ] }: assert (hash != null) || (sha256 != null); +assert useCudatoolkitRunfile || (libcublas != null); + let + inherit (cudatoolkit) cc; + majorMinorPatch = version: lib.concatStringsSep "." (lib.take 3 (lib.splitVersion version)); version = majorMinorPatch fullVersion; -in stdenv.mkDerivation { - name = "cudatoolkit-${cudatoolkit.majorVersion}-cudnn-${version}"; + cudatoolkit_root = + if useCudatoolkitRunfile + then cudatoolkit + else libcublas; +in +stdenv.mkDerivation { + pname = "cudatoolkit-${cudaMajorVersion}-cudnn"; inherit version; - # It's often the case that the src depends on the version of cudatoolkit it's - # being linked against, so we pass in `cudatoolkit` as an argument to `mkSrc`. + src = fetchurl { inherit url hash sha256; }; - nativeBuildInputs = [ addOpenGLRunpath ]; + # Check and normalize Runpath against DT_NEEDED using autoPatchelf. + # Prepend /run/opengl-driver/lib using addOpenGLRunpath for dlopen("libcuda.so") + nativeBuildInputs = [ + autoPatchelfHook + autoAddOpenGLRunpathHook + ]; - # Some cuDNN libraries depend on things in cudatoolkit, eg. - # libcudnn_ops_infer.so.8 tries to load libcublas.so.11. So we need to patch - # cudatoolkit into RPATH. See also https://github.com/NixOS/nixpkgs/blob/88a2ad974692a5c3638fcdc2c772e5770f3f7b21/pkgs/development/python-modules/jaxlib/bin.nix#L78-L98. + # Used by autoPatchelfHook + buildInputs = [ + cc.cc.lib # libstdc++ + zlib + cudatoolkit_root + ]; + + # We used to patch Runpath here, but now we use autoPatchelfHook # # Note also that version <=8.3.0 contained a subdirectory "lib64/" but in # version 8.3.2 it seems to have been renamed to simply "lib/".
installPhase = '' runHook preInstall - function fixRunPath { - p=$(patchelf --print-rpath $1) - patchelf --set-rpath "''${p:+$p:}${lib.makeLibraryPath [ stdenv.cc.cc cudatoolkit.lib ]}:${cudatoolkit}/lib:\$ORIGIN/" $1 - } - - for sofile in {lib,lib64}/lib*.so; do - fixRunPath $sofile - done - mkdir -p $out cp -a include $out/include [ -d "lib/" ] && cp -a lib $out/lib @@ -66,20 +81,20 @@ in stdenv.mkDerivation { runHook postInstall ''; - # Set RUNPATH so that libcuda in /run/opengl-driver(-32)/lib can be found. - # See the explanation in addOpenGLRunpath. - postFixup = '' - for lib in $out/lib/lib*.so; do - addOpenGLRunpath $lib - done + # Without --add-needed autoPatchelf forgets $ORIGIN on cudnn>=8.0.5. + postFixup = lib.optionalString (lib.versionAtLeast fullVersion "8.0.5") '' + patchelf $out/lib/libcudnn.so --add-needed libcudnn_cnn_infer.so ''; - propagatedBuildInputs = [ - cudatoolkit - ]; - passthru = { - inherit cudatoolkit; + inherit useCudatoolkitRunfile; + + cudatoolkit = lib.warn '' + cudnn.cudatoolkit passthru attribute is deprecated; + if your derivation uses cudnn directly, it should probably consume cudaPackages instead + '' + cudatoolkit; + majorVersion = lib.versions.major version; }; @@ -89,9 +104,10 @@ in stdenv.mkDerivation { # official version constraints (as recorded in default.nix). In some cases # you _may_ be able to smudge version constraints, just know that you're # embarking into unknown and unsupported territory when doing so.
- broken = !(elem cudatoolkit.majorMinorVersion supportedCudaVersions); + broken = !(elem cudaVersion supportedCudaVersions); description = "NVIDIA CUDA Deep Neural Network library (cuDNN)"; homepage = "https://developer.nvidia.com/cudnn"; + # TODO: consider marking unfreeRedistributable when not using runfile license = licenses.unfree; platforms = [ "x86_64-linux" ]; maintainers = with maintainers; [ mdaiter samuela ]; diff --git a/pkgs/games/katago/default.nix b/pkgs/games/katago/default.nix index 6328b3f9bc82..a3d7ed5cba9d 100644 --- a/pkgs/games/katago/default.nix +++ b/pkgs/games/katago/default.nix @@ -52,6 +52,7 @@ stdenv.mkDerivation rec { eigen ] ++ lib.optionals (enableGPU && enableCuda) [ cudaPackages.cudnn + cudaPackages.cudatoolkit mesa.drivers ] ++ lib.optionals (enableGPU && !enableCuda) [ opencl-headers