Merge pull request #168380 from SomeoneSerge/faiss-cuda-split

faiss: use the redistributable cuda
commit 6c77085e6a
Samuel Ainsworth 2023-04-01 20:59:29 -04:00 committed by GitHub
3 changed files with 157 additions and 7 deletions

pkgs/development/libraries/nvidia-thrust/default.nix (new file)

@@ -0,0 +1,102 @@
{ lib
, config
, fetchFromGitHub
, stdenv
, cmake
, pkg-config
, cudaPackages ? { }
, symlinkJoin
, tbb
, hostSystem ? "CPP"
, deviceSystem ? if config.cudaSupport or false then "CUDA" else "OMP"
}:
# Policy for device_vector<T>
assert builtins.elem deviceSystem [
"CPP" # Serial on CPU
"OMP" # Parallel with OpenMP
"TBB" # Parallel with Intel TBB
"CUDA" # Parallel on GPU
];
# Policy for host_vector<T>
# Always lives on CPU, but execution can be made parallel
assert builtins.elem hostSystem [ "CPP" "OMP" "TBB" ];
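# For example, the all-packages.nix hunk of this commit instantiates
# nvidia-thrust-intel with hostSystem = "TBB", and nvidia-thrust-cuda with
# deviceSystem = "CUDA".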
let
pname = "nvidia-thrust";
version = "1.16.0";
inherit (cudaPackages) backendStdenv cudaFlags;
cudaCapabilities = map cudaFlags.dropDot cudaFlags.cudaCapabilities;
tbbSupport = builtins.elem "TBB" [ deviceSystem hostSystem ];
cudaSupport = deviceSystem == "CUDA";
  # TODO: the symlinkJoin below is a workaround; ideally we would consume the
  # split cudaPackages outputs directly:
cudaJoined = symlinkJoin {
name = "cuda-packages-unsplit";
paths = with cudaPackages; [
cuda_nvcc
cuda_nvrtc # symbols: cudaLaunchDevice, &c; notice postBuild
cuda_cudart # cuda_runtime.h
libcublas
];
postBuild = ''
ln -s $out/lib $out/lib64
'';
};
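  # Sketch of the joined tree (assuming the usual layout of the
  # redistributable packages): $out/bin/nvcc, $out/include/cuda_runtime.h,
  # $out/lib/libcublas.so, plus the $out/lib64 -> lib symlink from postBuild.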
in
stdenv.mkDerivation {
inherit pname version;
src = fetchFromGitHub {
owner = "NVIDIA";
repo = "thrust";
rev = version;
fetchSubmodules = true;
hash = "sha256-/EyznxWKuHuvHNjq+SQg27IaRbtkjXR2zlo2YgCWmUQ=";
};
# NVIDIA's "compiler hacks" seem like work-arounds for legacy toolchains and
# cause us errors such as:
# > Thrust's test harness uses CMAKE_CXX_COMPILER for the CUDA host compiler.
# > Refusing to overwrite specified CMAKE_CUDA_HOST_COMPILER
# So we un-fix cmake after them:
postPatch = ''
echo > cmake/ThrustCompilerHacks.cmake
'';
buildInputs = lib.optionals tbbSupport [ tbb ];
nativeBuildInputs = [
cmake
pkg-config
] ++ lib.optionals cudaSupport [
# Goes in native build inputs because thrust looks for headers
# in a path relative to nvcc...
cudaJoined
];
cmakeFlags = [
"-DTHRUST_INCLUDE_CUB_CMAKE=${if cudaSupport then "ON" else "OFF"}"
"-DTHRUST_DEVICE_SYSTEM=${deviceSystem}"
"-DTHRUST_HOST_SYSTEM=${hostSystem}"
"-DTHRUST_AUTO_DETECT_COMPUTE_ARCHS=OFF"
"-DTHRUST_DISABLE_ARCH_BY_DEFAULT=ON"
] ++ lib.optionals cudaFlags.enableForwardCompat [
"-DTHRUST_ENABLE_COMPUTE_FUTURE=ON"
  ] ++ map (sm: "-DTHRUST_ENABLE_COMPUTE_${sm}=ON") cudaCapabilities;
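  # Sketch: with cudaCapabilities = [ "8.6" ], dropDot gives "86" and the map
  # above contributes "-DTHRUST_ENABLE_COMPUTE_86=ON".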
passthru = {
inherit cudaSupport cudaPackages cudaJoined;
};
meta = with lib; {
description = "A high-level C++ parallel algorithms library that builds on top of CUDA, TBB, OpenMP, etc";
homepage = "https://github.com/NVIDIA/thrust";
license = licenses.asl20;
platforms = platforms.unix;
maintainers = with maintainers; [ SomeoneSerge ];
};
}
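
For reference, the policy arguments compose as ordinary overrides; a minimal
sketch (the nvidia-thrust attribute is the one this commit adds to
all-packages.nix below):

    # sketch: a serial-host, TBB-device build of the derivation above
    nvidia-thrust.override {
      hostSystem = "CPP";
      deviceSystem = "TBB";
    }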

pkgs/development/libraries/science/math/faiss/default.nix

@@ -1,21 +1,24 @@
{ lib
, config
, fetchFromGitHub
, symlinkJoin
, stdenv
, cmake
-, cudaPackages
+, cudaPackages ? { }
, cudaSupport ? config.cudaSupport or false
, cudaCapabilities ? [ "60" "70" "80" "86" ]
, nvidia-thrust
, useThrustSourceBuild ? true
, pythonSupport ? true
, pythonPackages
, llvmPackages
, boost
, blas
, swig
, addOpenGLRunpath
, optLevel ? let
optLevels =
-      lib.optional stdenv.hostPlatform.avx2Support "avx2"
-        ++ lib.optional stdenv.hostPlatform.sse4_1Support "sse4"
+      lib.optionals stdenv.hostPlatform.avx2Support [ "avx2" ]
+        ++ lib.optionals stdenv.hostPlatform.sse4_1Support [ "sse4" ]
++ [ "generic" ];
in
# Choose the maximum available optimization level
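# Sketch: on a host with both avx2Support and sse4_1Support set, optLevels
# evaluates to [ "avx2" "sse4" "generic" ] and the first entry is selected.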
@@ -24,10 +27,31 @@
, runCommand
}:
assert cudaSupport -> nvidia-thrust.cudaSupport;
let
pname = "faiss";
version = "1.7.2";
-  inherit (cudaPackages) cudatoolkit;
+  inherit (cudaPackages) cudaFlags;
+  inherit (cudaFlags) cudaCapabilities dropDot;
cudaJoined = symlinkJoin {
name = "cuda-packages-unsplit";
paths = with cudaPackages; [
cuda_cudart # cuda_runtime.h
libcublas
libcurand
] ++ lib.optionals useThrustSourceBuild [
nvidia-thrust
] ++ lib.optionals (!useThrustSourceBuild) [
cuda_cccl
] ++ lib.optionals (cudaPackages ? cuda_profiler_api) [
cuda_profiler_api # cuda_profiler_api.h
] ++ lib.optionals (!(cudaPackages ? cuda_profiler_api)) [
cuda_nvprof # cuda_profiler_api.h
];
};
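  # The joined tree gives CMake a single conventional toolkit root; it is what
  # -DCUDAToolkit_INCLUDE_DIR points at in cmakeFlags below.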
in
stdenv.mkDerivation {
inherit pname version;
@@ -50,6 +74,8 @@ stdenv.mkDerivation {
pythonPackages.wheel
] ++ lib.optionals stdenv.cc.isClang [
llvmPackages.openmp
] ++ lib.optionals cudaSupport [
cudaJoined
];
propagatedBuildInputs = lib.optionals pythonSupport [
@@ -57,7 +83,7 @@ stdenv.mkDerivation {
];
nativeBuildInputs = [ cmake ] ++ lib.optionals cudaSupport [
-    cudatoolkit
+    cudaPackages.cuda_nvcc
addOpenGLRunpath
] ++ lib.optionals pythonSupport [
pythonPackages.python
@@ -72,7 +98,8 @@ stdenv.mkDerivation {
"-DFAISS_ENABLE_PYTHON=${if pythonSupport then "ON" else "OFF"}"
"-DFAISS_OPT_LEVEL=${optLevel}"
] ++ lib.optionals cudaSupport [
"-DCMAKE_CUDA_ARCHITECTURES=${lib.concatStringsSep ";" cudaCapabilities}"
"-DCMAKE_CUDA_ARCHITECTURES=${builtins.concatStringsSep ";" (map dropDot cudaCapabilities)}"
"-DCUDAToolkit_INCLUDE_DIR=${cudaJoined}/include"
];
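  # Sketch: cudaCapabilities = [ "8.0" "8.6" ] renders the architectures flag
  # as "-DCMAKE_CUDA_ARCHITECTURES=80;86".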
@@ -101,6 +128,11 @@ stdenv.mkDerivation {
addOpenGLRunpath $demos/bin/*
'';
# Need buildPythonPackage for this one
# pythonCheckImports = [
# "faiss"
# ];
passthru = {
inherit cudaSupport cudaPackages pythonSupport;
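    # Exposed so that downstream expressions can assert compatibility, as the
    # `assert cudaSupport -> nvidia-thrust.cudaSupport;` above does against
    # nvidia-thrust's own passthru.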

pkgs/top-level/all-packages.nix

@@ -10316,6 +10316,17 @@ with pkgs;
  nvfetcher = haskell.lib.compose.justStaticExecutables haskellPackages.nvfetcher;

  nvidia-thrust = callPackage ../development/libraries/nvidia-thrust { };

  nvidia-thrust-intel = callPackage ../development/libraries/nvidia-thrust {
    hostSystem = "TBB";
    deviceSystem = if config.cudaSupport or false then "CUDA" else "TBB";
  };

  nvidia-thrust-cuda = callPackage ../development/libraries/nvidia-thrust {
    deviceSystem = "CUDA";
  };

  miller = callPackage ../tools/text/miller { };

  milu = callPackage ../applications/misc/milu { };
@@ -37814,6 +37825,11 @@ with pkgs;
    swig = swig4;
  };

  faissWithCuda = faiss.override {
    cudaSupport = true;
    nvidia-thrust = nvidia-thrust-cuda;
  };

  fityk = callPackage ../applications/science/misc/fityk { };

  galario = callPackage ../development/libraries/galario { };
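
Together these hunks let consumers pull a CUDA-enabled faiss built against the
redistributable, split CUDA packages rather than the monolithic cudatoolkit; a
minimal usage sketch, assuming a local nixpkgs checkout with unfree packages
allowed:

    # shell.nix sketch using the attributes introduced by this commit
    let
      pkgs = import ./. { config = { allowUnfree = true; cudaSupport = true; }; };
    in
    pkgs.mkShell {
      packages = [ pkgs.faissWithCuda pkgs.nvidia-thrust-cuda ];
    }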