Merge pull request #281576 from yannham/refactor/cuda-setup-hooks-refactor

cudaPackages: generalize and refactor setup hooks
This commit is contained in:
Someone 2024-03-19 20:06:18 +00:00 committed by GitHub
commit e7797267a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 154 additions and 95 deletions

View file

@ -144,4 +144,4 @@ All new projects should use the CUDA redistributables available in [`cudaPackage
| Find libraries | `configurePhase` | Missing dependency on a `dev` output | Add the missing dependency | The `dev` output typically contains CMake configuration files |
| Find libraries | `buildPhase` or `patchelf` | Missing dependency on a `lib` or `static` output | Add the missing dependency | The `lib` or `static` output typically contains the libraries |
In the scenario you are unable to run the resulting binary: this is arguably the most complicated as it could be any combination of the previous reasons. This type of failure typically occurs when a library attempts to load or open a library it depends on that it does not declare in its `DT_NEEDED` section. As a first step, ensure that dependencies are patched with [`cudaPackages.autoAddOpenGLRunpath`](https://search.nixos.org/packages?channel=unstable&type=packages&query=cudaPackages.autoAddOpenGLRunpath). Failing that, try running the application with [`nixGL`](https://github.com/guibou/nixGL) or a similar wrapper tool. If that works, it likely means that the application is attempting to load a library that is not in the `RPATH` or `RUNPATH` of the binary.
In the scenario you are unable to run the resulting binary: this is arguably the most complicated as it could be any combination of the previous reasons. This type of failure typically occurs when a library attempts to load or open a library it depends on that it does not declare in its `DT_NEEDED` section. As a first step, ensure that dependencies are patched with [`cudaPackages.autoAddDriverRunpath`](https://search.nixos.org/packages?channel=unstable&type=packages&query=cudaPackages.autoAddDriverRunpath). Failing that, try running the application with [`nixGL`](https://github.com/guibou/nixGL) or a similar wrapper tool. If that works, it likely means that the application is attempting to load a library that is not in the `RPATH` or `RUNPATH` of the binary.

View file

@ -37,7 +37,7 @@ mkDerivation rec {
nativeBuildInputs = [
cmake
] ++ lib.optionals cudaSupport [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
meta = with lib; {

View file

@ -63,7 +63,7 @@ stdenv.mkDerivation (finalAttrs: {
pkg-config
# Although not always needed, it is needed if cmakeFlags include
# GPU_API=cuda, and it doesn't affect users that don't enable the GPU package.
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
passthru = {

View file

@ -86,10 +86,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
nativeBuildInputs = [ cmake ninja pkg-config git ]
++ optionals cudaSupport [
cudaPackages.cuda_nvcc
# TODO: Replace with autoAddDriverRunpath
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = optionals effectiveStdenv.isDarwin darwinBuildInputs

View file

@ -87,7 +87,7 @@ buildGoModule rec {
];
nativeBuildInputs = [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
makeWrapper
];

View file

@ -139,7 +139,7 @@ rustPlatform.buildRustPackage {
] ++ optionals enableCuda [
# TODO: Replace with autoAddDriverRunpath
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = [ openssl ]

View file

@ -1,5 +1,5 @@
{
autoAddOpenGLRunpathHook,
autoAddDriverRunpath,
backendStdenv,
cmake,
cudatoolkit,
@ -31,7 +31,7 @@ backendStdenv.mkDerivation (
nativeBuildInputs =
[
autoAddOpenGLRunpathHook
autoAddDriverRunpath
pkg-config
]
# CMake has to run as a native, build-time dependency for libNVVM samples.

View file

@ -2,7 +2,7 @@
cudaVersion,
runPatches ? [],
autoPatchelfHook,
autoAddOpenGLRunpathHook,
autoAddDriverRunpath,
addOpenGLRunpath,
alsa-lib,
curlMinimal,
@ -76,7 +76,7 @@ backendStdenv.mkDerivation rec {
rsync
addOpenGLRunpath
autoPatchelfHook
autoAddOpenGLRunpathHook
autoAddDriverRunpath
markForCudatoolkitRootHook
]
++ lib.optionals (lib.versionOlder version "11") [libsForQt5.wrapQtAppsHook]

View file

@ -1,7 +1,7 @@
{
# General callPackage-supplied arguments
autoAddOpenGLRunpathHook,
autoAddCudaCompatRunpathHook,
autoAddDriverRunpath,
autoAddCudaCompatRunpath,
autoPatchelfHook,
backendStdenv,
fetchurl,
@ -193,16 +193,16 @@ backendStdenv.mkDerivation (
# in typically /lib/opengl-driver by adding that
# directory to the rpath of all ELF binaries.
# Check e.g. with `patchelf --print-rpath path/to/my/binary`
autoAddOpenGLRunpathHook
autoAddDriverRunpath
markForCudatoolkitRootHook
]
# autoAddCudaCompatRunpathHook depends on cuda_compat and would cause
# autoAddCudaCompatRunpath depends on cuda_compat and would cause
# infinite recursion if applied to `cuda_compat` itself (besides the fact
# that it doesn't make sense in the first place)
++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [
# autoAddCudaCompatRunpathHook must appear AFTER autoAddOpenGLRunpathHook.
# autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath.
# See its documentation in ./setup-hooks/extension.nix.
autoAddCudaCompatRunpathHook
autoAddCudaCompatRunpath
];
buildInputs =

View file

@ -12,7 +12,7 @@
}:
let
inherit (cudaPackages)
autoAddOpenGLRunpathHook
autoAddDriverRunpath
backendStdenv
cuda_cccl
cuda_cudart
@ -44,7 +44,7 @@ backendStdenv.mkDerivation (
nativeBuildInputs =
[
which
autoAddOpenGLRunpathHook
autoAddDriverRunpath
python3
]
++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit]

View file

@ -5,7 +5,7 @@
}:
let
inherit (cudaPackages)
autoAddOpenGLRunpathHook
autoAddDriverRunpath
backendStdenv
cuda_cccl
cuda_cudart
@ -29,7 +29,7 @@ backendStdenv.mkDerivation {
nativeBuildInputs =
[
cmake
autoAddOpenGLRunpathHook
autoAddDriverRunpath
]
++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit]
++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc];

View file

@ -3,25 +3,25 @@
# coming from the cuda_compat package by adding it to the RUNPATH.
echo "Sourcing auto-add-cuda-compat-runpath-hook"
elfHasDynamicSection() {
patchelf --print-rpath "$1" >& /dev/null
# Prepend the cuda_compat library directory to the RPATH of one ELF file.
#
# `@libcudaPath@` is a placeholder; it is expected to be replaced through the
# `substitutions` of the setup hook that ships this script.
#
# Arguments:
#   $1  path of the ELF file to patch (exactly one, non-empty)
#
# Exits the shell with status 1 (message on stderr) when the argument count is
# wrong or the path is empty.
addCudaCompatRunpath() {
    local target
    local previousRpath

    case $# in
        0)
            echo "addCudaCompatRunpath: no library path provided" >&2
            exit 1
            ;;
        1)
            if [[ -z "$1" ]]; then
                echo "addCudaCompatRunpath: empty library path" >&2
                exit 1
            fi
            target="$1"
            ;;
        *)
            echo "addCudaCompatRunpath: too many arguments" >&2
            exit 1
            ;;
    esac

    # Keep whatever was already in the RPATH, but make the compat directory
    # the first entry so that it takes precedence.
    previousRpath="$(patchelf --print-rpath "$target")"
    patchelf --set-rpath "@libcudaPath@:$previousRpath" "$target"
}
# Walk every regular file of every output and prepend @libcudaPath@ to the
# RPATH of each ELF file on which `patchelf --print-rpath` succeeds.
# The body is wrapped in `( ... )`, so it runs in a subshell.
# NOTE(review): getAllOutputNames and isELF are not defined in this script;
# presumably they are provided by the surrounding build environment.
autoAddCudaCompatRunpathPhase() (
local outputPaths
# Resolve each output name to the value of the variable of the same name.
mapfile -t outputPaths < <(for o in $(getAllOutputNames); do echo "${!o}"; done)
# NUL-delimited so that file names with whitespace or newlines are handled.
find "${outputPaths[@]}" -type f -print0 | while IFS= read -rd "" f; do
if isELF "$f"; then
# patchelf returns an error on statically linked ELF files
if elfHasDynamicSection "$f" ; then
echo "autoAddCudaCompatRunpathHook: patching $f"
# NOTE(review): `local var="$(cmd)"` reports the status of `local`, so a
# failure of this patchelf call is not propagated.
local origRpath="$(patchelf --print-rpath "$f")"
patchelf --set-rpath "@libcudaPath@:$origRpath" "$f"
# Only mention skipped files when NIX_DEBUG is 1 or higher.
elif (( "${NIX_DEBUG:-0}" >= 1 )) ; then
echo "autoAddCudaCompatRunpathHook: skipping a statically-linked ELF file $f"
fi
fi
done
)
postFixupHooks+=(autoAddCudaCompatRunpathPhase)
postFixupHooks+=("autoFixElfFiles addCudaCompatRunpath")

View file

@ -0,0 +1,8 @@
# shellcheck shell=bash
# Setup hook: schedule addDriverRunpath to be applied to all dynamically linked
# ELF files by appending an `autoFixElfFiles addDriverRunpath` entry to
# postFixupHooks.
# Setting `dontUseAutoAddDriverRunpath` to any non-empty value opts out.
# NOTE(review): autoFixElfFiles and addDriverRunpath are not defined here;
# they are expected to come from other setup hooks.
echo "Sourcing auto-add-driver-runpath-hook"

if [[ -n "${dontUseAutoAddDriverRunpath-}" ]]; then
    : # opted out: leave postFixupHooks untouched
else
    echo "Using autoAddDriverRunpath"
    postFixupHooks+=("autoFixElfFiles addDriverRunpath")
fi

View file

@ -1,28 +0,0 @@
# shellcheck shell=bash
# Run addOpenGLRunpath on all dynamically linked ELF files
echo "Sourcing auto-add-opengl-runpath-hook"
# Succeeds (status 0) when `patchelf --print-rpath` succeeds on "$1"; all
# patchelf output is discarded. Used below to tell dynamically linked ELF
# files from statically linked ones.
elfHasDynamicSection() {
patchelf --print-rpath "$1" >& /dev/null
}
# Walk every regular file of every output and call addOpenGLRunpath on each
# ELF file that passes elfHasDynamicSection.
# The body is wrapped in `( ... )`, so it runs in a subshell.
# NOTE(review): getAllOutputNames, isELF and addOpenGLRunpath are not defined
# in this script; presumably they come from the build environment and from
# other setup hooks.
autoAddOpenGLRunpathPhase() (
local outputPaths
# Resolve each output name to the value of the variable of the same name.
mapfile -t outputPaths < <(for o in $(getAllOutputNames); do echo "${!o}"; done)
# NUL-delimited so that file names with whitespace or newlines are handled.
find "${outputPaths[@]}" -type f -print0 | while IFS= read -rd "" f; do
if isELF "$f"; then
# patchelf returns an error on statically linked ELF files
if elfHasDynamicSection "$f" ; then
echo "autoAddOpenGLRunpathHook: patching $f"
addOpenGLRunpath "$f"
# Only mention skipped files when NIX_DEBUG is 1 or higher.
elif (( "${NIX_DEBUG:-0}" >= 1 )) ; then
echo "autoAddOpenGLRunpathHook: skipping a statically-linked ELF file $f"
fi
fi
done
)
# Register the phase unless `dontUseAutoAddOpenGLRunpath` is set to a
# non-empty value.
if [ -z "${dontUseAutoAddOpenGLRunpath-}" ]; then
echo "Using autoAddOpenGLRunpathPhase"
postFixupHooks+=(autoAddOpenGLRunpathPhase)
fi

View file

@ -0,0 +1,64 @@
# shellcheck shell=bash
# List all dynamically linked ELF files in the outputs and apply a generic fix
# action provided as a parameter (currently used to add the CUDA or the
# cuda_compat driver to the runpath of binaries)
# The sourcing message names the hook itself, like the messages of the hooks
# that use this helper, so that build logs point at the right script.
echo "Sourcing auto-fix-elf-files"
# Tell whether a file is a dynamically linked ELF file.
#
# The answer is the exit status of `patchelf --print-rpath` (its output is
# discarded): 0 means the file has a dynamic section, anything else means it
# is statically linked or not an ELF file at all.
#
# Arguments:
#   $1  path of the file to probe (exactly one, non-empty)
#
# Exits the shell with status 1 (message on stderr) when the argument count is
# wrong or the path is empty.
elfHasDynamicSection() {
    local candidate

    case $# in
        0)
            echo "elfHasDynamicSection: no library path provided" >&2
            exit 1
            ;;
        1)
            if [[ -z "$1" ]]; then
                echo "elfHasDynamicSection: empty library path" >&2
                exit 1
            fi
            candidate="$1"
            ;;
        *)
            echo "elfHasDynamicSection: too many arguments" >&2
            exit 1
            ;;
    esac

    # Last command of the function: its status is the function's status.
    patchelf --print-rpath "$candidate" > /dev/null 2>&1
}
# Apply a fix action to every dynamically linked ELF file found in the outputs.
#
# Arguments:
#   $1  the fix action; it is run as `$1 <file>` and is expanded unquoted, so
#       it undergoes word splitting
#
# Exits the shell with status 1 (message on stderr) unless called with exactly
# one non-empty argument.
# NOTE(review): getAllOutputNames and isELF are not defined in this script;
# presumably they are provided by the surrounding build environment.
autoFixElfFiles() {
    local action
    local roots

    case $# in
        0)
            echo "autoFixElfFiles: no fix action provided" >&2
            exit 1
            ;;
        1)
            if [[ -z "$1" ]]; then
                echo "autoFixElfFiles: empty fix action" >&2
                exit 1
            fi
            action="$1"
            ;;
        *)
            echo "autoFixElfFiles: too many arguments" >&2
            exit 1
            ;;
    esac

    # Resolve each output name to the value of the variable of the same name.
    mapfile -t roots < <(
        for outputName in $(getAllOutputNames); do
            echo "${!outputName}"
        done
    )

    # NUL-delimited so that file names with whitespace or newlines are handled.
    find "${roots[@]}" -type f -print0 | while IFS= read -rd "" candidate; do
        isELF "$candidate" || continue

        # patchelf returns an error on statically linked ELF files, and in
        # practice fixing actions all involve patchelf
        if elfHasDynamicSection "$candidate"; then
            echo "autoFixElfFiles: using $action to fix $candidate" >&2
            $action "$candidate"
        elif (( "${NIX_DEBUG:-0}" >= 1 )); then
            echo "autoFixElfFiles: skipping a statically-linked ELF file $candidate"
        fi
    done
}

View file

@ -1,4 +1,19 @@
final: _: {
# Helper hook used in both autoAddCudaCompatRunpath and
# autoAddDriverRunpath that applies a generic patching action to all elf
# files with a dynamic linking section.
autoFixElfFiles =
final.callPackage
(
{makeSetupHook}:
makeSetupHook
{
name = "auto-fix-elf-files";
}
./auto-fix-elf-files.sh
)
{};
# Internal hook, used by cudatoolkit and cuda redist packages
# to accommodate automatic CUDAToolkit_ROOT construction
markForCudatoolkitRootHook =
@ -32,31 +47,36 @@ final: _: {
{}
);
autoAddOpenGLRunpathHook =
autoAddDriverRunpath =
final.callPackage
(
{addOpenGLRunpath, makeSetupHook}:
{addDriverRunpath, autoFixElfFiles, makeSetupHook}:
makeSetupHook
{
name = "auto-add-opengl-runpath-hook";
propagatedBuildInputs = [addOpenGLRunpath];
propagatedBuildInputs = [addDriverRunpath autoFixElfFiles];
}
./auto-add-opengl-runpath-hook.sh
./auto-add-driver-runpath-hook.sh
)
{};
# autoAddCudaCompatRunpathHook hook must be added AFTER `setupCudaHook`. Both
# Deprecated: an alias kept for compatibility. Consider removing after 24.11
autoAddOpenGLRunpathHook = final.autoAddDriverRunpath;
# autoAddCudaCompatRunpath hook must be added AFTER `setupCudaHook`. Both
# hooks prepend a path with `libcuda.so` to the `DT_RUNPATH` section of
# patched elf files, but `cuda_compat` path must take precedence (otherwise,
# it doesn't have any effect) and thus appear first. Meaning this hook must be
# executed last.
autoAddCudaCompatRunpathHook =
autoAddCudaCompatRunpath =
final.callPackage
(
{makeSetupHook, cuda_compat ? null }:
{makeSetupHook, autoFixElfFiles, cuda_compat ? null }:
makeSetupHook
{
name = "auto-add-cuda-compat-runpath-hook";
propagatedBuildInputs = [autoFixElfFiles];
substitutions = {
# Hotfix Ofborg evaluation
libcudaPath = if final.flags.isJetsonBuild then "${cuda_compat}/compat" else null;

View file

@ -36,7 +36,7 @@ stdenv.mkDerivation rec {
]
++ lib.optionals enableCuda [
cudaPackages.cuda_nvcc
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = [

View file

@ -57,7 +57,7 @@ stdenv.mkDerivation rec {
nativeBuildInputs = [ cmake ]
++ lib.optionals stdenv.isDarwin [ llvmPackages.openmp ]
++ lib.optionals cudaSupport [ cudaPackages.autoAddOpenGLRunpathHook ]
++ lib.optionals cudaSupport [ cudaPackages.autoAddDriverRunpath ]
++ lib.optionals rLibrary [ R ];
buildInputs = [ gtest ] ++ lib.optional cudaSupport cudaPackages.cudatoolkit

View file

@ -23,7 +23,7 @@
}:
let
inherit (cudaPackagesGoogle) autoAddOpenGLRunpathHook cudaVersion;
inherit (cudaPackagesGoogle) autoAddDriverRunpath cudaVersion;
version = "0.4.24";
@ -180,7 +180,7 @@ buildPythonPackage {
# Prebuilt wheels are dynamically linked against things that nix can't find.
# Run `autoPatchelfHook` to automagically fix them.
nativeBuildInputs = lib.optionals stdenv.isLinux [ autoPatchelfHook ]
++ lib.optionals cudaSupport [ autoAddOpenGLRunpathHook ];
++ lib.optionals cudaSupport [ autoAddDriverRunpath ];
# Dynamic link dependencies
buildInputs = [ stdenv.cc.cc.lib ];

View file

@ -51,7 +51,7 @@
}@inputs:
let
inherit (cudaPackagesGoogle) autoAddOpenGLRunpathHook cudaFlags cudaVersion cudnn nccl;
inherit (cudaPackagesGoogle) autoAddDriverRunpath cudaFlags cudaVersion cudnn nccl;
pname = "jaxlib";
version = "0.4.24";
@ -420,7 +420,7 @@ buildPythonPackage {
done
'';
nativeBuildInputs = lib.optionals cudaSupport [ autoAddOpenGLRunpathHook ];
nativeBuildInputs = lib.optionals cudaSupport [ autoAddDriverRunpath ];
propagatedBuildInputs = [
absl-py

View file

@ -22,7 +22,7 @@ buildPythonPackage rec {
nativeBuildInputs = [
unzip
autoPatchelfHook
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
preUnpack = ''

View file

@ -40,7 +40,7 @@ in buildPythonPackage {
nativeBuildInputs = lib.optionals stdenv.isLinux [
addOpenGLRunpath
autoPatchelfHook
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = lib.optionals stdenv.isLinux (with cudaPackages; [

View file

@ -338,7 +338,7 @@ in buildPythonPackage rec {
pythonRelaxDepsHook
removeReferencesTo
] ++ lib.optionals cudaSupport (with cudaPackages; [
autoAddOpenGLRunpathHook
autoAddDriverRunpath
cuda_nvcc
])
++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

View file

@ -105,10 +105,10 @@ in gcc11Stdenv.mkDerivation rec {
strictDeps = true;
nativeBuildInputs = [
# autoAddOpenGLRunpathHook does not actually depend on or incur any dependency
# autoAddDriverRunpath does not actually depend on or incur any dependency
# of cudaPackages. It merely adds an impure, non-Nix PATH to the RPATHs of
# executables that need to use cuda at runtime.
cudaPackages_12.autoAddOpenGLRunpathHook
cudaPackages_12.autoAddDriverRunpath
cmake
git

View file

@ -48,7 +48,7 @@ buildGoModule rec {
vendorHash = "sha256-Fjvx15e/psxoqoS6c6GhiQfe7g2aI40EmPR26xLhrzg=";
nativeBuildInputs = [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
# Tests try to interact with running DCGM service.

View file

@ -94,7 +94,7 @@ stdenv'.mkDerivation rec {
autoPatchelfHook
makeWrapper
] ++ lib.optionals cudaSupport [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = [

View file

@ -45,9 +45,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
] ++ lib.optionals cudaSupport ( with cudaPackages ;[
cuda_nvcc
# TODO: Replace with autoAddDriverRunpath
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
autoAddOpenGLRunpathHook
autoAddDriverRunpath
]);
buildInputs = [

View file

@ -23,7 +23,7 @@ stdenv.mkDerivation rec {
};
nativeBuildInputs = [ cmake ] ++ lib.optionals cudaSupport [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = lib.optionals stdenv.isDarwin [