cudaPackages: generalize and refactor setup hook

This PR refactors the CUDA setup hooks, in particular
autoAddOpenGLRunpath and autoAddCudaCompatRunpathHook, which shared a
lot of code (in fact, I introduced the latter by copy-pasting most of
the bash script of the former). This is unsatisfactory for maintenance,
as a recent patch showed, because every change has to be duplicated in
both hooks.

This commit abstracts the common part into a single shell script that
applies a generic patch action to every ELF file in the output. For
autoAddOpenGLRunpath the action is just addOpenGLRunpath (now
addDriverRunpath), and for autoAddCudaCompatRunpathHook it is a function
of a few lines.

Doing so, we also take the opportunity to use the newer addDriverRunpath
instead of the previous addOpenGLRunpath, and rename the CUDA hook to
reflect that as well.

Co-Authored-By: Connor Baker <connor.baker@tweag.io>
This commit is contained in:
Yann Hamdaoui 2024-01-17 16:32:24 +01:00
parent 6a9c892aec
commit 63746cac08
No known key found for this signature in database
GPG key ID: 96305DE11214ABE6
28 changed files with 151 additions and 95 deletions

View file

@ -144,4 +144,4 @@ All new projects should use the CUDA redistributables available in [`cudaPackage
| Find libraries | `configurePhase` | Missing dependency on a `dev` output | Add the missing dependency | The `dev` output typically contains CMake configuration files |
| Find libraries | `buildPhase` or `patchelf` | Missing dependency on a `lib` or `static` output | Add the missing dependency | The `lib` or `static` output typically contains the libraries |
In the scenario you are unable to run the resulting binary: this is arguably the most complicated as it could be any combination of the previous reasons. This type of failure typically occurs when a library attempts to load or open a library it depends on that it does not declare in its `DT_NEEDED` section. As a first step, ensure that dependencies are patched with [`cudaPackages.autoAddOpenGLRunpath`](https://search.nixos.org/packages?channel=unstable&type=packages&query=cudaPackages.autoAddOpenGLRunpath). Failing that, try running the application with [`nixGL`](https://github.com/guibou/nixGL) or a similar wrapper tool. If that works, it likely means that the application is attempting to load a library that is not in the `RPATH` or `RUNPATH` of the binary.
In the scenario you are unable to run the resulting binary: this is arguably the most complicated as it could be any combination of the previous reasons. This type of failure typically occurs when a library attempts to load or open a library it depends on that it does not declare in its `DT_NEEDED` section. As a first step, ensure that dependencies are patched with [`cudaPackages.autoAddDriverRunpath`](https://search.nixos.org/packages?channel=unstable&type=packages&query=cudaPackages.autoAddDriverRunpath). Failing that, try running the application with [`nixGL`](https://github.com/guibou/nixGL) or a similar wrapper tool. If that works, it likely means that the application is attempting to load a library that is not in the `RPATH` or `RUNPATH` of the binary.

View file

@ -37,7 +37,7 @@ mkDerivation rec {
nativeBuildInputs = [
cmake
] ++ lib.optionals cudaSupport [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
meta = with lib; {

View file

@ -63,7 +63,7 @@ stdenv.mkDerivation (finalAttrs: {
pkg-config
# Although not always needed, it is needed if cmakeFlags include
# GPU_API=cuda, and it doesn't hurt users that don't enable the GPU package.
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
passthru = {

View file

@ -86,10 +86,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
nativeBuildInputs = [ cmake ninja pkg-config git ]
++ optionals cudaSupport [
cudaPackages.cuda_nvcc
# TODO: Replace with autoAddDriverRunpath
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = optionals effectiveStdenv.isDarwin darwinBuildInputs

View file

@ -87,7 +87,7 @@ buildGoModule rec {
];
nativeBuildInputs = [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
makeWrapper
];

View file

@ -139,7 +139,7 @@ rustPlatform.buildRustPackage {
] ++ optionals enableCuda [
# TODO: Replace with autoAddDriverRunpath
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = [ openssl ]

View file

@ -1,5 +1,5 @@
{
autoAddOpenGLRunpathHook,
autoAddDriverRunpath,
backendStdenv,
cmake,
cudatoolkit,
@ -31,7 +31,7 @@ backendStdenv.mkDerivation (
nativeBuildInputs =
[
autoAddOpenGLRunpathHook
autoAddDriverRunpath
pkg-config
]
# CMake has to run as a native, build-time dependency for libNVVM samples.

View file

@ -2,7 +2,7 @@
cudaVersion,
runPatches ? [],
autoPatchelfHook,
autoAddOpenGLRunpathHook,
autoAddDriverRunpath,
addOpenGLRunpath,
alsa-lib,
curlMinimal,
@ -76,7 +76,7 @@ backendStdenv.mkDerivation rec {
rsync
addOpenGLRunpath
autoPatchelfHook
autoAddOpenGLRunpathHook
autoAddDriverRunpath
markForCudatoolkitRootHook
]
++ lib.optionals (lib.versionOlder version "11") [libsForQt5.wrapQtAppsHook]

View file

@ -1,7 +1,7 @@
{
# General callPackage-supplied arguments
autoAddOpenGLRunpathHook,
autoAddCudaCompatRunpathHook,
autoAddDriverRunpath,
autoAddCudaCompatRunpath,
autoPatchelfHook,
backendStdenv,
fetchurl,
@ -193,16 +193,16 @@ backendStdenv.mkDerivation (
# in typically /lib/opengl-driver by adding that
# directory to the rpath of all ELF binaries.
# Check e.g. with `patchelf --print-rpath path/to/my/binary`
autoAddOpenGLRunpathHook
autoAddDriverRunpath
markForCudatoolkitRootHook
]
# autoAddCudaCompatRunpathHook depends on cuda_compat and would cause
# autoAddCudaCompatRunpath depends on cuda_compat and would cause
# infinite recursion if applied to `cuda_compat` itself (beside the fact
# that it doesn't make sense in the first place)
++ lib.optionals (pname != "cuda_compat" && flags.isJetsonBuild) [
# autoAddCudaCompatRunpathHook must appear AFTER autoAddOpenGLRunpathHook.
# autoAddCudaCompatRunpath must appear AFTER autoAddDriverRunpath.
# See its documentation in ./setup-hooks/extension.nix.
autoAddCudaCompatRunpathHook
autoAddCudaCompatRunpath
];
buildInputs =

View file

@ -12,7 +12,7 @@
}:
let
inherit (cudaPackages)
autoAddOpenGLRunpathHook
autoAddDriverRunpath
backendStdenv
cuda_cccl
cuda_cudart
@ -44,7 +44,7 @@ backendStdenv.mkDerivation (
nativeBuildInputs =
[
which
autoAddOpenGLRunpathHook
autoAddDriverRunpath
python3
]
++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit]

View file

@ -5,7 +5,7 @@
}:
let
inherit (cudaPackages)
autoAddOpenGLRunpathHook
autoAddDriverRunpath
backendStdenv
cuda_cccl
cuda_cudart
@ -29,7 +29,7 @@ backendStdenv.mkDerivation {
nativeBuildInputs =
[
cmake
autoAddOpenGLRunpathHook
autoAddDriverRunpath
]
++ lib.optionals (lib.versionOlder cudaVersion "11.4") [cudatoolkit]
++ lib.optionals (lib.versionAtLeast cudaVersion "11.4") [cuda_nvcc];

View file

@ -3,25 +3,25 @@
# coming from the cuda_compat package by adding it to the RUNPATH.
echo "Sourcing auto-add-cuda-compat-runpath-hook"
elfHasDynamicSection() {
patchelf --print-rpath "$1" >& /dev/null
# Prepend the cuda_compat library directory to the RUNPATH of one ELF file.
#
# `@libcudaPath@` is a placeholder filled in through the setup hook's
# `substitutions`.
#
# Arguments:
#   $1 - path of the ELF file to patch (required, must be non-empty)
#
# Exits the shell with status 1 on a usage error (missing, extra or empty
# argument). Otherwise returns the exit status of the final patchelf call.
addCudaCompatRunpath() {
  local libPath
  local origRpath

  if [[ $# -eq 0 ]]; then
    echo "addCudaCompatRunpath: no library path provided" >&2
    exit 1
  elif [[ $# -gt 1 ]]; then
    echo "addCudaCompatRunpath: too many arguments" >&2
    exit 1
  elif [[ "$1" == "" ]]; then
    echo "addCudaCompatRunpath: empty library path" >&2
    exit 1
  else
    libPath="$1"
  fi

  origRpath="$(patchelf --print-rpath "$libPath")"
  # Only emit the ":" separator when the file already has an RPATH. A trailing
  # colon would create an empty search-path entry, which the glibc loader
  # interprets as the current working directory.
  patchelf --set-rpath "@libcudaPath@${origRpath:+:$origRpath}" "$libPath"
}
autoAddCudaCompatRunpathPhase() (
local outputPaths
mapfile -t outputPaths < <(for o in $(getAllOutputNames); do echo "${!o}"; done)
find "${outputPaths[@]}" -type f -print0 | while IFS= read -rd "" f; do
if isELF "$f"; then
# patchelf returns an error on statically linked ELF files
if elfHasDynamicSection "$f" ; then
echo "autoAddCudaCompatRunpathHook: patching $f"
local origRpath="$(patchelf --print-rpath "$f")"
patchelf --set-rpath "@libcudaPath@:$origRpath" "$f"
elif (( "${NIX_DEBUG:-0}" >= 1 )) ; then
echo "autoAddCudaCompatRunpathHook: skipping a statically-linked ELF file $f"
fi
fi
done
)
postFixupHooks+=(autoAddCudaCompatRunpathPhase)
postFixupHooks+=("autoFixElfFiles addCudaCompatRunpath")

View file

@ -0,0 +1,8 @@
# shellcheck shell=bash
# Setup hook: registers `autoFixElfFiles addDriverRunpath` as a postFixup hook
# so that addDriverRunpath is run on all dynamically linked ELF files.
# Setting `dontUseAutoAddDriverRunpath` to a non-empty value skips the
# registration.
echo "Sourcing auto-add-driver-runpath-hook"

if [[ -z "${dontUseAutoAddDriverRunpath-}" ]]; then
  echo "Using autoAddDriverRunpath"
  postFixupHooks+=("autoFixElfFiles addDriverRunpath")
fi

View file

@ -1,28 +0,0 @@
# shellcheck shell=bash
# Run addOpenGLRunpath on all dynamically linked, ELF files
echo "Sourcing auto-add-opengl-runpath-hook"
elfHasDynamicSection() {
patchelf --print-rpath "$1" >& /dev/null
}
autoAddOpenGLRunpathPhase() (
local outputPaths
mapfile -t outputPaths < <(for o in $(getAllOutputNames); do echo "${!o}"; done)
find "${outputPaths[@]}" -type f -print0 | while IFS= read -rd "" f; do
if isELF "$f"; then
# patchelf returns an error on statically linked ELF files
if elfHasDynamicSection "$f" ; then
echo "autoAddOpenGLRunpathHook: patching $f"
addOpenGLRunpath "$f"
elif (( "${NIX_DEBUG:-0}" >= 1 )) ; then
echo "autoAddOpenGLRunpathHook: skipping a statically-linked ELF file $f"
fi
fi
done
)
if [ -z "${dontUseAutoAddOpenGLRunpath-}" ]; then
echo "Using autoAddOpenGLRunpathPhase"
postFixupHooks+=(autoAddOpenGLRunpathPhase)
fi

View file

@ -0,0 +1,64 @@
# shellcheck shell=bash
# List all dynamically linked ELF files in the outputs and apply a generic fix
# action provided as a parameter (currently used to add the CUDA or the
# cuda_compat driver to the runpath of binaries)

# The message names the hook itself ("auto-fix-elf-files"), following the
# convention of the other setup hooks, so build logs point at the script that
# is actually being sourced.
echo "Sourcing auto-fix-elf-files"
# Predicate telling whether a file has a dynamic section.
#
# Runs `patchelf --print-rpath` on the file with all output discarded and
# returns its exit status: 0 (success) means the ELF file has a dynamic
# section, non-zero means it is statically linked (or is not an ELF file).
#
# Arguments:
#   $1 - path of the file to probe (required, must be non-empty)
#
# A usage error (missing, extra or empty argument) prints a message on stderr
# and exits the shell with status 1.
elfHasDynamicSection() {
  case $# in
    0)
      echo "elfHasDynamicSection: no library path provided" >&2
      exit 1
      ;;
    1) ;;
    *)
      echo "elfHasDynamicSection: too many arguments" >&2
      exit 1
      ;;
  esac

  local candidate="$1"
  if [[ -z "$candidate" ]]; then
    echo "elfHasDynamicSection: empty library path" >&2
    exit 1
  fi

  # The function's status is that of patchelf, its last command.
  patchelf --print-rpath "$candidate" &> /dev/null
}
# Apply a fix action to every dynamically linked ELF file in the outputs.
#
# Arguments:
#   $1 - name of the command to run on each file; it is invoked with the file
#        path as its single argument (required, must be non-empty)
#
# A usage error (missing, extra or empty argument) prints a message on stderr
# and exits the shell with status 1.
autoFixElfFiles() {
  local fixAction
  local outputPaths

  case $# in
    0)
      echo "autoFixElfFiles: no fix action provided" >&2
      exit 1
      ;;
    1) ;;
    *)
      echo "autoFixElfFiles: too many arguments" >&2
      exit 1
      ;;
  esac

  if [[ -z "$1" ]]; then
    echo "autoFixElfFiles: empty fix action" >&2
    exit 1
  fi
  fixAction="$1"

  # Resolve each output name to the path stored in the variable of that name.
  mapfile -t outputPaths < <(for o in $(getAllOutputNames); do echo "${!o}"; done)

  # NUL-delimited traversal so that unusual file names are handled safely.
  find "${outputPaths[@]}" -type f -print0 | while IFS= read -rd "" f; do
    # Anything that is not an ELF file is ignored silently.
    isELF "$f" || continue

    if elfHasDynamicSection "$f"; then
      # patchelf returns an error on statically linked ELF files, and in
      # practice fixing actions all involve patchelf
      echo "autoFixElfFiles: using $fixAction to fix $f" >&2
      $fixAction "$f"
    elif (( "${NIX_DEBUG:-0}" >= 1 )); then
      echo "autoFixElfFiles: skipping a statically-linked ELF file $f"
    fi
  done
}

View file

@ -1,4 +1,19 @@
final: _: {
# Helper hook used in both autoAddCudaCompatRunpath and
# autoAddDriverRunpath that applies a generic patching action to all elf
# files with a dynamic linking section.
autoFixElfFiles =
final.callPackage
(
{makeSetupHook}:
makeSetupHook
{
name = "auto-fix-elf-files";
}
./auto-fix-elf-files.sh
)
{};
# Internal hook, used by cudatoolkit and cuda redist packages
# to accommodate automatic CUDAToolkit_ROOT construction
markForCudatoolkitRootHook =
@ -32,31 +47,33 @@ final: _: {
{}
);
autoAddOpenGLRunpathHook =
autoAddDriverRunpath =
final.callPackage
(
{addOpenGLRunpath, makeSetupHook}:
{addDriverRunpath, autoFixElfFiles, makeSetupHook}:
makeSetupHook
{
name = "auto-add-opengl-runpath-hook";
propagatedBuildInputs = [addOpenGLRunpath];
propagatedBuildInputs = [addDriverRunpath autoFixElfFiles];
}
./auto-add-opengl-runpath-hook.sh
./auto-add-driver-runpath-hook.sh
)
{};
# autoAddCudaCompatRunpathHook hook must be added AFTER `setupCudaHook`. Both
# autoAddCudaCompatRunpath hook must be added AFTER `setupCudaHook`. Both
# hooks prepend a path with `libcuda.so` to the `DT_RUNPATH` section of
# patched elf files, but `cuda_compat` path must take precedence (otherwise,
# it doesn't have any effect) and thus appear first. Meaning this hook must be
# executed last.
autoAddCudaCompatRunpathHook =
autoAddCudaCompatRunpath =
final.callPackage
(
{makeSetupHook, cuda_compat ? null }:
{makeSetupHook, autoFixElfFiles, cuda_compat ? null }:
makeSetupHook
{
name = "auto-add-cuda-compat-runpath-hook";
propagatedBuildInputs = [autoFixElfFiles];
substitutions = {
# Hotfix Ofborg evaluation
libcudaPath = if final.flags.isJetsonBuild then "${cuda_compat}/compat" else null;

View file

@ -36,7 +36,7 @@ stdenv.mkDerivation rec {
]
++ lib.optionals enableCuda [
cudaPackages.cuda_nvcc
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = [

View file

@ -57,7 +57,7 @@ stdenv.mkDerivation rec {
nativeBuildInputs = [ cmake ]
++ lib.optionals stdenv.isDarwin [ llvmPackages.openmp ]
++ lib.optionals cudaSupport [ cudaPackages.autoAddOpenGLRunpathHook ]
++ lib.optionals cudaSupport [ cudaPackages.autoAddDriverRunpath ]
++ lib.optionals rLibrary [ R ];
buildInputs = [ gtest ] ++ lib.optional cudaSupport cudaPackages.cudatoolkit

View file

@ -23,7 +23,7 @@
}:
let
inherit (cudaPackagesGoogle) autoAddOpenGLRunpathHook cudaVersion;
inherit (cudaPackagesGoogle) autoAddDriverRunpath cudaVersion;
version = "0.4.24";
@ -180,7 +180,7 @@ buildPythonPackage {
# Prebuilt wheels are dynamically linked against things that nix can't find.
# Run `autoPatchelfHook` to automagically fix them.
nativeBuildInputs = lib.optionals stdenv.isLinux [ autoPatchelfHook ]
++ lib.optionals cudaSupport [ autoAddOpenGLRunpathHook ];
++ lib.optionals cudaSupport [ autoAddDriverRunpath ];
# Dynamic link dependencies
buildInputs = [ stdenv.cc.cc.lib ];

View file

@ -51,7 +51,7 @@
}@inputs:
let
inherit (cudaPackagesGoogle) autoAddOpenGLRunpathHook cudaFlags cudaVersion cudnn nccl;
inherit (cudaPackagesGoogle) autoAddDriverRunpath cudaFlags cudaVersion cudnn nccl;
pname = "jaxlib";
version = "0.4.24";
@ -420,7 +420,7 @@ buildPythonPackage {
done
'';
nativeBuildInputs = lib.optionals cudaSupport [ autoAddOpenGLRunpathHook ];
nativeBuildInputs = lib.optionals cudaSupport [ autoAddDriverRunpath ];
propagatedBuildInputs = [
absl-py

View file

@ -22,7 +22,7 @@ buildPythonPackage rec {
nativeBuildInputs = [
unzip
autoPatchelfHook
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
preUnpack = ''

View file

@ -40,7 +40,7 @@ in buildPythonPackage {
nativeBuildInputs = lib.optionals stdenv.isLinux [
addOpenGLRunpath
autoPatchelfHook
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = lib.optionals stdenv.isLinux (with cudaPackages; [

View file

@ -338,7 +338,7 @@ in buildPythonPackage rec {
pythonRelaxDepsHook
removeReferencesTo
] ++ lib.optionals cudaSupport (with cudaPackages; [
autoAddOpenGLRunpathHook
autoAddDriverRunpath
cuda_nvcc
])
++ lib.optionals rocmSupport [ rocmtoolkit_joined ];

View file

@ -105,10 +105,10 @@ in gcc11Stdenv.mkDerivation rec {
strictDeps = true;
nativeBuildInputs = [
# autoAddOpenGLRunpathHook does not actually depend on or incur any dependency
# autoAddDriverRunpath does not actually depend on or incur any dependency
# of cudaPackages. It merely adds an impure, non-Nix PATH to the RPATHs of
# executables that need to use cuda at runtime.
cudaPackages_12.autoAddOpenGLRunpathHook
cudaPackages_12.autoAddDriverRunpath
cmake
git

View file

@ -48,7 +48,7 @@ buildGoModule rec {
vendorHash = "sha256-Fjvx15e/psxoqoS6c6GhiQfe7g2aI40EmPR26xLhrzg=";
nativeBuildInputs = [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
# Tests try to interact with running DCGM service.

View file

@ -80,7 +80,7 @@ stdenv.mkDerivation rec {
autoPatchelfHook
makeWrapper
] ++ lib.optionals cudaSupport [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = [

View file

@ -45,9 +45,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
] ++ lib.optionals cudaSupport ( with cudaPackages ;[
cuda_nvcc
# TODO: Replace with autoAddDriverRunpath
# once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
autoAddOpenGLRunpathHook
autoAddDriverRunpath
]);
buildInputs = [

View file

@ -23,7 +23,7 @@ stdenv.mkDerivation rec {
};
nativeBuildInputs = [ cmake ] ++ lib.optionals cudaSupport [
cudaPackages.autoAddOpenGLRunpathHook
cudaPackages.autoAddDriverRunpath
];
buildInputs = lib.optionals stdenv.isDarwin [