Merge pull request #259266 from markuskowa/upd-libflame

This commit is contained in:
Artturi 2023-10-06 22:18:58 +03:00 committed by GitHub
commit d17b5e1c1b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 51 additions and 78 deletions

View file

@ -0,0 +1,23 @@
# Package expression for AMD's aocl-utils library.
# The sources come from the upstream GitHub repository and are built with CMake.
{
  lib,
  stdenv,
  fetchFromGitHub,
  cmake,
}:

stdenv.mkDerivation rec {
  pname = "aocl-utils";
  version = "4.1";

  # The fetched revision is the version string itself.
  src = fetchFromGitHub {
    owner = "amd";
    repo = "aocl-utils";
    rev = version;
    hash = "sha256-7Vc3kE+YfqIt6VfvSamsVQRemolzs1sNJUVUZFKk/O8=";
  };

  # CMake is only needed at build time.
  nativeBuildInputs = [ cmake ];

  meta = {
    description = "Interface to all AMD AOCL libraries to access CPU features";
    homepage = "https://github.com/amd/aocl-utils";
    license = lib.licenses.bsd3;
    maintainers = [ lib.maintainers.markuskowa ];
    # Restricted to a single platform.
    platforms = [ "x86_64-linux" ];
  };
}

View file

@ -1,34 +0,0 @@
Add the LAPACKE static archive to the libflame shared library link so that
the shared library provides the LAPACKE symbols as well.
Note: the argument list after -Wl is split on commas, so every archive path
and every linker flag has to be its own comma-separated item.

diff --git a/Makefile b/Makefile
index 5549ce30..ac2ee51e 100644
--- a/Makefile
+++ b/Makefile
@@ -583,14 +583,14 @@ endif
# --- Shared library linker rules ---
-$(LIBFLAME_SO_PATH): $(MK_ALL_FLAMEC_OBJS)
+$(LIBFLAME_SO_PATH): $(MK_ALL_FLAMEC_OBJS) $(LAPACKE_A_PATH)
ifeq ($(ENABLE_VERBOSE),yes)
ifeq ($(FLA_ENABLE_MAX_ARG_LIST_HACK),yes)
$(CAT) $(AR_OBJ_LIST_FILE) | xargs -n$(AR_CHUNK_SIZE) $(AR) $(ARFLAGS) $(LIBFLAME_A)
ifeq ($(OS_NAME),Darwin)
- $(LINKER) $(SOFLAGS) -o $@ -Wl,-force_load,$(LIBFLAME_A) $(LDFLAGS)
+ $(LINKER) $(SOFLAGS) -o $@ -Wl,-force_load,$(LIBFLAME_A),$(LAPACKE_A_PATH) $(LDFLAGS)
else
- $(LINKER) $(SOFLAGS) -o $@ -Wl,--whole-archive,$(LIBFLAME_A),--no-whole-archive $(LDFLAGS)
+ $(LINKER) $(SOFLAGS) -o $@ -Wl,--whole-archive,$(LIBFLAME_A),$(LAPACKE_A_PATH),--no-whole-archive $(LDFLAGS)
endif
else
# NOTE: Can't use $^ automatic variable as long as $(AR_OBJ_LIST_FILE) is in
@@ -602,9 +602,9 @@ else
ifeq ($(FLA_ENABLE_MAX_ARG_LIST_HACK),yes)
@$(CAT) $(AR_OBJ_LIST_FILE) | xargs -n$(AR_CHUNK_SIZE) $(AR) $(ARFLAGS) $(LIBFLAME_A)
ifeq ($(OS_NAME),Darwin)
- @$(LINKER) $(SOFLAGS) -o $@ -Wl,-force_load,$(LIBFLAME_A) $(LDFLAGS)
+ @$(LINKER) $(SOFLAGS) -o $@ -Wl,-force_load,$(LIBFLAME_A),$(LAPACKE_A_PATH) $(LDFLAGS)
else
- @$(LINKER) $(SOFLAGS) -o $@ -Wl,--whole-archive,$(LIBFLAME_A),--no-whole-archive $(LDFLAGS)
+ @$(LINKER) $(SOFLAGS) -o $@ -Wl,--whole-archive,$(LIBFLAME_A),$(LAPACKE_A_PATH),--no-whole-archive $(LDFLAGS)
endif
else
# NOTE: Can't use $^ automatic variable as long as $(AR_OBJ_LIST_FILE) is in

View file

@ -1,78 +1,62 @@
{ lib
, stdenv
, fetchFromGitHub
, cmake
, gfortran
, python3
, amd-blis
, aocl-utils
, withOpenMP ? true
, blas64 ? false
, withAMDOpt ? false
}:
# right now only LP64 is supported
assert !blas64;
stdenv.mkDerivation rec {
pname = "amd-libflame";
version = "3.0";
version = "4.1";
src = fetchFromGitHub {
owner = "amd";
repo = "libflame";
rev = version;
hash = "sha256-jESae5NqANw90RBbIHH2oGEq5/mudc4IONv50P/AeQ0=";
hash = "sha256-SZk11oOAnvn1vb7ucX6U9b0YtAJNxl3tQu4ExHpwwoo=";
};
patches = [
# The LAPACKE interface is compiled as a separate static library,
# we want the main dynamic library to provide LAPACKE symbols.
# This patch adds lapacke.a to the shared library as well.
./add-lapacke.diff
];
passthru = { inherit blas64; };
nativeBuildInputs = [ gfortran python3 ];
buildInputs = [ amd-blis ];
configureFlags = [
# Build a dynamic library with a LAPACK interface.
"--disable-static-build"
"--enable-dynamic-build"
"--enable-lapack2flame"
# Use C BLAS interface.
"--enable-cblas-interfaces"
# Avoid overloading maximum number of arguments.
"--enable-max-arg-list-hack"
# libflame by default leaves BLAS symbols unresolved and leaves it
# up to the application to explicitly link to a BLAS. This is
# problematic for us, since then the BLAS library becomes an
# implicit dependency. Moreover, since the point of the AMD forks
# is to optimized for recent AMD CPUs, link against AMD BLIS.
"LDFLAGS=-lcblas"
]
++ lib.optionals withOpenMP [ "--enable-multithreading=openmp" ];
enableParallelBuilding = true;
postPatch = ''
patchShebangs build
# Enforce reproducible build compiler flags
substituteInPlace CMakeLists.txt --replace '-mtune=native' ""
'';
passthru = { inherit blas64; };
nativeBuildInputs = [ cmake gfortran python3 ];
buildInputs = [ amd-blis aocl-utils ];
# Options handed to the CMake configure step.
cmakeFlags = [
  # Locations of the aocl-utils library and headers.
  "-DLIBAOCLUTILS_LIBRARY_PATH=${lib.getLib aocl-utils}/lib"
  "-DLIBAOCLUTILS_INCLUDE_PATH=${lib.getDev aocl-utils}/include"
  # Interfaces that are always switched on.
  "-DENABLE_BUILTIN_LAPACK2FLAME=ON"
  "-DENABLE_CBLAS_INTERFACES=ON"
  "-DENABLE_EXT_LAPACK_INTERFACE=ON"
]
# Conditional options derived from the package arguments. Each one needs the
# -D prefix like the entries above: CMake only reads -D<var>=<value> as a cache
# definition, so a bare NAME=VALUE argument would not set the option.
++ lib.optional (!withOpenMP) "-DENABLE_MULTITHREADING=OFF"
++ lib.optional blas64 "-DENABLE_ILP64=ON"
++ lib.optional withAMDOpt "-DENABLE_AMD_OPT=ON";
postInstall = ''
ln -s $out/lib/libflame.so.${version} $out/lib/liblapack.so.3
ln -s $out/lib/libflame.so.${version} $out/lib/liblapacke.so.3
ln -s $out/lib/libflame.so $out/lib/liblapack.so.3
ln -s $out/lib/libflame.so $out/lib/liblapacke.so.3
'';
meta = with lib; {
description = "LAPACK-compatible linear algebra library optimized for AMD CPUs";
homepage = "https://developer.amd.com/amd-aocl/blas-library/";
license = licenses.bsd3;
maintainers = with maintainers; [ ];
maintainers = [ maintainers.markuskowa ];
platforms = [ "x86_64-linux" ];
};
}