diff --git a/pkgs/development/python-modules/autofaiss/default.nix b/pkgs/development/python-modules/autofaiss/default.nix
new file mode 100644
index 000000000000..443a086c5f82
--- /dev/null
+++ b/pkgs/development/python-modules/autofaiss/default.nix
@@ -0,0 +1,62 @@
+{ buildPythonPackage
+, embedding-reader
+, faiss
+, fetchFromGitHub
+, fire
+, fsspec
+, lib
+, numpy
+, pyarrow
+, pytestCheckHook
+, pythonRelaxDepsHook
+}:
+
+buildPythonPackage rec {
+  pname = "autofaiss";
+  version = "2.15.3";
+
+  src = fetchFromGitHub {
+    owner = "criteo";
+    repo = pname;
+    rev = "refs/tags/${version}";
+    hash = "sha256-RJOOUMI4w1YPEjDKi0YkqTXU01AbVoPn2+Id6kdC5pA=";
+  };
+
+  nativeBuildInputs = [ pythonRelaxDepsHook ];
+
+  pythonRemoveDeps = [
+    # The `dataclasses` package on PyPI is a backport of the Python 3.7
+    # standard-library module for Python 3.6; unnecessary on Python >= 3.7.
+    "dataclasses"
+    # We call it faiss, not faiss-cpu.
+    "faiss-cpu"
+  ];
+
+  pythonRelaxDeps = [
+    # As of v2.15.3, autofaiss asks for pyarrow<8 but we have pyarrow v9.0.0 in
+    # nixpkgs at the time of writing (2022-12-15).
+    "pyarrow"
+  ];
+
+  propagatedBuildInputs = [ embedding-reader fsspec numpy faiss fire pyarrow ];
+
+  checkInputs = [ pytestCheckHook ];
+
+  disabledTests = [
+    # Attempts to spin up a Spark cluster and talk to it which doesn't work in
+    # the Nix build environment.
+    "test_build_partitioned_indexes"
+    "test_index_correctness_in_distributed_mode_with_multiple_indices"
+    "test_index_correctness_in_distributed_mode"
+    "test_quantize_with_pyspark"
+  ];
+
+  pythonImportsCheck = [ "autofaiss" ];
+
+  meta = with lib; {
+    description = "Automatically create Faiss knn indices with the most optimal similarity search parameters";
+    homepage = "https://github.com/criteo/autofaiss";
+    license = licenses.asl20;
+    maintainers = with maintainers; [ samuela ];
+  };
+}
diff --git a/pkgs/development/python-modules/embedding-reader/default.nix b/pkgs/development/python-modules/embedding-reader/default.nix
new file mode 100644
index 000000000000..2d4af5ca1f12
--- /dev/null
+++ b/pkgs/development/python-modules/embedding-reader/default.nix
@@ -0,0 +1,41 @@
+{ buildPythonPackage
+, fetchFromGitHub
+, fsspec
+, lib
+, numpy
+, pandas
+, pyarrow
+, pytestCheckHook
+, pythonRelaxDepsHook
+}:
+
+buildPythonPackage rec {
+  pname = "embedding-reader";
+  version = "1.5.0";
+
+  src = fetchFromGitHub {
+    owner = "rom1504";
+    repo = pname;
+    rev = "refs/tags/${version}";
+    hash = "sha256-uyeIcAW9O9PR4cqmifC6Lx+Hn6XPb1RH/ksmUWvbdtw=";
+  };
+
+  nativeBuildInputs = [ pythonRelaxDepsHook ];
+
+  # Drop the version bound on pyarrow. NOTE(review): presumably upstream pins
+  # an older pyarrow than nixpkgs provides, as autofaiss does -- confirm.
+  pythonRelaxDeps = [ "pyarrow" ];
+
+  propagatedBuildInputs = [ fsspec numpy pandas pyarrow ];
+
+  checkInputs = [ pytestCheckHook ];
+
+  pythonImportsCheck = [ "embedding_reader" ];
+
+  meta = with lib; {
+    description = "Efficiently read embedding in streaming from any filesystem";
+    homepage = "https://github.com/rom1504/embedding-reader";
+    license = licenses.mit;
+    maintainers = with maintainers; [ samuela ];
+  };
+}
diff --git a/pkgs/top-level/python-packages.nix b/pkgs/top-level/python-packages.nix
index 127bee350da6..405b9de1c9db 100644
--- a/pkgs/top-level/python-packages.nix
+++ b/pkgs/top-level/python-packages.nix
@@ -780,6 +780,8 @@ self: super: with self; {
 
   autocommand = callPackage ../development/python-modules/autocommand { };
 
+  autofaiss = callPackage ../development/python-modules/autofaiss { };
+
   autograd = callPackage ../development/python-modules/autograd { };
 
   autoit-ripper = callPackage ../development/python-modules/autoit-ripper { };
@@ -2935,6 +2937,8 @@ self: super: with self; {
 
   email-validator = callPackage ../development/python-modules/email-validator { };
 
+  embedding-reader = callPackage ../development/python-modules/embedding-reader { };
+
   embrace = callPackage ../development/python-modules/embrace { };
 
   emcee = callPackage ../development/python-modules/emcee { };