Merge pull request #292873 from ghthor/tabby
Tabby: bump 0.7.0 -> 0.8.3 and add systemd service
commit 0340f82b24
5 changed files with 5865 additions and 24 deletions
@@ -783,6 +783,7 @@
     ./services/misc/svnserve.nix
     ./services/misc/synergy.nix
     ./services/misc/sysprof.nix
+    ./services/misc/tabby.nix
     ./services/misc/tandoor-recipes.nix
     ./services/misc/taskserver
     ./services/misc/tautulli.nix
203  nixos/modules/services/misc/tabby.nix  Normal file

@@ -0,0 +1,203 @@
{ config, lib, pkgs, ... }:

let
  inherit (lib) types;

  cfg = config.services.tabby;
  format = pkgs.formats.toml { };

  tabbyPackage = cfg.package.override {
    inherit (cfg) acceleration;
  };
in
{
  options = {
    services.tabby = {
      enable = lib.mkEnableOption (
        lib.mdDoc "Self-hosted AI coding assistant using large language models"
      );

      package = lib.mkPackageOption pkgs "tabby" { };

      port = lib.mkOption {
        type = types.port;
        default = 11029;
        description = lib.mdDoc ''
          Specifies the bind port on which the tabby server HTTP interface listens.
        '';
      };

      model = lib.mkOption {
        type = types.str;
        default = "TabbyML/StarCoder-1B";
        description = lib.mdDoc ''
          Specify the model that tabby will use to generate completions.

          This model will be downloaded automatically if it is not already present.

          If you want to utilize an existing model that you've already
          downloaded, you'll need to move it into tabby's state directory,
          which lives in `/var/lib/tabby`. Because tabby.service is configured
          to use a DynamicUser, the service must have been started at least
          once before you can move a locally downloaded model into
          `/var/lib/tabby`. You can set the model to 'none', in which case
          tabby will start up and fail to download a model, but will have
          created the `/var/lib/tabby` directory. You can then copy the model
          manually into `/var/lib/tabby`, update the model option to the name
          of the model you just copied over, and run `nixos-rebuild switch` to
          start using it.

          $ tabby download --model TabbyML/DeepseekCoder-6.7B
          $ find ~/.tabby/ | tail -n1
          /home/ghthor/.tabby/models/TabbyML/DeepseekCoder-6.7B/ggml/q8_0.v2.gguf
          $ sudo rsync -r ~/.tabby/models/ /var/lib/tabby/models/
          $ sudo chown -R tabby:tabby /var/lib/tabby/models/

          See for model options:
          > https://github.com/TabbyML/registry-tabby
        '';
      };

      acceleration = lib.mkOption {
        type = types.nullOr (types.enum [ "cpu" "rocm" "cuda" "metal" ]);
        default = null;
        example = "rocm";
        description = lib.mdDoc ''
          Specifies the device to use for hardware acceleration.

          - `cpu`: no acceleration, just use the CPU
          - `rocm`: supported by modern AMD GPUs
          - `cuda`: supported by modern NVIDIA GPUs
          - `metal`: supported on Darwin aarch64 machines

          When `acceleration = null`, tabby will try to determine what type of
          acceleration is already enabled in your configuration by checking:

          - nixpkgs.config.cudaSupport
          - nixpkgs.config.rocmSupport
          - whether stdenv.isDarwin && stdenv.isAarch64

          If multiple acceleration methods are found to be enabled, or if you
          haven't set either `cudaSupport` or `rocmSupport`, you will have to
          specify the device type manually here; otherwise it will default to
          the first match from the list above, or to cpu.
        '';
      };

      settings = lib.mkOption {
        inherit (format) type;
        default = { };
        description = lib.mdDoc ''
          Tabby scheduler configuration.

          See for more details:
          > https://tabby.tabbyml.com/docs/configuration/#repository-context-for-code-completion
        '';
        example = lib.literalExpression ''
          settings = {
            repositories = [
              { name = "tabby"; git_url = "https://github.com/TabbyML/tabby.git"; }
              { name = "CTranslate2"; git_url = "git@github.com:OpenNMT/CTranslate2.git"; }

              # A local directory is also supported, but is limited by systemd DynamicUser=1;
              # adding local repositories will need to be done manually.
              { name = "repository_a"; git_url = "file:///var/lib/tabby/repository_a"; }
            ];
          };
        '';
      };

      usageCollection = lib.mkOption {
        type = types.bool;
        default = false;
        description = lib.mdDoc ''
          Enable sending anonymous usage data.

          See for more details:
          > https://tabby.tabbyml.com/docs/configuration#usage-collection
        '';
      };

      indexInterval = lib.mkOption {
        type = types.str;
        default = "5hours";
        example = "5hours";
        description = lib.mdDoc ''
          Run the tabby scheduler to generate the index database at this
          interval; by default it runs every 5 hours. This value applies to
          `OnUnitInactiveSec`.

          The format is described in
          {manpage}`systemd.time(7)`.

          To disable the periodic `tabby scheduler --now` runs, set this to
          `"never"`.
        '';
      };
    };
  };

  # TODO(ghthor): firewall config

  config = lib.mkIf cfg.enable {
    environment = {
      etc."tabby/config.toml".source = format.generate "config.toml" cfg.settings;
      systemPackages = [ tabbyPackage ];
    };

    systemd = let
      serviceUser = {
        WorkingDirectory = "/var/lib/tabby";
        StateDirectory = [ "tabby" ];
        ConfigurationDirectory = [ "tabby" ];
        DynamicUser = true;
        User = "tabby";
        Group = "tabby";
      };

      serviceEnv = lib.mkMerge [
        {
          TABBY_ROOT = "%S/tabby";
        }
        (lib.mkIf (!cfg.usageCollection) {
          TABBY_DISABLE_USAGE_COLLECTION = "1";
        })
      ];
    in {
      services.tabby = {
        wantedBy = [ "multi-user.target" ];
        description = "Self-hosted AI coding assistant using large language models";
        after = [ "network.target" ];
        environment = serviceEnv;
        serviceConfig = lib.mkMerge [
          serviceUser
          {
            ExecStart =
              "${lib.getExe tabbyPackage} serve --model ${cfg.model} --port ${toString cfg.port} --device ${tabbyPackage.featureDevice}";
          }
        ];
      };

      services.tabby-scheduler = lib.mkIf (cfg.indexInterval != "never") {
        wantedBy = [ "multi-user.target" ];
        description = "Tabby repository indexing service";
        after = [ "network.target" ];
        environment = serviceEnv;
        preStart = "cp -f /etc/tabby/config.toml \${TABBY_ROOT}/config.toml";
        serviceConfig = lib.mkMerge [
          serviceUser
          {
            # Type = "oneshot";
            ExecStart = "${lib.getExe tabbyPackage} scheduler --now";
          }
        ];
      };

      timers.tabby-scheduler = lib.mkIf (cfg.indexInterval != "never") {
        description = "Update timer for tabby-scheduler";
        partOf = [ "tabby-scheduler.service" ];
        wantedBy = [ "timers.target" ];
        timerConfig.OnUnitInactiveSec = cfg.indexInterval;
      };
    };
  };

  meta.maintainers = with lib.maintainers; [ ghthor ];
}
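For reviewers who want to try the module, here is a minimal sketch of a host configuration consuming it. The values are illustrative (taken from the option defaults above), not a recommendation:

  {
    services.tabby = {
      enable = true;
      port = 11029;                    # default, shown for clarity
      model = "TabbyML/StarCoder-1B";  # default model
      # Pick "cuda"/"rocm"/"metal"/"cpu" explicitly, or leave null to let the
      # package derive the device from nixpkgs.config.* as documented above.
      acceleration = null;
    };
  }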
29  pkgs/by-name/ta/tabby/0001-nix-build-use-nix-native-llama-cpp-package.patch  Normal file

@@ -0,0 +1,29 @@
From c0152b6bbd751313be756fdcd7b3e3912567b535 Mon Sep 17 00:00:00 2001
From: Will Owens <ghthor@gmail.com>
Date: Fri, 1 Mar 2024 01:37:55 -0500
Subject: [PATCH] nix-build: use nix native llama-cpp package

---
 crates/llama-cpp-bindings/build.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs
index 06629ac4..aa004493 100644
--- a/crates/llama-cpp-bindings/build.rs
+++ b/crates/llama-cpp-bindings/build.rs
@@ -12,10 +12,10 @@ fn main() {
 
     println!("cargo:rerun-if-changed=include/engine.h");
     println!("cargo:rerun-if-changed=src/engine.cc");
+    println!("cargo:rustc-link-search=native={}", env::var("LLAMA_CPP_LIB").unwrap());
     println!("cargo:rustc-link-lib=llama");
-    println!("cargo:rustc-link-lib=ggml_static");
+    println!("cargo:rustc-link-lib=ggml_shared");
 
-    build_llama_cpp();
     build_cxx_binding();
 }
--
2.43.1
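For context: the `LLAMA_CPP_LIB` environment variable this build script now reads is not set by the patch itself; it is exported by the updated package.nix later in this diff, which points it at the Nix-built llama-cpp library directory:

  # from pkgs/by-name/ta/tabby/package.nix below
  LLAMA_CPP_LIB = "${llamaccpPackage.outPath}/lib";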
5510  pkgs/by-name/ta/tabby/Cargo.lock  generated  Normal file

File diff suppressed because it is too large
pkgs/by-name/ta/tabby/package.nix

@@ -1,36 +1,132 @@
-{ lib
+{ config
+, lib
+, rustPlatform
 , fetchFromGitHub
 , gcc12
 , cmake
 , stdenv
 
 , git
 , openssl
 , pkg-config
 , protobuf
-, rustPlatform
-, addOpenGLRunpath
-, cudatoolkit
-, nvidia ? true
+
+, llama-cpp
+
+, cudaSupport ? config.cudaSupport
+, cudaPackages ? { }
+
+, rocmSupport ? config.rocmSupport
+
+, darwin
+, metalSupport ? stdenv.isDarwin && stdenv.isAarch64
+
+# one of [ null "cpu" "rocm" "cuda" "metal" ];
+, acceleration ? null
 }:
 
-rustPlatform.buildRustPackage rec {
+let
+  inherit (lib) optional optionals flatten;
+  # References:
+  # https://github.com/NixOS/nixpkgs/blob/master/pkgs/by-name/ll/llama-cpp/package.nix
+  # https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/ollama/default.nix
+
   pname = "tabby";
-  version = "0.7.0";
+  version = "0.8.3";
+
+  availableAccelerations = flatten [
+    (optional cudaSupport "cuda")
+    (optional rocmSupport "rocm")
+    (optional metalSupport "metal")
+  ];
+
+  warnIfMultipleAccelerationMethods = configured: (let
+    len = builtins.length configured;
+    result = if len == 0 then "cpu" else (builtins.head configured);
+  in
+    lib.warnIf (len > 1) ''
+      building tabby with multiple acceleration methods enabled is not
+      supported; falling back to `${result}`
+    ''
+    result
+  );
+
+  # If the user did not override the acceleration attribute, then try to use one of
+  # - nixpkgs.config.cudaSupport
+  # - nixpkgs.config.rocmSupport
+  # - metal if (stdenv.isDarwin && stdenv.isAarch64)
+  # !! warn if multiple acceleration methods are enabled and default to the first one in the list
+  featureDevice = if (builtins.isNull acceleration) then (warnIfMultipleAccelerationMethods availableAccelerations) else acceleration;
+
+  warnIfNotLinux = api: (lib.warnIfNot stdenv.isLinux
+    "building tabby with `${api}` is only supported on linux; falling back to cpu"
+    stdenv.isLinux);
+  warnIfNotDarwinAarch64 = api: (lib.warnIfNot (stdenv.isDarwin && stdenv.isAarch64)
+    "building tabby with `${api}` is only supported on Darwin-aarch64; falling back to cpu"
+    (stdenv.isDarwin && stdenv.isAarch64));
+
+  validAccel = lib.assertOneOf "tabby.featureDevice" featureDevice [ "cpu" "rocm" "cuda" "metal" ];
+
+  # TODO(ghthor): there is a bug here where featureDevice could be cuda, but enableCuda is false.
+  # This would result in a startup failure of the service module.
+  enableRocm = validAccel && (featureDevice == "rocm") && (warnIfNotLinux "rocm");
+  enableCuda = validAccel && (featureDevice == "cuda") && (warnIfNotLinux "cuda");
+  enableMetal = validAccel && (featureDevice == "metal") && (warnIfNotDarwinAarch64 "metal");
+
+  # We have to use override here because tabby doesn't actually tell llama-cpp
+  # to use a specific device type as it is relying on llama-cpp only being
+  # built to use one type of device.
+  #
+  # See: https://github.com/TabbyML/tabby/blob/v0.8.3/crates/llama-cpp-bindings/include/engine.h#L20
+  #
+  llamaccpPackage = llama-cpp.override {
+    rocmSupport = enableRocm;
+    cudaSupport = enableCuda;
+    metalSupport = enableMetal;
+  };
+
+  # TODO(ghthor): some of this can be removed
+  darwinBuildInputs = [ llamaccpPackage ]
+    ++ optionals stdenv.isDarwin (with darwin.apple_sdk.frameworks; [
+      Foundation
+      Accelerate
+      CoreVideo
+      CoreGraphics
+    ]
+    ++ optionals enableMetal [ Metal MetalKit ]);
+
+  cudaBuildInputs = [ llamaccpPackage ];
+  rocmBuildInputs = [ llamaccpPackage ];
+
+  LLAMA_CPP_LIB = "${llamaccpPackage.outPath}/lib";
+
+in
+rustPlatform.buildRustPackage {
+  inherit pname version;
+  inherit featureDevice;
 
   src = fetchFromGitHub {
     owner = "TabbyML";
     repo = "tabby";
     rev = "v${version}";
-    hash = "sha256-BTPJWvqO4IuQAiUEER9PYfu4aQsz5RI77WsA/gQu5Jc=";
+    hash = "sha256-+5Q5XKfh7+g24y2hBqJC/jNEoRytDdcRdn838xc7c8w=";
     fetchSubmodules = true;
   };
 
-  cargoHash = "sha256-Du0ya9J+0tz72mSid5If0VFX2lLC7YtwNQ/MALpFv2M=";
+  cargoLock = {
+    lockFile = ./Cargo.lock;
+    outputHashes = {
+      "tree-sitter-c-0.20.6" = "sha256-Etl4s29YSOxiqPo4Z49N6zIYqNpIsdk/Qd0jR8jdvW4=";
+      "tree-sitter-cpp-0.20.3" = "sha256-UrQ48CoUMSHmlHzOMu22c9N4hxJtHL2ZYRabYjf5byA=";
+    };
+  };
 
   # https://github.com/TabbyML/tabby/blob/v0.7.0/.github/workflows/release.yml#L39
   cargoBuildFlags = [
     "--release"
     "--package" "tabby"
-  ] ++ lib.optional nvidia [
+  ] ++ optionals enableRocm [
+    "--features" "rocm"
+  ] ++ optionals enableCuda [
     "--features" "cuda"
   ];
@@ -40,23 +136,24 @@ rustPlatform.buildRustPackage rec {
     pkg-config
     protobuf
     git
     cmake
     gcc12
 
-  ] ++ lib.optional nvidia [
-    addOpenGLRunpath
+  ] ++ optionals enableCuda [
+    # TODO: Replace with autoAddDriverRunpath
+    # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
+    cudaPackages.autoAddOpenGLRunpathHook
   ];
 
   buildInputs = [ openssl ]
-    ++ lib.optional nvidia cudatoolkit
+    ++ optionals stdenv.isDarwin darwinBuildInputs
+    ++ optionals enableCuda cudaBuildInputs
+    ++ optionals enableRocm rocmBuildInputs
+  ;
 
-  postInstall = ''
-    ${if nvidia then ''
-      addOpenGLRunpath "$out/bin/tabby"
-    '' else ''
-    ''}
-  '';
+  env = lib.mergeAttrsList [
+    { inherit LLAMA_CPP_LIB; }
+    # Work around https://github.com/NixOS/nixpkgs/issues/166205
+    (lib.optionalAttrs stdenv.cc.isClang { NIX_LDFLAGS = "-l${stdenv.cc.libcxx.cxxabi.libName}"; })
+  ];
+  patches = [ ./0001-nix-build-use-nix-native-llama-cpp-package.patch ];
 
   # Fails with:
   # file cannot create directory: /var/empty/local/lib64/cmake/Llama
@@ -69,5 +166,6 @@ rustPlatform.buildRustPackage rec {
     mainProgram = "tabby";
     license = licenses.asl20;
     maintainers = [ maintainers.ghthor ];
+    broken = stdenv.isDarwin && !stdenv.isAarch64;
   };
 }
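Since the NixOS module above wires the option through `cfg.package.override { inherit (cfg) acceleration; }`, the same `acceleration` argument can be exercised standalone. A minimal sketch (hypothetical usage; the argument name comes from the expression above):

  # Build tabby against a ROCm-enabled llama-cpp, bypassing the module.
  (import <nixpkgs> { }).tabby.override { acceleration = "rocm"; }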