Merge pull request #292873 from ghthor/tabby
Tabby: bump 0.7.0 -> 0.8.3 and add systemd service
commit 0340f82b24
5 changed files with 5865 additions and 24 deletions
@@ -783,6 +783,7 @@
     ./services/misc/svnserve.nix
     ./services/misc/synergy.nix
     ./services/misc/sysprof.nix
+    ./services/misc/tabby.nix
     ./services/misc/tandoor-recipes.nix
     ./services/misc/taskserver
     ./services/misc/tautulli.nix
203  nixos/modules/services/misc/tabby.nix  Normal file

@@ -0,0 +1,203 @@
{ config, lib, pkgs, ... }:

let
  inherit (lib) types;

  cfg = config.services.tabby;
  format = pkgs.formats.toml { };

  tabbyPackage = cfg.package.override {
    inherit (cfg) acceleration;
  };
in
{
  options = {
    services.tabby = {
      enable = lib.mkEnableOption (
        lib.mdDoc "Self-hosted AI coding assistant using large language models"
      );

      package = lib.mkPackageOption pkgs "tabby" { };

      port = lib.mkOption {
        type = types.port;
        default = 11029;
        description = lib.mdDoc ''
          Specifies the bind port on which the tabby server HTTP interface listens.
        '';
      };

      model = lib.mkOption {
        type = types.str;
        default = "TabbyML/StarCoder-1B";
        description = lib.mdDoc ''
          Specify the model that tabby will use to generate completions.

          This model will be downloaded automatically if it is not already present.

          If you want to utilize an existing model that you've already
          downloaded, you'll need to move it into tabby's state directory,
          which lives in `/var/lib/tabby`. Because tabby.service is configured
          to use a DynamicUser, the service must have been started at least
          once before you can move a locally downloaded model into
          `/var/lib/tabby`. You can set the model to 'none', in which case
          tabby will start up and fail to download a model, but will have
          created the `/var/lib/tabby` directory. You can then copy the model
          manually into `/var/lib/tabby`, update the model option to the name
          of the model you just copied over, and run `nixos-rebuild switch` to
          start using it.

          $ tabby download --model TabbyML/DeepseekCoder-6.7B
          $ find ~/.tabby/ | tail -n1
          /home/ghthor/.tabby/models/TabbyML/DeepseekCoder-6.7B/ggml/q8_0.v2.gguf
          $ sudo rsync -r ~/.tabby/models/ /var/lib/tabby/models/
          $ sudo chown -R tabby:tabby /var/lib/tabby/models/

          See for model options:
          > https://github.com/TabbyML/registry-tabby
        '';
      };

      acceleration = lib.mkOption {
        type = types.nullOr (types.enum [ "cpu" "rocm" "cuda" "metal" ]);
        default = null;
        example = "rocm";
        description = lib.mdDoc ''
          Specifies the device to use for hardware acceleration.

          - `cpu`: no acceleration, just use the CPU
          - `rocm`: supported by modern AMD GPUs
          - `cuda`: supported by modern NVIDIA GPUs
          - `metal`: supported on Darwin aarch64 machines

          When `acceleration = null`, tabby will try to determine what type of
          acceleration is already enabled in your configuration by checking:

          - nixpkgs.config.cudaSupport
          - nixpkgs.config.rocmSupport
          - whether stdenv.isDarwin && stdenv.isAarch64

          If multiple acceleration methods are found to be enabled, or if you
          haven't set either `cudaSupport` or `rocmSupport`, you will have to
          specify the device type manually here; otherwise it will default to
          the first match from the list above, or to cpu.
        '';
      };

      settings = lib.mkOption {
        inherit (format) type;
        default = { };
        description = lib.mdDoc ''
          Tabby scheduler configuration.

          See for more details:
          > https://tabby.tabbyml.com/docs/configuration/#repository-context-for-code-completion
        '';
        example = lib.literalExpression ''
          settings = {
            repositories = [
              { name = "tabby"; git_url = "https://github.com/TabbyML/tabby.git"; }
              { name = "CTranslate2"; git_url = "git@github.com:OpenNMT/CTranslate2.git"; }

              # A local directory is also supported, but is limited by systemd DynamicUser=1;
              # adding local repositories will need to be done manually.
              { name = "repository_a"; git_url = "file:///var/lib/tabby/repository_a"; }
            ];
          };
        '';
      };

      usageCollection = lib.mkOption {
        type = types.bool;
        default = false;
        description = lib.mdDoc ''
          Enable sending anonymous usage data.

          See for more details:
          > https://tabby.tabbyml.com/docs/configuration#usage-collection
        '';
      };

      indexInterval = lib.mkOption {
        type = types.str;
        default = "5hours";
        example = "5hours";
        description = lib.mdDoc ''
          Run the tabby scheduler to generate the index database at this
          interval; by default it runs every 5 hours. This value applies to
          `OnUnitInactiveSec`.

          The format is described in
          {manpage}`systemd.time(7)`.

          To disable the periodic `tabby scheduler --now` runs, set this to
          `"never"`.
        '';
      };
    };
  };

  # TODO(ghthor): firewall config

  config = lib.mkIf cfg.enable {
    environment = {
      etc."tabby/config.toml".source = format.generate "config.toml" cfg.settings;
      systemPackages = [ tabbyPackage ];
    };

    systemd = let
      serviceUser = {
        WorkingDirectory = "/var/lib/tabby";
        StateDirectory = [ "tabby" ];
        ConfigurationDirectory = [ "tabby" ];
        DynamicUser = true;
        User = "tabby";
        Group = "tabby";
      };

      serviceEnv = lib.mkMerge [
        {
          TABBY_ROOT = "%S/tabby";
        }
        (lib.mkIf (!cfg.usageCollection) {
          TABBY_DISABLE_USAGE_COLLECTION = "1";
        })
      ];
    in {
      services.tabby = {
        wantedBy = [ "multi-user.target" ];
        description = "Self-hosted AI coding assistant using large language models";
        after = [ "network.target" ];
        environment = serviceEnv;
        serviceConfig = lib.mkMerge [
          serviceUser
          {
            ExecStart =
              "${lib.getExe tabbyPackage} serve --model ${cfg.model} --port ${toString cfg.port} --device ${tabbyPackage.featureDevice}";
          }
        ];
      };

      services.tabby-scheduler = lib.mkIf (cfg.indexInterval != "never") {
        wantedBy = [ "multi-user.target" ];
        description = "Tabby repository indexing service";
        after = [ "network.target" ];
        environment = serviceEnv;
        preStart = "cp -f /etc/tabby/config.toml \${TABBY_ROOT}/config.toml";
        serviceConfig = lib.mkMerge [
          serviceUser
          {
            # Type = "oneshot";
            ExecStart = "${lib.getExe tabbyPackage} scheduler --now";
          }
        ];
      };

      timers.tabby-scheduler = lib.mkIf (cfg.indexInterval != "never") {
        description = "Update timer for tabby-scheduler";
        partOf = [ "tabby-scheduler.service" ];
        wantedBy = [ "timers.target" ];
        timerConfig.OnUnitInactiveSec = cfg.indexInterval;
      };
    };
  };

  meta.maintainers = with lib.maintainers; [ ghthor ];
}
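For reviewers who want to try the module, here is a minimal sketch of a host configuration consuming it. The values are illustrative (taken from the option defaults above), not a recommendation:

  {
    services.tabby = {
      enable = true;
      port = 11029;                    # default, shown for clarity
      model = "TabbyML/StarCoder-1B";  # default model
      # Pick "cuda"/"rocm"/"metal"/"cpu" explicitly, or leave null to let the
      # package derive the device from nixpkgs.config.* as documented above.
      acceleration = null;
    };
  }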
29  pkgs/by-name/ta/tabby/0001-nix-build-use-nix-native-llama-cpp-package.patch  Normal file

@@ -0,0 +1,29 @@
From c0152b6bbd751313be756fdcd7b3e3912567b535 Mon Sep 17 00:00:00 2001
From: Will Owens <ghthor@gmail.com>
Date: Fri, 1 Mar 2024 01:37:55 -0500
Subject: [PATCH] nix-build: use nix native llama-cpp package

---
 crates/llama-cpp-bindings/build.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/crates/llama-cpp-bindings/build.rs b/crates/llama-cpp-bindings/build.rs
index 06629ac4..aa004493 100644
--- a/crates/llama-cpp-bindings/build.rs
+++ b/crates/llama-cpp-bindings/build.rs
@@ -12,10 +12,10 @@ fn main() {
 
     println!("cargo:rerun-if-changed=include/engine.h");
     println!("cargo:rerun-if-changed=src/engine.cc");
+    println!("cargo:rustc-link-search=native={}", env::var("LLAMA_CPP_LIB").unwrap());
     println!("cargo:rustc-link-lib=llama");
-    println!("cargo:rustc-link-lib=ggml_static");
+    println!("cargo:rustc-link-lib=ggml_shared");
 
-    build_llama_cpp();
     build_cxx_binding();
 }
--
2.43.1
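For context: the `LLAMA_CPP_LIB` environment variable this build script now reads is not set by the patch itself; it is exported by the updated package.nix later in this diff, which points it at the Nix-built llama-cpp library directory:

  # from pkgs/by-name/ta/tabby/package.nix below
  LLAMA_CPP_LIB = "${llamaccpPackage.outPath}/lib";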
5510  pkgs/by-name/ta/tabby/Cargo.lock  generated  Normal file

File diff suppressed because it is too large
pkgs/by-name/ta/tabby/package.nix

@@ -1,36 +1,132 @@
-{ lib
+{ config
+, lib
+, rustPlatform
 , fetchFromGitHub
 , gcc12
 , cmake
 , stdenv
 
 , git
 , openssl
 , pkg-config
 , protobuf
-, rustPlatform
-, addOpenGLRunpath
-, cudatoolkit
-, nvidia ? true
+
+, llama-cpp
+
+, cudaSupport ? config.cudaSupport
+, cudaPackages ? { }
+
+, rocmSupport ? config.rocmSupport
+
+, darwin
+, metalSupport ? stdenv.isDarwin && stdenv.isAarch64
+
+# one of [ null "cpu" "rocm" "cuda" "metal" ];
+, acceleration ? null
 }:
 
-rustPlatform.buildRustPackage rec {
+let
+  inherit (lib) optional optionals flatten;
+  # References:
+  # https://github.com/NixOS/nixpkgs/blob/master/pkgs/by-name/ll/llama-cpp/package.nix
+  # https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/misc/ollama/default.nix
+
   pname = "tabby";
-  version = "0.7.0";
+  version = "0.8.3";
+
+  availableAccelerations = flatten [
+    (optional cudaSupport "cuda")
+    (optional rocmSupport "rocm")
+    (optional metalSupport "metal")
+  ];
+
+  warnIfMultipleAccelerationMethods = configured: (let
+    len = builtins.length configured;
+    result = if len == 0 then "cpu" else (builtins.head configured);
+  in
+    lib.warnIf (len > 1) ''
+      building tabby with multiple acceleration methods enabled is not
+      supported; falling back to `${result}`
+    ''
+    result
+  );
+
+  # If the user did not override the acceleration attribute, then try to use one of
+  # - nixpkgs.config.cudaSupport
+  # - nixpkgs.config.rocmSupport
+  # - metal if (stdenv.isDarwin && stdenv.isAarch64)
+  # !! warn if multiple acceleration methods are enabled and default to the first one in the list
+  featureDevice = if (builtins.isNull acceleration) then (warnIfMultipleAccelerationMethods availableAccelerations) else acceleration;
+
+  warnIfNotLinux = api: (lib.warnIfNot stdenv.isLinux
+    "building tabby with `${api}` is only supported on linux; falling back to cpu"
+    stdenv.isLinux);
+  warnIfNotDarwinAarch64 = api: (lib.warnIfNot (stdenv.isDarwin && stdenv.isAarch64)
+    "building tabby with `${api}` is only supported on Darwin-aarch64; falling back to cpu"
+    (stdenv.isDarwin && stdenv.isAarch64));
+
+  validAccel = lib.assertOneOf "tabby.featureDevice" featureDevice [ "cpu" "rocm" "cuda" "metal" ];
+
+  # TODO(ghthor): there is a bug here where featureDevice could be cuda, but enableCuda is false.
+  # This would result in a startup failure of the service module.
+  enableRocm = validAccel && (featureDevice == "rocm") && (warnIfNotLinux "rocm");
+  enableCuda = validAccel && (featureDevice == "cuda") && (warnIfNotLinux "cuda");
+  enableMetal = validAccel && (featureDevice == "metal") && (warnIfNotDarwinAarch64 "metal");
+
+  # We have to use override here because tabby doesn't actually tell llama-cpp
+  # to use a specific device type as it is relying on llama-cpp only being
+  # built to use one type of device.
+  #
+  # See: https://github.com/TabbyML/tabby/blob/v0.8.3/crates/llama-cpp-bindings/include/engine.h#L20
+  #
+  llamaccpPackage = llama-cpp.override {
+    rocmSupport = enableRocm;
+    cudaSupport = enableCuda;
+    metalSupport = enableMetal;
+  };
+
+  # TODO(ghthor): some of this can be removed
+  darwinBuildInputs = [ llamaccpPackage ]
+    ++ optionals stdenv.isDarwin (with darwin.apple_sdk.frameworks; [
+      Foundation
+      Accelerate
+      CoreVideo
+      CoreGraphics
+    ]
+    ++ optionals enableMetal [ Metal MetalKit ]);
+
+  cudaBuildInputs = [ llamaccpPackage ];
+  rocmBuildInputs = [ llamaccpPackage ];
+
+  LLAMA_CPP_LIB = "${llamaccpPackage.outPath}/lib";
+
+in
+rustPlatform.buildRustPackage {
+  inherit pname version;
+  inherit featureDevice;
 
   src = fetchFromGitHub {
     owner = "TabbyML";
     repo = "tabby";
     rev = "v${version}";
-    hash = "sha256-BTPJWvqO4IuQAiUEER9PYfu4aQsz5RI77WsA/gQu5Jc=";
+    hash = "sha256-+5Q5XKfh7+g24y2hBqJC/jNEoRytDdcRdn838xc7c8w=";
     fetchSubmodules = true;
   };
 
-  cargoHash = "sha256-Du0ya9J+0tz72mSid5If0VFX2lLC7YtwNQ/MALpFv2M=";
+  cargoLock = {
+    lockFile = ./Cargo.lock;
+    outputHashes = {
+      "tree-sitter-c-0.20.6" = "sha256-Etl4s29YSOxiqPo4Z49N6zIYqNpIsdk/Qd0jR8jdvW4=";
+      "tree-sitter-cpp-0.20.3" = "sha256-UrQ48CoUMSHmlHzOMu22c9N4hxJtHL2ZYRabYjf5byA=";
+    };
+  };
 
   # https://github.com/TabbyML/tabby/blob/v0.7.0/.github/workflows/release.yml#L39
   cargoBuildFlags = [
     "--release"
     "--package" "tabby"
-  ] ++ lib.optional nvidia [
+  ] ++ optionals enableRocm [
+    "--features" "rocm"
+  ] ++ optionals enableCuda [
     "--features" "cuda"
   ];
@@ -40,23 +136,24 @@ rustPlatform.buildRustPackage rec {
     pkg-config
     protobuf
     git
     cmake
     gcc12
 
-  ] ++ lib.optional nvidia [
-    addOpenGLRunpath
+  ] ++ optionals enableCuda [
+    # TODO: Replace with autoAddDriverRunpath
+    # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
+    cudaPackages.autoAddOpenGLRunpathHook
   ];
 
   buildInputs = [ openssl ]
-    ++ lib.optional nvidia cudatoolkit
+    ++ optionals stdenv.isDarwin darwinBuildInputs
+    ++ optionals enableCuda cudaBuildInputs
+    ++ optionals enableRocm rocmBuildInputs
+  ;
 
-  postInstall = ''
-    ${if nvidia then ''
-      addOpenGLRunpath "$out/bin/tabby"
-    '' else ''
-    ''}
-  '';
+  env = lib.mergeAttrsList [
+    { inherit LLAMA_CPP_LIB; }
+    # Work around https://github.com/NixOS/nixpkgs/issues/166205
+    (lib.optionalAttrs stdenv.cc.isClang { NIX_LDFLAGS = "-l${stdenv.cc.libcxx.cxxabi.libName}"; })
+  ];
+  patches = [ ./0001-nix-build-use-nix-native-llama-cpp-package.patch ];
 
   # Fails with:
   # file cannot create directory: /var/empty/local/lib64/cmake/Llama
@@ -69,5 +166,6 @@ rustPlatform.buildRustPackage rec {
     mainProgram = "tabby";
     license = licenses.asl20;
     maintainers = [ maintainers.ghthor ];
+    broken = stdenv.isDarwin && !stdenv.isAarch64;
   };
 }
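Since the NixOS module above wires the option through `cfg.package.override { inherit (cfg) acceleration; }`, the same `acceleration` argument can be exercised standalone. A minimal sketch (hypothetical usage; the argument name comes from the expression above):

  # Build tabby against a ROCm-enabled llama-cpp, bypassing the module.
  (import <nixpkgs> { }).tabby.override { acceleration = "rocm"; }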