ollama: 0.0.17 -> 0.1.7, use llama-cpp
parent ff77d3c409
commit ea3a7c56fd
3 changed files with 64 additions and 11 deletions
pkgs/tools/misc/ollama/default.nix
@@ -1,35 +1,50 @@
 { lib
 , buildGoModule
 , fetchFromGitHub
+, llama-cpp
 , stdenv
 , darwin
 }:
 
 buildGoModule rec {
   pname = "ollama";
-  version = "0.0.17";
+  version = "0.1.7";
 
   src = fetchFromGitHub {
     owner = "jmorganca";
     repo = "ollama";
     rev = "v${version}";
-    hash = "sha256-idsFcjsRD1zPmG742gnYQJcgSWDA2DLMHksCFNe2GiY=";
+    hash = "sha256-rzcuRU2qcYTMo/GxiSHwJYnvA9samfWlztMEhOGzbRg=";
   };
 
-  buildInputs = lib.optionals stdenv.isDarwin (with darwin.apple_sdk_11_0.frameworks; [
-    Accelerate
-    MetalPerformanceShaders
-    MetalKit
-  ]);
-
-  vendorHash = "sha256-IgEf/WOc1eNGCif1fViIFxbgZAd6mHBqfxcaqH/WvGg=";
+  patches = [
+    # disable passing the deprecated gqa flag to llama-cpp-server
+    # see https://github.com/ggerganov/llama.cpp/issues/2975
+    ./disable-gqa.patch
+
+    # replace the call to the bundled llama-cpp-server with the one in the llama-cpp package
+    ./set-llamacpp-path.patch
+  ];
+
+  postPatch = ''
+    substituteInPlace llm/llama.go \
+      --subst-var-by llamaCppServer "${llama-cpp}/bin/llama-cpp-server"
+  '';
+
+  vendorHash = "sha256-Qt5QVqRkwK61BJPVhFWtox6b9E8BpAIseNB0yhh+/90=";
 
-  ldflags = [ "-s" "-w" ];
+  ldflags = [
+    "-s"
+    "-w"
+    "-X=github.com/jmorganca/ollama/version.Version=${version}"
+    "-X=github.com/jmorganca/ollama/server.mode=release"
+  ];
 
   meta = with lib; {
     description = "Get up and running with large language models locally";
     homepage = "https://github.com/jmorganca/ollama";
     license = licenses.mit;
-    maintainers = with maintainers; [ dit7ya ];
+    mainProgram = "ollama";
+    maintainers = with maintainers; [ dit7ya elohmeier ];
     platforms = platforms.unix;
   };
 }
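Not part of the diff above, only an illustration: because llama-cpp is now an ordinary argument of the derivation and the runner path is substituted to ${llama-cpp}/bin/llama-cpp-server, the backend can be swapped from the outside. A minimal sketch, assuming the usual nixpkgs attribute names (ollama, llama-cpp) and that the package is wired up via callPackage so that .override is available; myLlamaCpp is a hypothetical stand-in for a customised llama-cpp build:

let
  pkgs = import <nixpkgs> { };

  # hypothetical replacement backend; any derivation that provides
  # bin/llama-cpp-server would work here
  myLlamaCpp = pkgs.llama-cpp;
in
pkgs.ollama.override { llama-cpp = myLlamaCpp; }

Running nix-build on a file containing this expression should produce an ollama whose llama-cpp-server comes from myLlamaCpp instead of the default llama-cpp package.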
pkgs/tools/misc/ollama/disable-gqa.patch (new file, 15 lines)
@@ -0,0 +1,15 @@
diff --git a/llm/llama.go b/llm/llama.go
index 0b460e9..b79e04a 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -299,10 +299,6 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
 		params = append(params, "--n-gpu-layers", fmt.Sprintf("%d", numGPU))
 	}
 
-	if opts.NumGQA > 0 {
-		params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
-	}
-
 	if len(adapters) > 0 {
 		// TODO: applying multiple adapters is not supported by the llama.cpp server yet
 		params = append(params, "--lora", adapters[0])
pkgs/tools/misc/ollama/set-llamacpp-path.patch (new file, 23 lines)
@@ -0,0 +1,23 @@
diff --git a/llm/llama.go b/llm/llama.go
index f23d5d8..6563550 100644
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -25,7 +25,6 @@ import (
 	"github.com/jmorganca/ollama/api"
 )
 
-//go:embed llama.cpp/*/build/*/bin/*
 var llamaCppEmbed embed.FS
 
 type ModelRunner struct {
@@ -33,6 +32,10 @@ type ModelRunner struct {
 }
 
 func chooseRunners(workDir, runnerType string) []ModelRunner {
+	return []ModelRunner{
+		{Path: "@llamaCppServer@"},
+	}
+
 	buildPath := path.Join("llama.cpp", runnerType, "build")
 	var runners []string
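The @llamaCppServer@ token above is not valid Go on its own; it is a placeholder that the postPatch phase in default.nix rewrites (via substituteInPlace --subst-var-by) to the store path of the llama-cpp package's llama-cpp-server binary before the Go sources are compiled. A minimal, self-contained sketch of that placeholder technique, not part of this commit; the demo derivation, the someTool variable and the use of hello are made up for illustration:

let
  pkgs = import <nixpkgs> { };
in
pkgs.runCommand "subst-var-demo" { } ''
  mkdir -p $out/bin

  # a script containing a literal @someTool@ placeholder, standing in for the
  # @llamaCppServer@ token that set-llamacpp-path.patch writes into llm/llama.go
  echo '#!/bin/sh' > $out/bin/demo
  echo 'exec "@someTool@" "$@"' >> $out/bin/demo
  chmod +x $out/bin/demo

  # --subst-var-by rewrites the placeholder to a concrete store path, which is
  # what postPatch does with @llamaCppServer@ and the llama-cpp package
  substituteInPlace $out/bin/demo --subst-var-by someTool "${pkgs.hello}/bin/hello"
''

The two-step pattern (patch in a literal @var@ token, then substitute it at build time) keeps the patch file itself free of hard-coded /nix/store paths.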