mirror of https://github.com/ollama/ollama.git
Compare commits
10 Commits
e2b7f59b81
...
189b9420ea
Author | SHA1 | Date |
---|---|---|
|
189b9420ea | |
|
bd15eba4e4 | |
|
b4289cc3a0 | |
|
4e18d92c28 | |
|
8c7702dacc | |
|
e99128a223 | |
|
cf9abf5001 | |
|
ce7853091c | |
|
01250de101 | |
|
c9c0e24a87 |
|
@ -15,19 +15,19 @@ func main() {
|
|||
}
|
||||
|
||||
messages := []api.Message{
|
||||
api.Message{
|
||||
{
|
||||
Role: "system",
|
||||
Content: "Provide very brief, concise responses",
|
||||
},
|
||||
api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Name some unusual animals",
|
||||
},
|
||||
api.Message{
|
||||
{
|
||||
Role: "assistant",
|
||||
Content: "Monotreme, platypus, echidna",
|
||||
},
|
||||
api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "which of these is the most dangerous?",
|
||||
},
|
||||
|
|
|
@ -6,7 +6,9 @@ import (
|
|||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/ollama/ollama/format"
|
||||
|
@ -146,3 +148,35 @@ func GetSystemInfo() SystemInfo {
|
|||
GPUs: gpus,
|
||||
}
|
||||
}
|
||||
|
||||
func cudaJetpack() string {
|
||||
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||
if CudaTegra != "" {
|
||||
ver := strings.Split(CudaTegra, ".")
|
||||
if len(ver) > 0 {
|
||||
return "jetpack" + ver[0]
|
||||
}
|
||||
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
|
||||
r := regexp.MustCompile(` R(\d+) `)
|
||||
m := r.FindSubmatch(data)
|
||||
if len(m) != 2 {
|
||||
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
|
||||
} else {
|
||||
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
|
||||
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
|
||||
// https://developer.nvidia.com/embedded/jetpack-archive
|
||||
switch l4t {
|
||||
case 35:
|
||||
return "jetpack5"
|
||||
case 36:
|
||||
return "jetpack6"
|
||||
default:
|
||||
// Newer Jetson systems use the SBSU runtime
|
||||
slog.Debug("unrecognized L4T version", "nv_tegra_release", string(data))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
|
|
@ -78,6 +78,8 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
|
|||
}
|
||||
|
||||
slog.Info("discovering available GPUs...")
|
||||
requested := envconfig.LLMLibrary()
|
||||
jetpack := cudaJetpack()
|
||||
|
||||
// For our initial discovery pass, we gather all the known GPUs through
|
||||
// all the libraries that were detected. This pass may include GPUs that
|
||||
|
@ -86,6 +88,14 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
|
|||
// times concurrently leading to memory contention
|
||||
for dir := range libDirs {
|
||||
var dirs []string
|
||||
if dir != "" {
|
||||
if requested != "" && filepath.Base(dir) != requested {
|
||||
slog.Debug("skipping available library at users request", "requested", requested, "libDir", dir)
|
||||
continue
|
||||
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
|
||||
continue
|
||||
}
|
||||
}
|
||||
if dir == "" {
|
||||
dirs = []string{LibOllamaPath}
|
||||
} else {
|
||||
|
|
|
@ -48,16 +48,10 @@ Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5]
|
|||
|
||||
**Experimental LLM Library Override**
|
||||
|
||||
You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to bypass autodetection, so for example, if you have a CUDA card, but want to force the CPU LLM library with AVX2 vector support, use:
|
||||
You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to limit autodetection. For example, if you have both CUDA and AMD GPUs but want to use only CUDA v13, use:
|
||||
|
||||
```shell
|
||||
OLLAMA_LLM_LIBRARY="cpu_avx2" ollama serve
|
||||
```
|
||||
|
||||
You can see what features your CPU has with the following.
|
||||
|
||||
```shell
|
||||
cat /proc/cpuinfo| grep flags | head -1
|
||||
OLLAMA_LLM_LIBRARY="cuda_v13" ollama serve
|
||||
```
|
||||
|
||||
## Installing older or pre-release versions on Linux
|
||||
|
|
16
go.mod
16
go.mod
|
@ -23,6 +23,13 @@ require (
|
|||
github.com/mattn/go-runewidth v0.0.14
|
||||
github.com/nlpodyssey/gopickle v0.3.0
|
||||
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
|
||||
github.com/prometheus/client_golang v1.19.1
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0
|
||||
go.opentelemetry.io/otel v1.30.0
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.49.0
|
||||
go.opentelemetry.io/otel/metric v1.30.0
|
||||
go.opentelemetry.io/otel/sdk v1.30.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.30.0
|
||||
golang.org/x/image v0.22.0
|
||||
golang.org/x/tools v0.30.0
|
||||
gonum.org/v1/gonum v0.15.0
|
||||
|
@ -30,19 +37,26 @@ require (
|
|||
|
||||
require (
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/chewxy/hm v1.0.0 // indirect
|
||||
github.com/chewxy/math32 v1.11.0 // indirect
|
||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/go-logr/logr v1.4.2 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/google/flatbuffers v24.3.25+incompatible // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.53.0 // indirect
|
||||
github.com/prometheus/procfs v0.15.0 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/xtgo/set v1.0.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.30.0 // indirect
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
gorgonia.org/vecf32 v0.9.0 // indirect
|
||||
|
|
40
go.sum
40
go.sum
|
@ -12,12 +12,16 @@ github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6IC
|
|||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs=
|
||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
|
||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
|
||||
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
||||
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
|
||||
github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
|
||||
github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
|
||||
|
@ -34,7 +38,6 @@ github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWH
|
|||
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
|
||||
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 h1:cBzrdJPAFBsgCrDPnZxlp1dF2+k4r1kVpD7+1S1PVjY=
|
||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1/go.mod h1:uw2gLcxEuYUlAd/EXyjc/v55nd3+47YAgWbSXVxPrNI=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
|
@ -70,6 +73,11 @@ github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2H
|
|||
github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
|
||||
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
|
||||
github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
|
@ -133,8 +141,8 @@ github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa02
|
|||
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
|
||||
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||
|
@ -166,12 +174,20 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
|||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
|
||||
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||
github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE=
|
||||
github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U=
|
||||
github.com/prometheus/procfs v0.15.0 h1:A82kmvXJq2jTu5YUhSGNlYoxh85zLnKgPz4bMZgI5Ek=
|
||||
github.com/prometheus/procfs v0.15.0/go.mod h1:Y0RJ/Y5g5wJpkTisOtqwDSo4HwhGmLB4VQSw2sQJLHk=
|
||||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
||||
github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
|
||||
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
|
||||
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
|
||||
|
@ -204,6 +220,20 @@ github.com/xtgo/set v1.0.0/go.mod h1:d3NHzGzSa0NmB2NhFyECA+QdRp29oEn2xbT+TpeFoM8
|
|||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0 h1:GotCpbh7YkCHdFs+hYMdvAEyGsBZifFognqrOnBwyJM=
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0/go.mod h1:6b0AS55EEPj7qP44khqF5dqTUq+RkakDMShFaW1EcA4=
|
||||
go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts=
|
||||
go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.49.0 h1:Er5I1g/YhfYv9Affk9nJLfH/+qCCVVg1f2R9AbJfqDQ=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.49.0/go.mod h1:KfQ1wpjf3zsHjzP149P4LyAwWRupc6c7t1ZJ9eXpKQM=
|
||||
go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w=
|
||||
go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ=
|
||||
go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE=
|
||||
go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.30.0 h1:QJLT8Pe11jyHBHfSAgYH7kEmT24eX792jZO1bo4BXkM=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.30.0/go.mod h1:waS6P3YqFNzeP01kuo/MBBYqaoBJl7efRQHOaydhy1Y=
|
||||
go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc=
|
||||
go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o=
|
||||
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 h1:lGdhQUN/cnWdSH3291CUuxSEqc+AsGTiDxPP3r2J0l4=
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6/go.mod h1:FftLjUGFEDu5k8lt0ddY+HcrH/qU/0qk+H8j9/nTl3E=
|
||||
|
|
|
@ -26,6 +26,8 @@ import (
|
|||
|
||||
"github.com/gin-contrib/cors"
|
||||
"github.com/gin-gonic/gin"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"golang.org/x/image/webp"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
|
@ -41,12 +43,14 @@ import (
|
|||
"github.com/ollama/ollama/model/parsers"
|
||||
"github.com/ollama/ollama/server/internal/client/ollama"
|
||||
"github.com/ollama/ollama/server/internal/registry"
|
||||
"github.com/ollama/ollama/telemetry"
|
||||
"github.com/ollama/ollama/template"
|
||||
"github.com/ollama/ollama/thinking"
|
||||
"github.com/ollama/ollama/tools"
|
||||
"github.com/ollama/ollama/types/errtypes"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
"github.com/ollama/ollama/version"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
const signinURLStr = "https://ollama.com/connect?name=%s&key=%s"
|
||||
|
@ -79,6 +83,7 @@ type Server struct {
|
|||
addr net.Addr
|
||||
sched *Scheduler
|
||||
lowVRAM bool
|
||||
metrics *telemetry.Metrics
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
@ -508,6 +513,17 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||
res.TotalDuration = time.Since(checkpointStart)
|
||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||
|
||||
attrs := metric.WithAttributes(
|
||||
attribute.String("model", req.Model),
|
||||
attribute.String("reason", res.DoneReason),
|
||||
)
|
||||
s.metrics.TotalDuration.Add(c.Request.Context(), res.TotalDuration.Seconds(), attrs)
|
||||
s.metrics.LoadDuration.Add(c.Request.Context(), res.LoadDuration.Seconds(), attrs)
|
||||
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(cr.PromptEvalCount), attrs)
|
||||
s.metrics.PromptEvalDuration.Add(c.Request.Context(), cr.PromptEvalDuration.Seconds(), attrs)
|
||||
s.metrics.EvalCount.Add(c.Request.Context(), int64(cr.EvalCount), attrs)
|
||||
s.metrics.EvalDuration.Add(c.Request.Context(), cr.EvalDuration.Seconds(), attrs)
|
||||
|
||||
if !req.Raw {
|
||||
tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
|
||||
if err != nil {
|
||||
|
@ -706,6 +722,15 @@ func (s *Server) EmbedHandler(c *gin.Context) {
|
|||
LoadDuration: checkpointLoaded.Sub(checkpointStart),
|
||||
PromptEvalCount: count,
|
||||
}
|
||||
|
||||
attrs := metric.WithAttributes(
|
||||
attribute.String("model", req.Model),
|
||||
)
|
||||
s.metrics.TotalDuration.Add(c.Request.Context(), resp.TotalDuration.Seconds(), attrs)
|
||||
s.metrics.LoadDuration.Add(c.Request.Context(), resp.LoadDuration.Seconds(), attrs)
|
||||
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(resp.PromptEvalCount), attrs)
|
||||
s.metrics.PromptEvalDuration.Add(c.Request.Context(), resp.TotalDuration.Seconds(), attrs)
|
||||
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
|
@ -1408,11 +1433,21 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
|
|||
}
|
||||
corsConfig.AllowOrigins = envconfig.AllowedOrigins()
|
||||
|
||||
m, err := telemetry.InitMetrics()
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("Metrics initialization failed with %s", err))
|
||||
}
|
||||
s.metrics = m
|
||||
s.metrics.Start.Record(nil, time.Now().UnixMicro()/1e6, metric.WithAttributes(
|
||||
attribute.String("version", version.Version),
|
||||
))
|
||||
|
||||
r := gin.Default()
|
||||
r.HandleMethodNotAllowed = true
|
||||
r.Use(
|
||||
cors.New(corsConfig),
|
||||
allowedHostsMiddleware(s.addr),
|
||||
prometheusMetricsMiddleware(s.metrics),
|
||||
)
|
||||
|
||||
// General
|
||||
|
@ -1448,6 +1483,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
|
|||
r.POST("/api/embed", s.EmbedHandler)
|
||||
r.POST("/api/embeddings", s.EmbeddingsHandler)
|
||||
|
||||
r.GET("/metrics", s.MetricsHandler)
|
||||
|
||||
// Inference (OpenAI compatibility)
|
||||
r.POST("/v1/chat/completions", middleware.ChatMiddleware(), s.ChatHandler)
|
||||
r.POST("/v1/completions", middleware.CompletionsMiddleware(), s.GenerateHandler)
|
||||
|
@ -1993,6 +2030,17 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||
res.DoneReason = r.DoneReason.String()
|
||||
res.TotalDuration = time.Since(checkpointStart)
|
||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||
|
||||
attrs := metric.WithAttributes(
|
||||
attribute.String("model", req.Model),
|
||||
attribute.String("reason", res.DoneReason),
|
||||
)
|
||||
s.metrics.TotalDuration.Add(c.Request.Context(), res.TotalDuration.Seconds(), attrs)
|
||||
s.metrics.LoadDuration.Add(c.Request.Context(), res.LoadDuration.Seconds(), attrs)
|
||||
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(r.PromptEvalCount), attrs)
|
||||
s.metrics.PromptEvalDuration.Add(c.Request.Context(), r.PromptEvalDuration.Seconds(), attrs)
|
||||
s.metrics.EvalCount.Add(c.Request.Context(), int64(r.EvalCount), attrs)
|
||||
s.metrics.EvalDuration.Add(c.Request.Context(), r.EvalDuration.Seconds(), attrs)
|
||||
}
|
||||
|
||||
if builtinParser != nil {
|
||||
|
@ -2136,3 +2184,47 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
|
|||
}
|
||||
return msgs
|
||||
}
|
||||
|
||||
func prometheusMetricsMiddleware(m *telemetry.Metrics) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
// Call the next middleware/handler
|
||||
c.Next()
|
||||
|
||||
responseStatus := c.Writer.Status()
|
||||
statusText := http.StatusText(responseStatus)
|
||||
|
||||
route := c.FullPath()
|
||||
|
||||
m.RecordRequests(c.Request.Context(), "all", int64(responseStatus), statusText)
|
||||
|
||||
// Record the specific route action metric
|
||||
if route != "" {
|
||||
action := routeToAction(route)
|
||||
m.RecordRequests(c.Request.Context(), action, int64(responseStatus), statusText)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// routeToAction converts a route pattern to an action string used as a metric
// label (e.g., `/api/pull` -> "pull").
//
// Explicitly mapped routes are handled first. Any other route with at least
// two path segments yields its last static segment: trailing gin path
// parameters (segments starting with ':' or '*') are skipped so that a
// pattern such as `/api/blobs/:digest` reports "blobs" rather than the
// placeholder ":digest". Routes with fewer than two segments (e.g. `/`)
// report "head".
func routeToAction(route string) string {
	// Customized mapping goes in the case statements.
	switch route {
	case "/api/chat", "/v1/chat/completions":
		return "chat"
	case "/api/embed", "/v1/embeddings":
		return "embed"
	}

	// A leading "/" yields an empty first element, so "/a/b" splits into
	// ["", "a", "b"].
	parts := strings.Split(route, "/")
	if len(parts) <= 2 {
		return "head"
	}

	// Walk back from the end over parameter placeholders, stopping before the
	// first path segment (index 1), which is the API prefix.
	for i := len(parts) - 1; i >= 2; i-- {
		if !strings.HasPrefix(parts[i], ":") && !strings.HasPrefix(parts[i], "*") {
			return parts[i]
		}
	}

	// Every segment after the prefix is a parameter; keep the previous
	// behavior of returning the last one.
	return parts[len(parts)-1]
}
|
||||
|
||||
// MetricsHandler returns the gin.HandlerFunc that provides the Prometheus metrics format on GET requests
|
||||
func (s *Server) MetricsHandler(c *gin.Context) {
|
||||
promhttp.Handler().ServeHTTP(c.Writer, c.Request)
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ import (
|
|||
"github.com/ollama/ollama/server/internal/client/ollama"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
"github.com/ollama/ollama/version"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func createTestFile(t *testing.T, name string) (string, string) {
|
||||
|
@ -493,6 +494,20 @@ func TestRoutes(t *testing.T) {
|
|||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Metrics Handler",
|
||||
Method: http.MethodGet,
|
||||
Path: "/metrics",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
},
|
||||
Expected: func(t *testing.T, resp *http.Response) {
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
assert.Equal(t, contentType, "text/plain; version=0.0.4; charset=utf-8; escaping=values")
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
assert.Nil(t, err)
|
||||
assert.Contains(t, string(body), "http_requests_total")
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
modelsDir := t.TempDir()
|
||||
|
@ -964,3 +979,27 @@ func TestWaitForStream(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRouteToAction(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
route string
|
||||
expectedAction string
|
||||
}{
|
||||
{"Chat completion v1", "/v1/chat/completions", "chat"},
|
||||
{"Chat API", "/api/chat", "chat"},
|
||||
{"Embed v1", "/v1/embeddings", "embed"},
|
||||
{"Embed API", "/api/embed", "embed"},
|
||||
{"Pull API", "/api/pull", "pull"},
|
||||
{"Push API", "/api/push", "push"},
|
||||
{"Root path", "/", "head"},
|
||||
{"Anyother path", "/api/anyother", "anyother"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
action := routeToAction(tt.route)
|
||||
assert.Equal(t, tt.expectedAction, action)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/contrib/instrumentation/runtime"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.17.0"
|
||||
)
|
||||
|
||||
// namespace is used both as the service name resource attribute and as the
// meter name in InitMetrics.
const namespace = "ollama"
|
||||
|
||||
type Metrics struct {
|
||||
Start metric.Int64Gauge
|
||||
Requests metric.Int64Counter
|
||||
TotalDuration metric.Float64Counter
|
||||
LoadDuration metric.Float64Counter
|
||||
PromptEvalCount metric.Int64Counter
|
||||
PromptEvalDuration metric.Float64Counter
|
||||
EvalCount metric.Int64Counter
|
||||
EvalDuration metric.Float64Counter
|
||||
}
|
||||
|
||||
func NewMetrics(meter metric.Meter) *Metrics {
|
||||
build, _ := meter.Int64Gauge(
|
||||
"ollama_build_info",
|
||||
metric.WithDescription("Ollama start date (as Unixtime) and build version."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
req, _ := meter.Int64Counter(
|
||||
"http_requests_total",
|
||||
metric.WithDescription("The total number of requests on the endpoints."),
|
||||
metric.WithUnit("requests"),
|
||||
)
|
||||
|
||||
totalDuration, _ := meter.Float64Counter(
|
||||
"ollama_total_duration_seconds",
|
||||
metric.WithDescription("The request total duration in seconds."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
loadDuration, _ := meter.Float64Counter(
|
||||
"ollama_load_duration_seconds",
|
||||
metric.WithDescription("The request load duration in seconds."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
promptEvalCount, _ := meter.Int64Counter(
|
||||
"ollama_prompt_eval_total",
|
||||
metric.WithDescription("The number of prompt token evaluated."),
|
||||
metric.WithUnit("tokens"),
|
||||
)
|
||||
|
||||
promptEvalDuration, _ := meter.Float64Counter(
|
||||
"ollama_prompt_eval_duration_seconds",
|
||||
metric.WithDescription("The prompt evaluation duration in seconds."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
evalCount, _ := meter.Int64Counter(
|
||||
"ollama_eval_total",
|
||||
metric.WithDescription("The number of token evaluated."),
|
||||
metric.WithUnit("tokens"),
|
||||
)
|
||||
|
||||
evalDuration, _ := meter.Float64Counter(
|
||||
"ollama_eval_duration_seconds",
|
||||
metric.WithDescription("The prompt evaluation duration in seconds."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
return &Metrics{
|
||||
Start: build,
|
||||
Requests: req,
|
||||
TotalDuration: totalDuration,
|
||||
LoadDuration: loadDuration,
|
||||
PromptEvalCount: promptEvalCount,
|
||||
PromptEvalDuration: promptEvalDuration,
|
||||
EvalCount: evalCount,
|
||||
EvalDuration: evalDuration,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Metrics) RecordRequests(ctx context.Context, action string, statusCode int64, status string) {
|
||||
m.Requests.Add(ctx, 1, metric.WithAttributes(
|
||||
attribute.String("action", action),
|
||||
attribute.Int64("status_code", statusCode),
|
||||
attribute.String("status", status),
|
||||
))
|
||||
}
|
||||
|
||||
func NewPrometheusMeterProvider(res *resource.Resource, exp *prometheus.Exporter) (*sdkmetric.MeterProvider, error) {
|
||||
if exp == nil {
|
||||
return nil, errors.New("exporter cannot be nil")
|
||||
}
|
||||
meterProvider := sdkmetric.NewMeterProvider(
|
||||
sdkmetric.WithResource(res),
|
||||
sdkmetric.WithReader(exp),
|
||||
)
|
||||
|
||||
// Start go runtime metric collection.
|
||||
err := runtime.Start(runtime.WithMeterProvider(meterProvider),
|
||||
runtime.WithMinimumReadMemStatsInterval(time.Second))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return meterProvider, nil
|
||||
}
|
||||
|
||||
func InitMetrics() (*Metrics, error) {
|
||||
res, err := resource.New(context.Background(),
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceNameKey.String(namespace),
|
||||
semconv.ServiceVersionKey.String("v0.1.0"),
|
||||
),
|
||||
resource.WithProcessRuntimeDescription(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
exporter, err := prometheus.New()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mp, err := NewPrometheusMeterProvider(res, exporter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
otel.SetMeterProvider(mp)
|
||||
|
||||
meter := mp.Meter(namespace, metric.WithInstrumentationVersion(runtime.Version()))
|
||||
return NewMetrics(meter), nil
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
package telemetry
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/noop"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
)
|
||||
|
||||
func TestNewMetrics(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
meter metric.Meter
|
||||
expectedMetric string
|
||||
}{
|
||||
{
|
||||
name: "Valid Meter",
|
||||
meter: noop.NewMeterProvider().Meter("test"),
|
||||
expectedMetric: "http_requests_total",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
metrics := NewMetrics(tt.meter)
|
||||
|
||||
// Ensure the metric is registered correctly
|
||||
assert.NotNil(t, metrics)
|
||||
assert.NotNil(t, metrics.Requests)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewPrometheusMeterProvider(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
wantErr bool
|
||||
mockPrometheus func() (*prometheus.Exporter, error)
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
name: "Successful creation of meter provider",
|
||||
wantErr: false,
|
||||
mockPrometheus: func() (*prometheus.Exporter, error) {
|
||||
return &prometheus.Exporter{
|
||||
Reader: sdkmetric.NewManualReader(),
|
||||
}, nil
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Error on resource creation",
|
||||
wantErr: true,
|
||||
expectedError: errors.New("error creating prometheus resource"),
|
||||
mockPrometheus: func() (*prometheus.Exporter, error) {
|
||||
return nil, errors.New("error creating prometheus resource")
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
res := resource.NewSchemaless() // Use an empty resource for testing.
|
||||
exp, _ := tt.mockPrometheus()
|
||||
mp, err := NewPrometheusMeterProvider(res, exp)
|
||||
|
||||
if tt.wantErr {
|
||||
assert.NotNil(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, mp)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue