Compare commits

...

10 Commits

Author SHA1 Message Date
Lapo Luchini 189b9420ea
Merge b4289cc3a0 into bd15eba4e4 2025-10-07 19:18:29 -04:00
Daniel Hiltgen bd15eba4e4
Bring back escape valve for llm libraries and fix Jetpack6 crash (#12529)
* Bring back escape valve for llm libraries

If the new discovery logic picks the wrong library, this gives users the
ability to force a specific one using the same pattern as before. This
can also potentially speed up bootstrap discovery if one of the libraries
takes a long time to load and ultimately bind to no devices.  For example
unsupported AMD iGPUS can sometimes take a while to discover and rule out.

* Bypass extra discovery on jetpack systems

On at least Jetpack6, cuda_v12 appears to expose the iGPU, but crashes later on in
cublasInit so if we detect a Jetpack, short-circuit and use that variant.
2025-10-07 16:06:14 -07:00
Lapo Luchini b4289cc3a0
Merge tag 'v0.12.2' into add-otel-metrics 2025-09-26 00:04:43 +02:00
Lapo Luchini 4e18d92c28
Merge tag 'v0.11.3' into add-otel-metrics 2025-08-06 17:42:18 +02:00
Lapo Luchini 8c7702dacc
Add some of the metrics in `EmbedHandler` too. 2025-06-22 10:43:11 +02:00
Lapo Luchini e99128a223
Add same metrics in `ChatHandler` as in `GenerateHandler`. 2025-06-22 10:37:41 +02:00
Lapo Luchini cf9abf5001
Add build version (and start date). 2025-06-22 10:14:31 +02:00
Lapo Luchini ce7853091c
Add all `ollama run --verbose` metrics to OTel. 2025-06-22 09:35:59 +02:00
Lapo Luchini 01250de101
Merge branch 'main' into add-otel-metrics 2025-06-21 15:35:35 +02:00
amila-ku c9c0e24a87 metrics: Add metrics endpoint and basic request metrics 2024-10-14 23:34:04 +01:00
10 changed files with 456 additions and 18 deletions

View File

@ -15,19 +15,19 @@ func main() {
}
messages := []api.Message{
api.Message{
{
Role: "system",
Content: "Provide very brief, concise responses",
},
api.Message{
{
Role: "user",
Content: "Name some unusual animals",
},
api.Message{
{
Role: "assistant",
Content: "Monotreme, platypus, echidna",
},
api.Message{
{
Role: "user",
Content: "which of these is the most dangerous?",
},

View File

@ -6,7 +6,9 @@ import (
"log/slog"
"os"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
"github.com/ollama/ollama/format"
@ -146,3 +148,35 @@ func GetSystemInfo() SystemInfo {
GPUs: gpus,
}
}
func cudaJetpack() string {
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
if CudaTegra != "" {
ver := strings.Split(CudaTegra, ".")
if len(ver) > 0 {
return "jetpack" + ver[0]
}
} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
r := regexp.MustCompile(` R(\d+) `)
m := r.FindSubmatch(data)
if len(m) != 2 {
slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
} else {
if l4t, err := strconv.Atoi(string(m[1])); err == nil {
// Note: mapping from L4t -> JP is inconsistent (can't just subtract 30)
// https://developer.nvidia.com/embedded/jetpack-archive
switch l4t {
case 35:
return "jetpack5"
case 36:
return "jetpack6"
default:
// Newer Jetson systems use the SBSU runtime
slog.Debug("unrecognized L4T version", "nv_tegra_release", string(data))
}
}
}
}
}
return ""
}

View File

@ -78,6 +78,8 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
}
slog.Info("discovering available GPUs...")
requested := envconfig.LLMLibrary()
jetpack := cudaJetpack()
// For our initial discovery pass, we gather all the known GPUs through
// all the libraries that were detected. This pass may include GPUs that
@ -86,6 +88,14 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
// times concurrently leading to memory contention
for dir := range libDirs {
var dirs []string
if dir != "" {
if requested != "" && filepath.Base(dir) != requested {
slog.Debug("skipping available library at users request", "requested", requested, "libDir", dir)
continue
} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
continue
}
}
if dir == "" {
dirs = []string{LibOllamaPath}
} else {

View File

@ -48,16 +48,10 @@ Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5]
**Experimental LLM Library Override**
You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to bypass autodetection, so for example, if you have a CUDA card, but want to force the CPU LLM library with AVX2 vector support, use:
You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to limit autodetection, so for example, if you have both CUDA and AMD GPUs, but want to force the CUDA v13 only, use:
```shell
OLLAMA_LLM_LIBRARY="cpu_avx2" ollama serve
```
You can see what features your CPU has with the following.
```shell
cat /proc/cpuinfo| grep flags | head -1
OLLAMA_LLM_LIBRARY="cuda_v13" ollama serve
```
## Installing older or pre-release versions on Linux

16
go.mod
View File

@ -23,6 +23,13 @@ require (
github.com/mattn/go-runewidth v0.0.14
github.com/nlpodyssey/gopickle v0.3.0
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
github.com/prometheus/client_golang v1.19.1
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0
go.opentelemetry.io/otel v1.30.0
go.opentelemetry.io/otel/exporters/prometheus v0.49.0
go.opentelemetry.io/otel/metric v1.30.0
go.opentelemetry.io/otel/sdk v1.30.0
go.opentelemetry.io/otel/sdk/metric v1.30.0
golang.org/x/image v0.22.0
golang.org/x/tools v0.30.0
gonum.org/v1/gonum v0.15.0
@ -30,19 +37,26 @@ require (
require (
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bytedance/sonic/loader v0.1.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/chewxy/hm v1.0.0 // indirect
github.com/chewxy/math32 v1.11.0 // indirect
github.com/cloudwego/base64x v0.1.4 // indirect
github.com/cloudwego/iasm v0.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-logr/logr v1.4.2 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/flatbuffers v24.3.25+incompatible // indirect
github.com/kr/text v0.2.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.53.0 // indirect
github.com/prometheus/procfs v0.15.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/xtgo/set v1.0.0 // indirect
go.opentelemetry.io/otel/trace v1.30.0 // indirect
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
gorgonia.org/vecf32 v0.9.0 // indirect

40
go.sum
View File

@ -12,12 +12,16 @@ github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6IC
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
@ -34,7 +38,6 @@ github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWH
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 h1:cBzrdJPAFBsgCrDPnZxlp1dF2+k4r1kVpD7+1S1PVjY=
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1/go.mod h1:uw2gLcxEuYUlAd/EXyjc/v55nd3+47YAgWbSXVxPrNI=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@ -70,6 +73,11 @@ github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2H
github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
@ -133,8 +141,8 @@ github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa02
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
@ -166,12 +174,20 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE=
github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U=
github.com/prometheus/procfs v0.15.0 h1:A82kmvXJq2jTu5YUhSGNlYoxh85zLnKgPz4bMZgI5Ek=
github.com/prometheus/procfs v0.15.0/go.mod h1:Y0RJ/Y5g5wJpkTisOtqwDSo4HwhGmLB4VQSw2sQJLHk=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
@ -204,6 +220,20 @@ github.com/xtgo/set v1.0.0/go.mod h1:d3NHzGzSa0NmB2NhFyECA+QdRp29oEn2xbT+TpeFoM8
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0 h1:GotCpbh7YkCHdFs+hYMdvAEyGsBZifFognqrOnBwyJM=
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0/go.mod h1:6b0AS55EEPj7qP44khqF5dqTUq+RkakDMShFaW1EcA4=
go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts=
go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc=
go.opentelemetry.io/otel/exporters/prometheus v0.49.0 h1:Er5I1g/YhfYv9Affk9nJLfH/+qCCVVg1f2R9AbJfqDQ=
go.opentelemetry.io/otel/exporters/prometheus v0.49.0/go.mod h1:KfQ1wpjf3zsHjzP149P4LyAwWRupc6c7t1ZJ9eXpKQM=
go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w=
go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ=
go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE=
go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg=
go.opentelemetry.io/otel/sdk/metric v1.30.0 h1:QJLT8Pe11jyHBHfSAgYH7kEmT24eX792jZO1bo4BXkM=
go.opentelemetry.io/otel/sdk/metric v1.30.0/go.mod h1:waS6P3YqFNzeP01kuo/MBBYqaoBJl7efRQHOaydhy1Y=
go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc=
go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o=
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 h1:lGdhQUN/cnWdSH3291CUuxSEqc+AsGTiDxPP3r2J0l4=
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6/go.mod h1:FftLjUGFEDu5k8lt0ddY+HcrH/qU/0qk+H8j9/nTl3E=

View File

@ -26,6 +26,8 @@ import (
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"golang.org/x/image/webp"
"golang.org/x/sync/errgroup"
@ -41,12 +43,14 @@ import (
"github.com/ollama/ollama/model/parsers"
"github.com/ollama/ollama/server/internal/client/ollama"
"github.com/ollama/ollama/server/internal/registry"
"github.com/ollama/ollama/telemetry"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/thinking"
"github.com/ollama/ollama/tools"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
const signinURLStr = "https://ollama.com/connect?name=%s&key=%s"
@ -79,6 +83,7 @@ type Server struct {
addr net.Addr
sched *Scheduler
lowVRAM bool
metrics *telemetry.Metrics
}
func init() {
@ -508,6 +513,17 @@ func (s *Server) GenerateHandler(c *gin.Context) {
res.TotalDuration = time.Since(checkpointStart)
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
attrs := metric.WithAttributes(
attribute.String("model", req.Model),
attribute.String("reason", res.DoneReason),
)
s.metrics.TotalDuration.Add(c.Request.Context(), res.TotalDuration.Seconds(), attrs)
s.metrics.LoadDuration.Add(c.Request.Context(), res.LoadDuration.Seconds(), attrs)
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(cr.PromptEvalCount), attrs)
s.metrics.PromptEvalDuration.Add(c.Request.Context(), cr.PromptEvalDuration.Seconds(), attrs)
s.metrics.EvalCount.Add(c.Request.Context(), int64(cr.EvalCount), attrs)
s.metrics.EvalDuration.Add(c.Request.Context(), cr.EvalDuration.Seconds(), attrs)
if !req.Raw {
tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
if err != nil {
@ -706,6 +722,15 @@ func (s *Server) EmbedHandler(c *gin.Context) {
LoadDuration: checkpointLoaded.Sub(checkpointStart),
PromptEvalCount: count,
}
attrs := metric.WithAttributes(
attribute.String("model", req.Model),
)
s.metrics.TotalDuration.Add(c.Request.Context(), resp.TotalDuration.Seconds(), attrs)
s.metrics.LoadDuration.Add(c.Request.Context(), resp.LoadDuration.Seconds(), attrs)
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(resp.PromptEvalCount), attrs)
s.metrics.PromptEvalDuration.Add(c.Request.Context(), resp.TotalDuration.Seconds(), attrs)
c.JSON(http.StatusOK, resp)
}
@ -1408,11 +1433,21 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
}
corsConfig.AllowOrigins = envconfig.AllowedOrigins()
m, err := telemetry.InitMetrics()
if err != nil {
slog.Warn(fmt.Sprintf("Metrics initialization failed with %s", err))
}
s.metrics = m
s.metrics.Start.Record(nil, time.Now().UnixMicro()/1e6, metric.WithAttributes(
attribute.String("version", version.Version),
))
r := gin.Default()
r.HandleMethodNotAllowed = true
r.Use(
cors.New(corsConfig),
allowedHostsMiddleware(s.addr),
prometheusMetricsMiddleware(s.metrics),
)
// General
@ -1448,6 +1483,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
r.POST("/api/embed", s.EmbedHandler)
r.POST("/api/embeddings", s.EmbeddingsHandler)
r.GET("/metrics", s.MetricsHandler)
// Inference (OpenAI compatibility)
r.POST("/v1/chat/completions", middleware.ChatMiddleware(), s.ChatHandler)
r.POST("/v1/completions", middleware.CompletionsMiddleware(), s.GenerateHandler)
@ -1993,6 +2030,17 @@ func (s *Server) ChatHandler(c *gin.Context) {
res.DoneReason = r.DoneReason.String()
res.TotalDuration = time.Since(checkpointStart)
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
attrs := metric.WithAttributes(
attribute.String("model", req.Model),
attribute.String("reason", res.DoneReason),
)
s.metrics.TotalDuration.Add(c.Request.Context(), res.TotalDuration.Seconds(), attrs)
s.metrics.LoadDuration.Add(c.Request.Context(), res.LoadDuration.Seconds(), attrs)
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(r.PromptEvalCount), attrs)
s.metrics.PromptEvalDuration.Add(c.Request.Context(), r.PromptEvalDuration.Seconds(), attrs)
s.metrics.EvalCount.Add(c.Request.Context(), int64(r.EvalCount), attrs)
s.metrics.EvalDuration.Add(c.Request.Context(), r.EvalDuration.Seconds(), attrs)
}
if builtinParser != nil {
@ -2136,3 +2184,47 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
}
return msgs
}
func prometheusMetricsMiddleware(m *telemetry.Metrics) gin.HandlerFunc {
return func(c *gin.Context) {
// Call the next middleware/handler
c.Next()
responseStatus := c.Writer.Status()
statusText := http.StatusText(responseStatus)
route := c.FullPath()
m.RecordRequests(c.Request.Context(), "all", int64(responseStatus), statusText)
// Record the specific route action metric
if route != "" {
action := routeToAction(route)
m.RecordRequests(c.Request.Context(), action, int64(responseStatus), statusText)
}
}
}
// routeToAction converts a route pattern to an action string (e.g., `/api/pull` -> "pull").
func routeToAction(route string) string {
// Customized mapping goes in the case statements.
switch route {
case "/api/chat", "/v1/chat/completions":
return "chat"
case "/api/embed", "/v1/embeddings":
return "embed"
default:
// Default action derived from the route itself (e.g., `/api/pull` -> "pull")
parts := strings.Split(route, "/")
if len(parts) > 2 {
return parts[len(parts)-1] // Use the last part of the route as the action
}
return "head"
}
}
// MetricsHandler returns the gin.HandlerFunc that provides the Prometheus metrics format on GET requests
func (s *Server) MetricsHandler(c *gin.Context) {
promhttp.Handler().ServeHTTP(c.Writer, c.Request)
}

View File

@ -30,6 +30,7 @@ import (
"github.com/ollama/ollama/server/internal/client/ollama"
"github.com/ollama/ollama/types/model"
"github.com/ollama/ollama/version"
"github.com/stretchr/testify/assert"
)
func createTestFile(t *testing.T, name string) (string, string) {
@ -493,6 +494,20 @@ func TestRoutes(t *testing.T) {
}
},
},
{
Name: "Metrics Handler",
Method: http.MethodGet,
Path: "/metrics",
Setup: func(t *testing.T, req *http.Request) {
},
Expected: func(t *testing.T, resp *http.Response) {
contentType := resp.Header.Get("Content-Type")
assert.Equal(t, contentType, "text/plain; version=0.0.4; charset=utf-8; escaping=values")
body, err := io.ReadAll(resp.Body)
assert.Nil(t, err)
assert.Contains(t, string(body), "http_requests_total")
},
},
}
modelsDir := t.TempDir()
@ -964,3 +979,27 @@ func TestWaitForStream(t *testing.T) {
})
}
}
func TestRouteToAction(t *testing.T) {
tests := []struct {
name string
route string
expectedAction string
}{
{"Chat completion v1", "/v1/chat/completions", "chat"},
{"Chat API", "/api/chat", "chat"},
{"Embed v1", "/v1/embeddings", "embed"},
{"Embed API", "/api/embed", "embed"},
{"Pull API", "/api/pull", "pull"},
{"Push API", "/api/push", "push"},
{"Root path", "/", "head"},
{"Anyother path", "/api/anyother", "anyother"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
action := routeToAction(tt.route)
assert.Equal(t, tt.expectedAction, action)
})
}
}

146
telemetry/metrics.go Normal file
View File

@ -0,0 +1,146 @@
package telemetry
import (
"context"
"errors"
"time"
"go.opentelemetry.io/contrib/instrumentation/runtime"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.17.0"
)
const (
namespace = "ollama"
)
type Metrics struct {
Start metric.Int64Gauge
Requests metric.Int64Counter
TotalDuration metric.Float64Counter
LoadDuration metric.Float64Counter
PromptEvalCount metric.Int64Counter
PromptEvalDuration metric.Float64Counter
EvalCount metric.Int64Counter
EvalDuration metric.Float64Counter
}
func NewMetrics(meter metric.Meter) *Metrics {
build, _ := meter.Int64Gauge(
"ollama_build_info",
metric.WithDescription("Ollama start date (as Unixtime) and build version."),
metric.WithUnit("seconds"),
)
req, _ := meter.Int64Counter(
"http_requests_total",
metric.WithDescription("The total number of requests on the endpoints."),
metric.WithUnit("requests"),
)
totalDuration, _ := meter.Float64Counter(
"ollama_total_duration_seconds",
metric.WithDescription("The request total duration in seconds."),
metric.WithUnit("seconds"),
)
loadDuration, _ := meter.Float64Counter(
"ollama_load_duration_seconds",
metric.WithDescription("The request load duration in seconds."),
metric.WithUnit("seconds"),
)
promptEvalCount, _ := meter.Int64Counter(
"ollama_prompt_eval_total",
metric.WithDescription("The number of prompt token evaluated."),
metric.WithUnit("tokens"),
)
promptEvalDuration, _ := meter.Float64Counter(
"ollama_prompt_eval_duration_seconds",
metric.WithDescription("The prompt evaluation duration in seconds."),
metric.WithUnit("seconds"),
)
evalCount, _ := meter.Int64Counter(
"ollama_eval_total",
metric.WithDescription("The number of token evaluated."),
metric.WithUnit("tokens"),
)
evalDuration, _ := meter.Float64Counter(
"ollama_eval_duration_seconds",
metric.WithDescription("The prompt evaluation duration in seconds."),
metric.WithUnit("seconds"),
)
return &Metrics{
Start: build,
Requests: req,
TotalDuration: totalDuration,
LoadDuration: loadDuration,
PromptEvalCount: promptEvalCount,
PromptEvalDuration: promptEvalDuration,
EvalCount: evalCount,
EvalDuration: evalDuration,
}
}
func (m *Metrics) RecordRequests(ctx context.Context, action string, statusCode int64, status string) {
m.Requests.Add(ctx, 1, metric.WithAttributes(
attribute.String("action", action),
attribute.Int64("status_code", statusCode),
attribute.String("status", status),
))
}
func NewPrometheusMeterProvider(res *resource.Resource, exp *prometheus.Exporter) (*sdkmetric.MeterProvider, error) {
if exp == nil {
return nil, errors.New("exporter cannot be nil")
}
meterProvider := sdkmetric.NewMeterProvider(
sdkmetric.WithResource(res),
sdkmetric.WithReader(exp),
)
// Start go runtime metric collection.
err := runtime.Start(runtime.WithMeterProvider(meterProvider),
runtime.WithMinimumReadMemStatsInterval(time.Second))
if err != nil {
return nil, err
}
return meterProvider, nil
}
func InitMetrics() (*Metrics, error) {
res, err := resource.New(context.Background(),
resource.WithAttributes(
semconv.ServiceNameKey.String(namespace),
semconv.ServiceVersionKey.String("v0.1.0"),
),
resource.WithProcessRuntimeDescription(),
)
if err != nil {
return nil, err
}
exporter, err := prometheus.New()
if err != nil {
return nil, err
}
mp, err := NewPrometheusMeterProvider(res, exporter)
if err != nil {
return nil, err
}
otel.SetMeterProvider(mp)
meter := mp.Meter(namespace, metric.WithInstrumentationVersion(runtime.Version()))
return NewMetrics(meter), nil
}

79
telemetry/metrics_test.go Normal file
View File

@ -0,0 +1,79 @@
package telemetry
import (
"errors"
"testing"
"go.opentelemetry.io/otel/metric"
"go.opentelemetry.io/otel/metric/noop"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"github.com/stretchr/testify/assert"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/resource"
)
func TestNewMetrics(t *testing.T) {
tests := []struct {
name string
meter metric.Meter
expectedMetric string
}{
{
name: "Valid Meter",
meter: noop.NewMeterProvider().Meter("test"),
expectedMetric: "http_requests_total",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
metrics := NewMetrics(tt.meter)
// Ensure the metric is registered correctly
assert.NotNil(t, metrics)
assert.NotNil(t, metrics.Requests)
})
}
}
func TestNewPrometheusMeterProvider(t *testing.T) {
tests := []struct {
name string
wantErr bool
mockPrometheus func() (*prometheus.Exporter, error)
expectedError error
}{
{
name: "Successful creation of meter provider",
wantErr: false,
mockPrometheus: func() (*prometheus.Exporter, error) {
return &prometheus.Exporter{
Reader: sdkmetric.NewManualReader(),
}, nil
},
},
{
name: "Error on resource creation",
wantErr: true,
expectedError: errors.New("error creating prometheus resource"),
mockPrometheus: func() (*prometheus.Exporter, error) {
return nil, errors.New("error creating prometheus resource")
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
res := resource.NewSchemaless() // Use an empty resource for testing.
exp, _ := tt.mockPrometheus()
mp, err := NewPrometheusMeterProvider(res, exp)
if tt.wantErr {
assert.NotNil(t, err)
} else {
assert.NoError(t, err)
assert.NotNil(t, mp)
}
})
}
}