mirror of https://github.com/ollama/ollama.git
Merge b4289cc3a0
into bd15eba4e4
This commit is contained in:
commit
189b9420ea
|
@ -15,19 +15,19 @@ func main() {
|
|||
}
|
||||
|
||||
messages := []api.Message{
|
||||
api.Message{
|
||||
{
|
||||
Role: "system",
|
||||
Content: "Provide very brief, concise responses",
|
||||
},
|
||||
api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "Name some unusual animals",
|
||||
},
|
||||
api.Message{
|
||||
{
|
||||
Role: "assistant",
|
||||
Content: "Monotreme, platypus, echidna",
|
||||
},
|
||||
api.Message{
|
||||
{
|
||||
Role: "user",
|
||||
Content: "which of these is the most dangerous?",
|
||||
},
|
||||
|
|
16
go.mod
16
go.mod
|
@ -23,6 +23,13 @@ require (
|
|||
github.com/mattn/go-runewidth v0.0.14
|
||||
github.com/nlpodyssey/gopickle v0.3.0
|
||||
github.com/pdevine/tensor v0.0.0-20240510204454-f88f4562727c
|
||||
github.com/prometheus/client_golang v1.19.1
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0
|
||||
go.opentelemetry.io/otel v1.30.0
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.49.0
|
||||
go.opentelemetry.io/otel/metric v1.30.0
|
||||
go.opentelemetry.io/otel/sdk v1.30.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.30.0
|
||||
golang.org/x/image v0.22.0
|
||||
golang.org/x/tools v0.30.0
|
||||
gonum.org/v1/gonum v0.15.0
|
||||
|
@ -30,19 +37,26 @@ require (
|
|||
|
||||
require (
|
||||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/bytedance/sonic/loader v0.1.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/chewxy/hm v1.0.0 // indirect
|
||||
github.com/chewxy/math32 v1.11.0 // indirect
|
||||
github.com/cloudwego/base64x v0.1.4 // indirect
|
||||
github.com/cloudwego/iasm v0.2.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/go-logr/logr v1.4.2 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/google/flatbuffers v24.3.25+incompatible // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.53.0 // indirect
|
||||
github.com/prometheus/procfs v0.15.0 // indirect
|
||||
github.com/rivo/uniseg v0.2.0 // indirect
|
||||
github.com/xtgo/set v1.0.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.30.0 // indirect
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
gorgonia.org/vecf32 v0.9.0 // indirect
|
||||
|
|
40
go.sum
40
go.sum
|
@ -12,12 +12,16 @@ github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40 h1:q4dksr6IC
|
|||
github.com/apache/arrow/go/arrow v0.0.0-20211112161151-bc219186db40/go.mod h1:Q7yQnSMnLvcXlZ8RV+jwz/6y1rQTqbX6C82SndT52Zs=
|
||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
|
||||
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8=
|
||||
github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0=
|
||||
github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4=
|
||||
github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM=
|
||||
github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU=
|
||||
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/chewxy/hm v1.0.0 h1:zy/TSv3LV2nD3dwUEQL2VhXeoXbb9QkpmdRAVUFiA6k=
|
||||
github.com/chewxy/hm v1.0.0/go.mod h1:qg9YI4q6Fkj/whwHR1D+bOGeF7SniIP40VweVepLjg0=
|
||||
github.com/chewxy/math32 v1.0.0/go.mod h1:Miac6hA1ohdDUTagnvJy/q+aNnEk16qWUdb8ZVhvCN0=
|
||||
|
@ -34,7 +38,6 @@ github.com/cncf/xds/go v0.0.0-20210312221358-fbca930ec8ed/go.mod h1:eXthEFrGJvWH
|
|||
github.com/containerd/console v1.0.3 h1:lIr7SlA5PxZyMV30bDW0MGbiOPXwc63yRuCP0ARubLw=
|
||||
github.com/containerd/console v1.0.3/go.mod h1:7LqA/THxQ86k76b8c/EMSiaJ3h1eZkMkXar0TQ1gf3U=
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1 h1:cBzrdJPAFBsgCrDPnZxlp1dF2+k4r1kVpD7+1S1PVjY=
|
||||
github.com/d4l3k/go-bfloat16 v0.0.0-20211005043715-690c3bdd05f1/go.mod h1:uw2gLcxEuYUlAd/EXyjc/v55nd3+47YAgWbSXVxPrNI=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
|
@ -70,6 +73,11 @@ github.com/go-fonts/liberation v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2H
|
|||
github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY=
|
||||
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
|
||||
github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
|
||||
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
|
||||
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
|
||||
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
|
||||
|
@ -133,8 +141,8 @@ github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa02
|
|||
github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM=
|
||||
github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
|
||||
github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M=
|
||||
github.com/kr/pretty v0.3.0 h1:WgNl7dwNpEZ6jJ9k1snq4pZsg7DOEN8hP9Xw0Tsjwk0=
|
||||
github.com/kr/pretty v0.3.0/go.mod h1:640gp4NfQd8pI5XOwp5fnNeVWj67G7CFk/SaSQn7NBk=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
|
||||
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
|
||||
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
|
||||
github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ=
|
||||
|
@ -166,12 +174,20 @@ github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
|||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v1.19.1 h1:wZWJDwK+NameRJuPGDhlnFgx8e8HN3XHQeLaYJFJBOE=
|
||||
github.com/prometheus/client_golang v1.19.1/go.mod h1:mP78NwGzrVks5S2H6ab8+ZZGJLZUq1hoULYBAYBw1Ho=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||
github.com/prometheus/common v0.53.0 h1:U2pL9w9nmJwJDa4qqLQ3ZaePJ6ZTwt7cMD3AG3+aLCE=
|
||||
github.com/prometheus/common v0.53.0/go.mod h1:BrxBKv3FWBIGXw89Mg1AeBq7FSyRzXWI3l3e7W3RN5U=
|
||||
github.com/prometheus/procfs v0.15.0 h1:A82kmvXJq2jTu5YUhSGNlYoxh85zLnKgPz4bMZgI5Ek=
|
||||
github.com/prometheus/procfs v0.15.0/go.mod h1:Y0RJ/Y5g5wJpkTisOtqwDSo4HwhGmLB4VQSw2sQJLHk=
|
||||
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
|
||||
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
|
||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
||||
github.com/rogpeppe/go-internal v1.8.0 h1:FCbCCtXNOY3UtUuHUYaghJg4y7Fd14rXifAYUAtL9R8=
|
||||
github.com/rogpeppe/go-internal v1.8.0/go.mod h1:WmiCO8CzOY8rg0OYDC4/i/2WRWAB6poM+XZ2dLUbcbE=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
|
||||
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
|
||||
github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w=
|
||||
github.com/spf13/cobra v1.7.0 h1:hyqWnYt1ZQShIddO5kBpj3vu05/++x6tJ6dg8EC572I=
|
||||
|
@ -204,6 +220,20 @@ github.com/xtgo/set v1.0.0/go.mod h1:d3NHzGzSa0NmB2NhFyECA+QdRp29oEn2xbT+TpeFoM8
|
|||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0 h1:GotCpbh7YkCHdFs+hYMdvAEyGsBZifFognqrOnBwyJM=
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.55.0/go.mod h1:6b0AS55EEPj7qP44khqF5dqTUq+RkakDMShFaW1EcA4=
|
||||
go.opentelemetry.io/otel v1.30.0 h1:F2t8sK4qf1fAmY9ua4ohFS/K+FUuOPemHUIXHtktrts=
|
||||
go.opentelemetry.io/otel v1.30.0/go.mod h1:tFw4Br9b7fOS+uEao81PJjVMjW/5fvNCbpsDIXqP0pc=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.49.0 h1:Er5I1g/YhfYv9Affk9nJLfH/+qCCVVg1f2R9AbJfqDQ=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.49.0/go.mod h1:KfQ1wpjf3zsHjzP149P4LyAwWRupc6c7t1ZJ9eXpKQM=
|
||||
go.opentelemetry.io/otel/metric v1.30.0 h1:4xNulvn9gjzo4hjg+wzIKG7iNFEaBMX00Qd4QIZs7+w=
|
||||
go.opentelemetry.io/otel/metric v1.30.0/go.mod h1:aXTfST94tswhWEb+5QjlSqG+cZlmyXy/u8jFpor3WqQ=
|
||||
go.opentelemetry.io/otel/sdk v1.30.0 h1:cHdik6irO49R5IysVhdn8oaiR9m8XluDaJAs4DfOrYE=
|
||||
go.opentelemetry.io/otel/sdk v1.30.0/go.mod h1:p14X4Ok8S+sygzblytT1nqG98QG2KYKv++HE0LY/mhg=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.30.0 h1:QJLT8Pe11jyHBHfSAgYH7kEmT24eX792jZO1bo4BXkM=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.30.0/go.mod h1:waS6P3YqFNzeP01kuo/MBBYqaoBJl7efRQHOaydhy1Y=
|
||||
go.opentelemetry.io/otel/trace v1.30.0 h1:7UBkkYzeg3C7kQX8VAidWh2biiQbtAKjyIML8dQ9wmc=
|
||||
go.opentelemetry.io/otel/trace v1.30.0/go.mod h1:5EyKqTzzmyqB9bwtCCq6pDLktPK6fmGf/Dph+8VI02o=
|
||||
go.opentelemetry.io/proto/otlp v0.7.0/go.mod h1:PqfVotwruBrMGOCsRd/89rSnXhoiJIqeYNgFYFoEGnI=
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6 h1:lGdhQUN/cnWdSH3291CUuxSEqc+AsGTiDxPP3r2J0l4=
|
||||
go4.org/unsafe/assume-no-moving-gc v0.0.0-20231121144256-b99613f794b6/go.mod h1:FftLjUGFEDu5k8lt0ddY+HcrH/qU/0qk+H8j9/nTl3E=
|
||||
|
|
|
@ -26,6 +26,8 @@ import (
|
|||
|
||||
"github.com/gin-contrib/cors"
|
||||
"github.com/gin-gonic/gin"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"golang.org/x/image/webp"
|
||||
"golang.org/x/sync/errgroup"
|
||||
|
||||
|
@ -41,12 +43,14 @@ import (
|
|||
"github.com/ollama/ollama/model/parsers"
|
||||
"github.com/ollama/ollama/server/internal/client/ollama"
|
||||
"github.com/ollama/ollama/server/internal/registry"
|
||||
"github.com/ollama/ollama/telemetry"
|
||||
"github.com/ollama/ollama/template"
|
||||
"github.com/ollama/ollama/thinking"
|
||||
"github.com/ollama/ollama/tools"
|
||||
"github.com/ollama/ollama/types/errtypes"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
"github.com/ollama/ollama/version"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
const signinURLStr = "https://ollama.com/connect?name=%s&key=%s"
|
||||
|
@ -79,6 +83,7 @@ type Server struct {
|
|||
addr net.Addr
|
||||
sched *Scheduler
|
||||
lowVRAM bool
|
||||
metrics *telemetry.Metrics
|
||||
}
|
||||
|
||||
func init() {
|
||||
|
@ -508,6 +513,17 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
|||
res.TotalDuration = time.Since(checkpointStart)
|
||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||
|
||||
attrs := metric.WithAttributes(
|
||||
attribute.String("model", req.Model),
|
||||
attribute.String("reason", res.DoneReason),
|
||||
)
|
||||
s.metrics.TotalDuration.Add(c.Request.Context(), res.TotalDuration.Seconds(), attrs)
|
||||
s.metrics.LoadDuration.Add(c.Request.Context(), res.LoadDuration.Seconds(), attrs)
|
||||
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(cr.PromptEvalCount), attrs)
|
||||
s.metrics.PromptEvalDuration.Add(c.Request.Context(), cr.PromptEvalDuration.Seconds(), attrs)
|
||||
s.metrics.EvalCount.Add(c.Request.Context(), int64(cr.EvalCount), attrs)
|
||||
s.metrics.EvalDuration.Add(c.Request.Context(), cr.EvalDuration.Seconds(), attrs)
|
||||
|
||||
if !req.Raw {
|
||||
tokens, err := r.Tokenize(c.Request.Context(), prompt+sb.String())
|
||||
if err != nil {
|
||||
|
@ -706,6 +722,15 @@ func (s *Server) EmbedHandler(c *gin.Context) {
|
|||
LoadDuration: checkpointLoaded.Sub(checkpointStart),
|
||||
PromptEvalCount: count,
|
||||
}
|
||||
|
||||
attrs := metric.WithAttributes(
|
||||
attribute.String("model", req.Model),
|
||||
)
|
||||
s.metrics.TotalDuration.Add(c.Request.Context(), resp.TotalDuration.Seconds(), attrs)
|
||||
s.metrics.LoadDuration.Add(c.Request.Context(), resp.LoadDuration.Seconds(), attrs)
|
||||
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(resp.PromptEvalCount), attrs)
|
||||
s.metrics.PromptEvalDuration.Add(c.Request.Context(), resp.TotalDuration.Seconds(), attrs)
|
||||
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
|
||||
|
@ -1408,11 +1433,21 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
|
|||
}
|
||||
corsConfig.AllowOrigins = envconfig.AllowedOrigins()
|
||||
|
||||
m, err := telemetry.InitMetrics()
|
||||
if err != nil {
|
||||
slog.Warn(fmt.Sprintf("Metrics initialization failed with %s", err))
|
||||
}
|
||||
s.metrics = m
|
||||
s.metrics.Start.Record(nil, time.Now().UnixMicro()/1e6, metric.WithAttributes(
|
||||
attribute.String("version", version.Version),
|
||||
))
|
||||
|
||||
r := gin.Default()
|
||||
r.HandleMethodNotAllowed = true
|
||||
r.Use(
|
||||
cors.New(corsConfig),
|
||||
allowedHostsMiddleware(s.addr),
|
||||
prometheusMetricsMiddleware(s.metrics),
|
||||
)
|
||||
|
||||
// General
|
||||
|
@ -1448,6 +1483,8 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
|
|||
r.POST("/api/embed", s.EmbedHandler)
|
||||
r.POST("/api/embeddings", s.EmbeddingsHandler)
|
||||
|
||||
r.GET("/metrics", s.MetricsHandler)
|
||||
|
||||
// Inference (OpenAI compatibility)
|
||||
r.POST("/v1/chat/completions", middleware.ChatMiddleware(), s.ChatHandler)
|
||||
r.POST("/v1/completions", middleware.CompletionsMiddleware(), s.GenerateHandler)
|
||||
|
@ -1993,6 +2030,17 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||
res.DoneReason = r.DoneReason.String()
|
||||
res.TotalDuration = time.Since(checkpointStart)
|
||||
res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
|
||||
|
||||
attrs := metric.WithAttributes(
|
||||
attribute.String("model", req.Model),
|
||||
attribute.String("reason", res.DoneReason),
|
||||
)
|
||||
s.metrics.TotalDuration.Add(c.Request.Context(), res.TotalDuration.Seconds(), attrs)
|
||||
s.metrics.LoadDuration.Add(c.Request.Context(), res.LoadDuration.Seconds(), attrs)
|
||||
s.metrics.PromptEvalCount.Add(c.Request.Context(), int64(r.PromptEvalCount), attrs)
|
||||
s.metrics.PromptEvalDuration.Add(c.Request.Context(), r.PromptEvalDuration.Seconds(), attrs)
|
||||
s.metrics.EvalCount.Add(c.Request.Context(), int64(r.EvalCount), attrs)
|
||||
s.metrics.EvalDuration.Add(c.Request.Context(), r.EvalDuration.Seconds(), attrs)
|
||||
}
|
||||
|
||||
if builtinParser != nil {
|
||||
|
@ -2136,3 +2184,47 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
|
|||
}
|
||||
return msgs
|
||||
}
|
||||
|
||||
func prometheusMetricsMiddleware(m *telemetry.Metrics) gin.HandlerFunc {
|
||||
return func(c *gin.Context) {
|
||||
// Call the next middleware/handler
|
||||
c.Next()
|
||||
|
||||
responseStatus := c.Writer.Status()
|
||||
statusText := http.StatusText(responseStatus)
|
||||
|
||||
route := c.FullPath()
|
||||
|
||||
m.RecordRequests(c.Request.Context(), "all", int64(responseStatus), statusText)
|
||||
|
||||
// Record the specific route action metric
|
||||
if route != "" {
|
||||
action := routeToAction(route)
|
||||
m.RecordRequests(c.Request.Context(), action, int64(responseStatus), statusText)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// routeToAction maps a route pattern to the action label attached to request
// metrics (e.g. `/api/pull` -> "pull").
//
// The chat and embed endpoints have explicit mappings so that the native and
// OpenAI-compatible routes share one label. Any other route with at least two
// slashes is labelled with the text after its final slash; routes with fewer
// slashes (such as "/") are labelled "head".
func routeToAction(route string) string {
	// Explicit mappings take precedence over the derived label.
	if route == "/api/chat" || route == "/v1/chat/completions" {
		return "chat"
	}
	if route == "/api/embed" || route == "/v1/embeddings" {
		return "embed"
	}

	// Fewer than two slashes means there is no nested segment to use.
	if strings.Count(route, "/") < 2 {
		return "head"
	}

	// Use whatever follows the last slash as the action.
	return route[strings.LastIndex(route, "/")+1:]
}
|
||||
|
||||
// MetricsHandler returns the gin.HandlerFunc that provides the Prometheus metrics format on GET requests
|
||||
func (s *Server) MetricsHandler(c *gin.Context) {
|
||||
promhttp.Handler().ServeHTTP(c.Writer, c.Request)
|
||||
}
|
||||
|
|
|
@ -30,6 +30,7 @@ import (
|
|||
"github.com/ollama/ollama/server/internal/client/ollama"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
"github.com/ollama/ollama/version"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func createTestFile(t *testing.T, name string) (string, string) {
|
||||
|
@ -493,6 +494,20 @@ func TestRoutes(t *testing.T) {
|
|||
}
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "Metrics Handler",
|
||||
Method: http.MethodGet,
|
||||
Path: "/metrics",
|
||||
Setup: func(t *testing.T, req *http.Request) {
|
||||
},
|
||||
Expected: func(t *testing.T, resp *http.Response) {
|
||||
contentType := resp.Header.Get("Content-Type")
|
||||
assert.Equal(t, contentType, "text/plain; version=0.0.4; charset=utf-8; escaping=values")
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
assert.Nil(t, err)
|
||||
assert.Contains(t, string(body), "http_requests_total")
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
modelsDir := t.TempDir()
|
||||
|
@ -964,3 +979,27 @@ func TestWaitForStream(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestRouteToAction(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
route string
|
||||
expectedAction string
|
||||
}{
|
||||
{"Chat completion v1", "/v1/chat/completions", "chat"},
|
||||
{"Chat API", "/api/chat", "chat"},
|
||||
{"Embed v1", "/v1/embeddings", "embed"},
|
||||
{"Embed API", "/api/embed", "embed"},
|
||||
{"Pull API", "/api/pull", "pull"},
|
||||
{"Push API", "/api/push", "push"},
|
||||
{"Root path", "/", "head"},
|
||||
{"Anyother path", "/api/anyother", "anyother"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
action := routeToAction(tt.route)
|
||||
assert.Equal(t, tt.expectedAction, action)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/contrib/instrumentation/runtime"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.17.0"
|
||||
)
|
||||
|
||||
// namespace is used both as the service name on the telemetry resource and as
// the name of the meter that owns the instruments.
const namespace = "ollama"
|
||||
|
||||
type Metrics struct {
|
||||
Start metric.Int64Gauge
|
||||
Requests metric.Int64Counter
|
||||
TotalDuration metric.Float64Counter
|
||||
LoadDuration metric.Float64Counter
|
||||
PromptEvalCount metric.Int64Counter
|
||||
PromptEvalDuration metric.Float64Counter
|
||||
EvalCount metric.Int64Counter
|
||||
EvalDuration metric.Float64Counter
|
||||
}
|
||||
|
||||
func NewMetrics(meter metric.Meter) *Metrics {
|
||||
build, _ := meter.Int64Gauge(
|
||||
"ollama_build_info",
|
||||
metric.WithDescription("Ollama start date (as Unixtime) and build version."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
req, _ := meter.Int64Counter(
|
||||
"http_requests_total",
|
||||
metric.WithDescription("The total number of requests on the endpoints."),
|
||||
metric.WithUnit("requests"),
|
||||
)
|
||||
|
||||
totalDuration, _ := meter.Float64Counter(
|
||||
"ollama_total_duration_seconds",
|
||||
metric.WithDescription("The request total duration in seconds."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
loadDuration, _ := meter.Float64Counter(
|
||||
"ollama_load_duration_seconds",
|
||||
metric.WithDescription("The request load duration in seconds."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
promptEvalCount, _ := meter.Int64Counter(
|
||||
"ollama_prompt_eval_total",
|
||||
metric.WithDescription("The number of prompt token evaluated."),
|
||||
metric.WithUnit("tokens"),
|
||||
)
|
||||
|
||||
promptEvalDuration, _ := meter.Float64Counter(
|
||||
"ollama_prompt_eval_duration_seconds",
|
||||
metric.WithDescription("The prompt evaluation duration in seconds."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
evalCount, _ := meter.Int64Counter(
|
||||
"ollama_eval_total",
|
||||
metric.WithDescription("The number of token evaluated."),
|
||||
metric.WithUnit("tokens"),
|
||||
)
|
||||
|
||||
evalDuration, _ := meter.Float64Counter(
|
||||
"ollama_eval_duration_seconds",
|
||||
metric.WithDescription("The prompt evaluation duration in seconds."),
|
||||
metric.WithUnit("seconds"),
|
||||
)
|
||||
|
||||
return &Metrics{
|
||||
Start: build,
|
||||
Requests: req,
|
||||
TotalDuration: totalDuration,
|
||||
LoadDuration: loadDuration,
|
||||
PromptEvalCount: promptEvalCount,
|
||||
PromptEvalDuration: promptEvalDuration,
|
||||
EvalCount: evalCount,
|
||||
EvalDuration: evalDuration,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Metrics) RecordRequests(ctx context.Context, action string, statusCode int64, status string) {
|
||||
m.Requests.Add(ctx, 1, metric.WithAttributes(
|
||||
attribute.String("action", action),
|
||||
attribute.Int64("status_code", statusCode),
|
||||
attribute.String("status", status),
|
||||
))
|
||||
}
|
||||
|
||||
func NewPrometheusMeterProvider(res *resource.Resource, exp *prometheus.Exporter) (*sdkmetric.MeterProvider, error) {
|
||||
if exp == nil {
|
||||
return nil, errors.New("exporter cannot be nil")
|
||||
}
|
||||
meterProvider := sdkmetric.NewMeterProvider(
|
||||
sdkmetric.WithResource(res),
|
||||
sdkmetric.WithReader(exp),
|
||||
)
|
||||
|
||||
// Start go runtime metric collection.
|
||||
err := runtime.Start(runtime.WithMeterProvider(meterProvider),
|
||||
runtime.WithMinimumReadMemStatsInterval(time.Second))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return meterProvider, nil
|
||||
}
|
||||
|
||||
func InitMetrics() (*Metrics, error) {
|
||||
res, err := resource.New(context.Background(),
|
||||
resource.WithAttributes(
|
||||
semconv.ServiceNameKey.String(namespace),
|
||||
semconv.ServiceVersionKey.String("v0.1.0"),
|
||||
),
|
||||
resource.WithProcessRuntimeDescription(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
exporter, err := prometheus.New()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
mp, err := NewPrometheusMeterProvider(res, exporter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
otel.SetMeterProvider(mp)
|
||||
|
||||
meter := mp.Meter(namespace, metric.WithInstrumentationVersion(runtime.Version()))
|
||||
return NewMetrics(meter), nil
|
||||
}
|
|
@ -0,0 +1,79 @@
|
|||
package telemetry
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"testing"
|
||||
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"go.opentelemetry.io/otel/metric/noop"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
)
|
||||
|
||||
func TestNewMetrics(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
meter metric.Meter
|
||||
expectedMetric string
|
||||
}{
|
||||
{
|
||||
name: "Valid Meter",
|
||||
meter: noop.NewMeterProvider().Meter("test"),
|
||||
expectedMetric: "http_requests_total",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
metrics := NewMetrics(tt.meter)
|
||||
|
||||
// Ensure the metric is registered correctly
|
||||
assert.NotNil(t, metrics)
|
||||
assert.NotNil(t, metrics.Requests)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewPrometheusMeterProvider(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
wantErr bool
|
||||
mockPrometheus func() (*prometheus.Exporter, error)
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
name: "Successful creation of meter provider",
|
||||
wantErr: false,
|
||||
mockPrometheus: func() (*prometheus.Exporter, error) {
|
||||
return &prometheus.Exporter{
|
||||
Reader: sdkmetric.NewManualReader(),
|
||||
}, nil
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Error on resource creation",
|
||||
wantErr: true,
|
||||
expectedError: errors.New("error creating prometheus resource"),
|
||||
mockPrometheus: func() (*prometheus.Exporter, error) {
|
||||
return nil, errors.New("error creating prometheus resource")
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
res := resource.NewSchemaless() // Use an empty resource for testing.
|
||||
exp, _ := tt.mockPrometheus()
|
||||
mp, err := NewPrometheusMeterProvider(res, exp)
|
||||
|
||||
if tt.wantErr {
|
||||
assert.NotNil(t, err)
|
||||
} else {
|
||||
assert.NoError(t, err)
|
||||
assert.NotNil(t, mp)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue