// Package openai provides core transformation logic for partial compatibility with the OpenAI REST API.
package openai

import (
	"encoding/base64"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"math/rand"
	"net/http"
	"strings"
	"time"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/types/model"
)
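
// finishReasonToolCalls is a var rather than a const so its address can be
// taken for the *string finish_reason fields below.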
var finishReasonToolCalls = "tool_calls"

type Error struct {
	Message string  `json:"message"`
	Type    string  `json:"type"`
	Param   any     `json:"param"`
	Code    *string `json:"code"`
}

type ErrorResponse struct {
	Error Error `json:"error"`
}
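
// Message mirrors the OpenAI chat message shape. Content is declared as any
// because the API accepts either a plain string or a list of typed content
// parts; FromChatRequest handles both forms.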
type Message struct {
	Role       string     `json:"role"`
	Content    any        `json:"content"`
	Reasoning  string     `json:"reasoning,omitempty"`
	ToolCalls  []ToolCall `json:"tool_calls,omitempty"`
	Name       string     `json:"name,omitempty"`
	ToolCallID string     `json:"tool_call_id,omitempty"`
}

type Choice struct {
	Index        int     `json:"index"`
	Message      Message `json:"message"`
	FinishReason *string `json:"finish_reason"`
}

type ChunkChoice struct {
	Index        int     `json:"index"`
	Delta        Message `json:"delta"`
	FinishReason *string `json:"finish_reason"`
}

type CompleteChunkChoice struct {
	Text         string  `json:"text"`
	Index        int     `json:"index"`
	FinishReason *string `json:"finish_reason"`
}

type Usage struct {
	PromptTokens     int `json:"prompt_tokens"`
	CompletionTokens int `json:"completion_tokens"`
	TotalTokens      int `json:"total_tokens"`
}

type ResponseFormat struct {
	Type       string      `json:"type"`
	JsonSchema *JsonSchema `json:"json_schema,omitempty"`
}

type JsonSchema struct {
	Schema json.RawMessage `json:"schema"`
}

type EmbedRequest struct {
	Input      any    `json:"input"`
	Model      string `json:"model"`
	Dimensions int    `json:"dimensions,omitempty"`
}

type StreamOptions struct {
	IncludeUsage bool `json:"include_usage"`
}

type Reasoning struct {
	Effort *string `json:"effort,omitempty"`
}
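
// ChatCompletionRequest uses pointer fields where the zero value is
// meaningful so that FromChatRequest can tell "unset" (and apply defaults
// such as temperature 1.0 and top_p 1.0) apart from an explicit zero.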
type ChatCompletionRequest struct {
	Model            string          `json:"model"`
	Messages         []Message       `json:"messages"`
	Stream           bool            `json:"stream"`
	StreamOptions    *StreamOptions  `json:"stream_options"`
	MaxTokens        *int            `json:"max_tokens"`
	Seed             *int            `json:"seed"`
	Stop             any             `json:"stop"`
	Temperature      *float64        `json:"temperature"`
	FrequencyPenalty *float64        `json:"frequency_penalty"`
	PresencePenalty  *float64        `json:"presence_penalty"`
	TopP             *float64        `json:"top_p"`
	ResponseFormat   *ResponseFormat `json:"response_format"`
	Tools            []api.Tool      `json:"tools"`
	Reasoning        *Reasoning      `json:"reasoning,omitempty"`
	ReasoningEffort  *string         `json:"reasoning_effort,omitempty"`
	DebugRenderOnly  bool            `json:"_debug_render_only"`
}

type ChatCompletion struct {
	Id                string         `json:"id"`
	Object            string         `json:"object"`
	Created           int64          `json:"created"`
	Model             string         `json:"model"`
	SystemFingerprint string         `json:"system_fingerprint"`
	Choices           []Choice       `json:"choices"`
	Usage             Usage          `json:"usage,omitempty"`
	DebugInfo         *api.DebugInfo `json:"_debug_info,omitempty"`
}

type ChatCompletionChunk struct {
	Id                string        `json:"id"`
	Object            string        `json:"object"`
	Created           int64         `json:"created"`
	Model             string        `json:"model"`
	SystemFingerprint string        `json:"system_fingerprint"`
	Choices           []ChunkChoice `json:"choices"`
	Usage             *Usage        `json:"usage,omitempty"`
}

// TODO (https://github.com/ollama/ollama/issues/5259): support []string, []int and [][]int
type CompletionRequest struct {
	Model            string         `json:"model"`
	Prompt           string         `json:"prompt"`
	FrequencyPenalty float32        `json:"frequency_penalty"`
	MaxTokens        *int           `json:"max_tokens"`
	PresencePenalty  float32        `json:"presence_penalty"`
	Seed             *int           `json:"seed"`
	Stop             any            `json:"stop"`
	Stream           bool           `json:"stream"`
	StreamOptions    *StreamOptions `json:"stream_options"`
	Temperature      *float32       `json:"temperature"`
	TopP             float32        `json:"top_p"`
	Suffix           string         `json:"suffix"`
	DebugRenderOnly  bool           `json:"_debug_render_only"`
}

type Completion struct {
	Id                string                `json:"id"`
	Object            string                `json:"object"`
	Created           int64                 `json:"created"`
	Model             string                `json:"model"`
	SystemFingerprint string                `json:"system_fingerprint"`
	Choices           []CompleteChunkChoice `json:"choices"`
	Usage             Usage                 `json:"usage,omitempty"`
}

type CompletionChunk struct {
	Id                string                `json:"id"`
	Object            string                `json:"object"`
	Created           int64                 `json:"created"`
	Choices           []CompleteChunkChoice `json:"choices"`
	Model             string                `json:"model"`
	SystemFingerprint string                `json:"system_fingerprint"`
	Usage             *Usage                `json:"usage,omitempty"`
}
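
// ToolCall follows the OpenAI wire format: Function.Arguments is a
// JSON-encoded string, unlike api.ToolCall, which carries structured
// arguments.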
type ToolCall struct {
	ID       string `json:"id"`
	Index    int    `json:"index"`
	Type     string `json:"type"`
	Function struct {
		Name      string `json:"name"`
		Arguments string `json:"arguments"`
	} `json:"function"`
}

type Model struct {
	Id      string `json:"id"`
	Object  string `json:"object"`
	Created int64  `json:"created"`
	OwnedBy string `json:"owned_by"`
}

type Embedding struct {
	Object    string    `json:"object"`
	Embedding []float32 `json:"embedding"`
	Index     int       `json:"index"`
}

type ListCompletion struct {
	Object string  `json:"object"`
	Data   []Model `json:"data"`
}

type EmbeddingList struct {
	Object string         `json:"object"`
	Data   []Embedding    `json:"data"`
	Model  string         `json:"model"`
	Usage  EmbeddingUsage `json:"usage,omitempty"`
}

type EmbeddingUsage struct {
	PromptTokens int `json:"prompt_tokens"`
	TotalTokens  int `json:"total_tokens"`
}
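
// NewError wraps message in an OpenAI-style ErrorResponse, mapping the HTTP
// status code to the closest OpenAI error type.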
func NewError(code int, message string) ErrorResponse {
	var etype string
	switch code {
	case http.StatusBadRequest:
		etype = "invalid_request_error"
	case http.StatusNotFound:
		etype = "not_found_error"
	default:
		etype = "api_error"
	}

	return ErrorResponse{Error{Type: etype, Message: message}}
}

// ToUsage converts an api.ChatResponse to Usage.
func ToUsage(r api.ChatResponse) Usage {
	return Usage{
		PromptTokens:     r.Metrics.PromptEvalCount,
		CompletionTokens: r.Metrics.EvalCount,
		TotalTokens:      r.Metrics.PromptEvalCount + r.Metrics.EvalCount,
	}
}
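
// toolCallId generates a pseudo-random OpenAI-style call identifier, e.g.
// "call_x1y2z3a4" (illustrative); Ollama's API does not assign tool call
// IDs of its own, so one is synthesized per call.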
func toolCallId() string {
	const letterBytes = "abcdefghijklmnopqrstuvwxyz0123456789"
	b := make([]byte, 8)
	for i := range b {
		b[i] = letterBytes[rand.Intn(len(letterBytes))]
	}
	return "call_" + strings.ToLower(string(b))
}
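
// toToolCalls converts api tool calls to their OpenAI representation,
// assigning each a fresh synthetic ID and re-encoding the structured
// arguments as a JSON string.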
func toToolCalls(tc []api.ToolCall) []ToolCall {
	toolCalls := make([]ToolCall, len(tc))
	for i, tc := range tc {
		toolCalls[i].ID = toolCallId()
		toolCalls[i].Type = "function"
		toolCalls[i].Function.Name = tc.Function.Name
		toolCalls[i].Index = tc.Function.Index

		args, err := json.Marshal(tc.Function.Arguments)
		if err != nil {
			slog.Error("could not marshal function arguments to json", "error", err)
			continue
		}

		toolCalls[i].Function.Arguments = string(args)
	}
	return toolCalls
}

// ToChatCompletion converts an api.ChatResponse to ChatCompletion.
func ToChatCompletion(id string, r api.ChatResponse) ChatCompletion {
	toolCalls := toToolCalls(r.Message.ToolCalls)
	return ChatCompletion{
		Id:                id,
		Object:            "chat.completion",
		Created:           r.CreatedAt.Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []Choice{{
			Index:   0,
			Message: Message{Role: r.Message.Role, Content: r.Message.Content, ToolCalls: toolCalls, Reasoning: r.Message.Thinking},
			FinishReason: func(reason string) *string {
				if len(toolCalls) > 0 {
					reason = "tool_calls"
				}
				if len(reason) > 0 {
					return &reason
				}
				return nil
			}(r.DoneReason),
		}},
		Usage: ToUsage(r),
		DebugInfo: r.DebugInfo,
	}
}

// ToChunk converts an api.ChatResponse to ChatCompletionChunk.
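// toolCallSent reports whether an earlier chunk in this stream already
// carried a tool call, so the final chunk's finish_reason becomes
// "tool_calls" even when the closing delta itself contains none.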
func ToChunk(id string, r api.ChatResponse, toolCallSent bool) ChatCompletionChunk {
	toolCalls := toToolCalls(r.Message.ToolCalls)
	return ChatCompletionChunk{
		Id:                id,
		Object:            "chat.completion.chunk",
		Created:           time.Now().Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []ChunkChoice{{
			Index: 0,
			Delta: Message{Role: "assistant", Content: r.Message.Content, ToolCalls: toolCalls, Reasoning: r.Message.Thinking},
			FinishReason: func(reason string) *string {
				if len(reason) > 0 {
					if toolCallSent || len(toolCalls) > 0 {
						return &finishReasonToolCalls
					}
					return &reason
				}
				return nil
			}(r.DoneReason),
		}},
	}
}

// ToUsageGenerate converts an api.GenerateResponse to Usage.
func ToUsageGenerate(r api.GenerateResponse) Usage {
	return Usage{
		PromptTokens:     r.Metrics.PromptEvalCount,
		CompletionTokens: r.Metrics.EvalCount,
		TotalTokens:      r.Metrics.PromptEvalCount + r.Metrics.EvalCount,
	}
}

// ToCompletion converts an api.GenerateResponse to Completion.
func ToCompletion(id string, r api.GenerateResponse) Completion {
	return Completion{
		Id:                id,
		Object:            "text_completion",
		Created:           r.CreatedAt.Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []CompleteChunkChoice{{
			Text:  r.Response,
			Index: 0,
			FinishReason: func(reason string) *string {
				if len(reason) > 0 {
					return &reason
				}
				return nil
			}(r.DoneReason),
		}},
		Usage: ToUsageGenerate(r),
	}
}

// ToCompleteChunk converts an api.GenerateResponse to CompletionChunk.
func ToCompleteChunk(id string, r api.GenerateResponse) CompletionChunk {
	return CompletionChunk{
		Id:                id,
		Object:            "text_completion",
		Created:           time.Now().Unix(),
		Model:             r.Model,
		SystemFingerprint: "fp_ollama",
		Choices: []CompleteChunkChoice{{
			Text:  r.Response,
			Index: 0,
			FinishReason: func(reason string) *string {
				if len(reason) > 0 {
					return &reason
				}
				return nil
			}(r.DoneReason),
		}},
	}
}

// ToListCompletion converts an api.ListResponse to ListCompletion.
func ToListCompletion(r api.ListResponse) ListCompletion {
	var data []Model
	for _, m := range r.Models {
		data = append(data, Model{
			Id:      m.Name,
			Object:  "model",
			Created: m.ModifiedAt.Unix(),
			OwnedBy: model.ParseName(m.Name).Namespace,
		})
	}

	return ListCompletion{
		Object: "list",
		Data:   data,
	}
}

// ToEmbeddingList converts an api.EmbedResponse to EmbeddingList.
func ToEmbeddingList(model string, r api.EmbedResponse) EmbeddingList {
	if r.Embeddings != nil {
		var data []Embedding
		for i, e := range r.Embeddings {
			data = append(data, Embedding{
				Object:    "embedding",
				Embedding: e,
				Index:     i,
			})
		}

		return EmbeddingList{
			Object: "list",
			Data:   data,
			Model:  model,
			Usage: EmbeddingUsage{
				PromptTokens: r.PromptEvalCount,
				TotalTokens:  r.PromptEvalCount,
			},
		}
	}

	return EmbeddingList{}
}

// ToModel converts an api.ShowResponse to Model.
func ToModel(r api.ShowResponse, m string) Model {
	return Model{
		Id:      m,
		Object:  "model",
		Created: r.ModifiedAt.Unix(),
		OwnedBy: model.ParseName(m).Namespace,
	}
}

// FromChatRequest converts a ChatCompletionRequest to api.ChatRequest.
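// A minimal sketch of an accepted request body (field values are
// illustrative, not defaults):
//
//	{
//	  "model": "my-model",
//	  "messages": [{"role": "user", "content": "hello"}],
//	  "stream": true
//	}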
func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) {
	var messages []api.Message
	for _, msg := range r.Messages {
		toolName := ""
		if strings.ToLower(msg.Role) == "tool" {
			toolName = msg.Name
			if toolName == "" && msg.ToolCallID != "" {
				toolName = nameFromToolCallID(r.Messages, msg.ToolCallID)
			}
		}
		switch content := msg.Content.(type) {
		case string:
			toolCalls, err := fromCompletionToolCall(msg.ToolCalls)
			if err != nil {
				return nil, err
			}
			messages = append(messages, api.Message{Role: msg.Role, Content: content, Thinking: msg.Reasoning, ToolCalls: toolCalls, ToolName: toolName})
		case []any:
			for _, c := range content {
				data, ok := c.(map[string]any)
				if !ok {
					return nil, errors.New("invalid message format")
				}
				switch data["type"] {
				case "text":
					text, ok := data["text"].(string)
					if !ok {
						return nil, errors.New("invalid message format")
					}
					messages = append(messages, api.Message{Role: msg.Role, Content: text})
				case "image_url":
					var url string
					if urlMap, ok := data["image_url"].(map[string]any); ok {
						if url, ok = urlMap["url"].(string); !ok {
							return nil, errors.New("invalid message format")
						}
					} else {
						if url, ok = data["image_url"].(string); !ok {
							return nil, errors.New("invalid message format")
						}
					}

					types := []string{"jpeg", "jpg", "png", "webp"}
					valid := false
					for _, t := range types {
						prefix := "data:image/" + t + ";base64,"
						if strings.HasPrefix(url, prefix) {
							url = strings.TrimPrefix(url, prefix)
							valid = true
							break
						}
					}

					if !valid {
						return nil, errors.New("invalid image input")
					}

					img, err := base64.StdEncoding.DecodeString(url)
					if err != nil {
						return nil, errors.New("invalid message format")
					}

					messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}})
				default:
					return nil, errors.New("invalid message format")
				}
			}
			// since we might have added multiple messages above, if we have
			// tool calls we'll add them to the last message
			if len(messages) > 0 && len(msg.ToolCalls) > 0 {
				toolCalls, err := fromCompletionToolCall(msg.ToolCalls)
				if err != nil {
					return nil, err
				}
				messages[len(messages)-1].ToolCalls = toolCalls
				if toolName != "" {
					messages[len(messages)-1].ToolName = toolName
				}
				messages[len(messages)-1].Thinking = msg.Reasoning
			}
		default:
			// content is only optional if tool calls are present
			if msg.ToolCalls == nil {
				return nil, fmt.Errorf("invalid message content type: %T", content)
			}

			toolCalls := make([]api.ToolCall, len(msg.ToolCalls))
			for i, tc := range msg.ToolCalls {
				toolCalls[i].Function.Name = tc.Function.Name
				err := json.Unmarshal([]byte(tc.Function.Arguments), &toolCalls[i].Function.Arguments)
				if err != nil {
					return nil, errors.New("invalid tool call arguments")
				}
			}
			messages = append(messages, api.Message{Role: msg.Role, Thinking: msg.Reasoning, ToolCalls: toolCalls})
		}
	}

	options := make(map[string]any)

	switch stop := r.Stop.(type) {
	case string:
		options["stop"] = []string{stop}
	case []any:
		var stops []string
		for _, s := range stop {
			if str, ok := s.(string); ok {
				stops = append(stops, str)
			}
		}
		options["stop"] = stops
	}

	if r.MaxTokens != nil {
		options["num_predict"] = *r.MaxTokens
	}

	if r.Temperature != nil {
		options["temperature"] = *r.Temperature
	} else {
		options["temperature"] = 1.0
	}

	if r.Seed != nil {
		options["seed"] = *r.Seed
	}

	if r.FrequencyPenalty != nil {
		options["frequency_penalty"] = *r.FrequencyPenalty
	}

	if r.PresencePenalty != nil {
		options["presence_penalty"] = *r.PresencePenalty
	}

	if r.TopP != nil {
		options["top_p"] = *r.TopP
	} else {
		options["top_p"] = 1.0
	}

	var format json.RawMessage
	if r.ResponseFormat != nil {
		switch strings.ToLower(strings.TrimSpace(r.ResponseFormat.Type)) {
		// Support the old "json_object" type for OpenAI compatibility
		case "json_object":
			format = json.RawMessage(`"json"`)
		case "json_schema":
			if r.ResponseFormat.JsonSchema != nil {
				format = r.ResponseFormat.JsonSchema.Schema
			}
		}
	}

	var think *api.ThinkValue
	if r.Reasoning != nil && r.Reasoning.Effort != nil {
		// guard against a "reasoning" object without an "effort" value, which
		// would otherwise dereference a nil pointer
		think = &api.ThinkValue{
			Value: *r.Reasoning.Effort,
		}
	} else if r.ReasoningEffort != nil {
		think = &api.ThinkValue{
			Value: *r.ReasoningEffort,
		}
	}

	return &api.ChatRequest{
		Model:           r.Model,
		Messages:        messages,
		Format:          format,
		Options:         options,
		Stream:          &r.Stream,
		Tools:           r.Tools,
		Think:           think,
		DebugRenderOnly: r.DebugRenderOnly,
	}, nil
}
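
// nameFromToolCallID recovers the function name for a tool-result message by
// finding the assistant tool call whose ID matches toolCallID.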
func nameFromToolCallID(messages []Message, toolCallID string) string {
	// iterate backwards to be more resilient to duplicate tool call IDs (this
	// follows "last one wins")
	for i := len(messages) - 1; i >= 0; i-- {
		msg := messages[i]
		for _, tc := range msg.ToolCalls {
			if tc.ID == toolCallID {
				return tc.Function.Name
			}
		}
	}
	return ""
}
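
// fromCompletionToolCall converts OpenAI tool calls, whose arguments arrive
// as a JSON-encoded string, into api.ToolCalls with structured arguments.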
func fromCompletionToolCall(toolCalls []ToolCall) ([]api.ToolCall, error) {
	apiToolCalls := make([]api.ToolCall, len(toolCalls))
	for i, tc := range toolCalls {
		apiToolCalls[i].Function.Name = tc.Function.Name
		err := json.Unmarshal([]byte(tc.Function.Arguments), &apiToolCalls[i].Function.Arguments)
		if err != nil {
			return nil, errors.New("invalid tool call arguments")
		}
	}

	return apiToolCalls, nil
}

// FromCompleteRequest converts a CompletionRequest to api.GenerateRequest.
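// A minimal sketch of an accepted request body (values illustrative;
// max_tokens maps to the num_predict option):
//
//	{"model": "my-model", "prompt": "Once upon a time", "max_tokens": 32}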
func FromCompleteRequest(r CompletionRequest) (api.GenerateRequest, error) {
	options := make(map[string]any)

	switch stop := r.Stop.(type) {
	case string:
		options["stop"] = []string{stop}
	case []any:
		var stops []string
		for _, s := range stop {
			if str, ok := s.(string); ok {
				stops = append(stops, str)
			} else {
				return api.GenerateRequest{}, fmt.Errorf("invalid type for 'stop' field: %T", s)
			}
		}
		options["stop"] = stops
	}

	if r.MaxTokens != nil {
		options["num_predict"] = *r.MaxTokens
	}

	if r.Temperature != nil {
		options["temperature"] = *r.Temperature
	} else {
		options["temperature"] = 1.0
	}

	if r.Seed != nil {
		options["seed"] = *r.Seed
	}

	options["frequency_penalty"] = r.FrequencyPenalty

	options["presence_penalty"] = r.PresencePenalty

	if r.TopP != 0.0 {
		options["top_p"] = r.TopP
	} else {
		options["top_p"] = 1.0
	}

	return api.GenerateRequest{
		Model:           r.Model,
		Prompt:          r.Prompt,
		Options:         options,
		Stream:          &r.Stream,
		Suffix:          r.Suffix,
		DebugRenderOnly: r.DebugRenderOnly,
	}, nil
}