Compare commits

...

4 Commits

11 changed files with 662 additions and 140 deletions

View File

@ -16,6 +16,8 @@ type Parser interface {
HasThinkingSupport() bool
}
// used like builtinParser := parsers.ParserForName(m.Config.Parser)
func ParserForName(name string) Parser {
switch name {
case "qwen3-coder":

View File

@ -13,17 +13,25 @@ import (
"github.com/ollama/ollama/logutil"
)
// initialState reports which state the parser should begin in: parsers
// with thinking support start out collecting thinking content, all
// others start out collecting plain content.
func (p *Qwen3VLParser) initialState() qwenParserState {
	if !p.HasThinkingSupport() {
		return CollectingContent
	}
	return CollectingThinkingContent
}
// TODO: ensure callers invoke Init so the parser actually starts from initialState().
const (
CollectingContent qwenParserState = iota
CollectingThinkingContent
CollectingContent qwenParserState = iota
CollectingThinkingContent // qwenParserState = iota
CollectingToolContent
)
// Tags delimiting a thinking section in model output. Qwen3-VL closes
// thinking with "</think>" (per the updated parser tests); the explicit
// "<thinking>" open tag is still scanned for by findFirstTag.
//
// NOTE: the previous revision declared thinkingCloseTag twice (old
// "</thinking>" and new "</think>"), which is a redeclaration error;
// this keeps only the new value.
const (
	thinkingOpenTag  = "<thinking>"
	thinkingCloseTag = "</think>"
)
// TODO(gguo): add a field for isThinking
type Qwen3VLParser struct {
state qwenParserState
buffer strings.Builder
@ -34,12 +42,14 @@ func (p *Qwen3VLParser) HasToolSupport() bool {
return true
}
// TODO(gguo): changes this to reference an objects param
// HasThinkingSupport reports whether this parser understands thinking
// output. Always true for Qwen3-VL in this revision (a TODO above notes
// it should eventually come from a per-model parameter).
func (p *Qwen3VLParser) HasThinkingSupport() bool {
return true
}
// Init primes the parser before a generation: it records the available
// tools and resets the state machine to its initial state. The tools
// are returned unchanged (no rewriting needed for this model).
// lastMessage is currently unused — presumably accepted to satisfy the
// Parser interface; TODO confirm.
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
p.tools = tools
p.state = p.initialState()
return tools
}
@ -98,7 +108,6 @@ func (p *Qwen3VLParser) parseEvents() []qwenEvent {
return all
}
// TODO: consider a clearer name for emitContentBeforeTag (it both emits and consumes).
func emitContentBeforeTag(p *Qwen3VLParser, events []qwenEvent, tag string) []qwenEvent {
split := strings.SplitN(p.buffer.String(), tag, 2)
before := split[0]
@ -112,49 +121,15 @@ func emitContentBeforeTag(p *Qwen3VLParser, events []qwenEvent, tag string) []qw
return events
}
// findFirstTag scans the parser's buffer for each of the given tags and
// returns whichever one occurs earliest. If none of the tags are
// present it returns the empty string.
func findFirstTag(p *Qwen3VLParser, tags []string) string {
	var (
		best    string
		bestIdx = -1
	)
	haystack := p.buffer.String()
	for _, candidate := range tags {
		pos := strings.Index(haystack, candidate)
		if pos == -1 {
			continue
		}
		if bestIdx == -1 || pos < bestIdx {
			bestIdx = pos
			best = candidate
		}
	}
	return best
}
func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
var events []qwenEvent
firstTag := findFirstTag(p, []string{thinkingOpenTag, toolOpenTag})
switch p.state {
case CollectingContent:
if firstTag == thinkingOpenTag {
events = emitContentBeforeTag(p, events, thinkingOpenTag)
p.state = CollectingThinkingContent
return events, true
} else if firstTag == toolOpenTag {
if strings.Contains(p.buffer.String(), toolOpenTag) {
events = emitContentBeforeTag(p, events, toolOpenTag)
p.state = CollectingToolContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), thinkingOpenTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
events = append(events, qwenEventContent{content: unambiguous})
return events, false
} else if overlapLen := overlap(p.buffer.String(), toolOpenTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
@ -164,11 +139,14 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
events = append(events, qwenEventContent{content: unambiguous})
if len(unambiguous) > 0 { // why does qwen3coder not have this here
events = append(events, qwenEventContent{content: unambiguous})
}
return events, false
} else {
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
ambiguousStart := len(p.buffer.String()) - whitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
@ -195,21 +173,46 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
} else {
return events, false
}
case CollectingThinkingContent:
case CollectingThinkingContent: // so we want to hip the unambiguous stuff
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
split := strings.SplitN(p.buffer.String(), thinkingCloseTag, 2)
fmt.Println("split", split)
// fmt.Println("split", split)
before := split[0]
if len(before) == 0 {
slog.Warn("qwen tool call closing tag found but no content before it")
}
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
events = append(events, qwenEventThinkingContent{content: before})
if len(before) > 0 {
events = append(events, qwenEventThinkingContent{content: before})
}
p.buffer.Reset()
p.buffer.WriteString(after)
p.state = CollectingContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 { // we see part of a close thinking tag
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, qwenEventThinkingContent{content: unambiguous})
}
return events, false
} else {
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
ambiguousStart := len(p.buffer.String()) - whitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, qwenEventThinkingContent{content: unambiguous})
}
return events, false
}
default:

View File

@ -15,7 +15,7 @@ import (
// return t
// }
func TestQwen3VLParserStreaming(t *testing.T) {
func TestQwen3VLThinkingParserStreaming(t *testing.T) {
type step struct {
input string
wantEvents []qwenEvent
@ -30,21 +30,33 @@ func TestQwen3VLParserStreaming(t *testing.T) {
{
desc: "simple thinking",
steps: []step{
{input: "<thinking>abc</thinking>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
},
},
{
desc: "simple trip thinking",
steps: []step{
{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "<think>abc"}}},
},
},
{
desc: "thinking with split tags",
steps: []step{
{input: "<thinking>abc", wantEvents: []qwenEvent{}},
{input: "</thinking>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
{input: "abc", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
{input: "</think>", wantEvents: []qwenEvent{}},
},
},
{
desc: "multiple think tags",
steps: []step{
{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>actually, is not thinking"}}},
},
},
{
desc: "thinking and tool call",
steps: []step{
{
input: "<thinking>I'm thinking</thinking><tool_call>I'm tool calling</tool_call>",
input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking"},
qwenEventRawToolCall{raw: "I'm tool calling"},
@ -56,7 +68,7 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "thinking and content",
steps: []step{
{
input: "<thinking>I'm thinking</thinking>I'm content",
input: "I'm thinking</think>I'm content",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking"},
qwenEventContent{content: "I'm content"},
@ -71,10 +83,10 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "nested thinking (outside thinking, inside thinking)",
steps: []step{
{
input: "<thinking>I'm thinking<thinking>I'm nested thinking</thinking></thinking>",
input: "I'm thinking<think>I'm nested thinking</think></think>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking<thinking>I'm nested thinking"},
qwenEventContent{content: "</thinking>"},
qwenEventThinkingContent{content: "I'm thinking<think>I'm nested thinking"},
qwenEventContent{content: "</think>"},
},
},
},
@ -83,10 +95,10 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "interleaved thinking",
steps: []step{
{
input: "<thinking>I'm thinking<thinking></thinking>I'm actually content</thinking>",
input: "<think>I'm thinking</think>I'm actually content</think>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking<thinking>"},
qwenEventContent{content: "I'm actually content</thinking>"},
qwenEventThinkingContent{content: "<think>I'm thinking"},
qwenEventContent{content: "I'm actually content</think>"},
},
},
},
@ -95,7 +107,7 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "nested thinking and tool call (outside thinking, inside tool call)",
steps: []step{
{
input: "<thinking>I'm thinking<tool_call>I'm nested tool call</tool_call></thinking>",
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm nested tool call</tool_call>"}},
},
},
@ -104,8 +116,11 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "nested thinking and tool call (outside tool call, inside thinking)",
steps: []step{
{
input: "<tool_call>I'm nested tool call<thinking>I'm thinking</thinking></tool_call>",
wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "I'm nested tool call<thinking>I'm thinking</thinking>"}},
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "<tool_call>I'm nested tool call<think>I'm thinking"},
qwenEventContent{content: "</tool_call>"},
},
},
},
},
@ -113,12 +128,12 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "interleaved thinking and tool call",
steps: []step{
{
input: "<thinking>I'm thinking<tool_call>I'm NOT a nested tool call</thinking></tool_call><tool_call>I'm nested tool call 2<thinking></tool_call></thinking>",
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm NOT a nested tool call"},
qwenEventContent{content: "</tool_call>"},
qwenEventRawToolCall{raw: "I'm nested tool call 2<thinking>"},
qwenEventContent{content: "</thinking>"},
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
qwenEventContent{content: "</think>"},
},
},
},
@ -127,16 +142,12 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "partial thinking tag fakeout",
steps: []step{
{
input: "abc<thinking",
wantEvents: []qwenEvent{
qwenEventContent{content: "abc"},
},
input: "abc</think",
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}},
},
{
input: " fakeout",
wantEvents: []qwenEvent{
qwenEventContent{content: "<thinking fakeout"},
},
input: " fakeout",
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}},
},
},
},
@ -144,9 +155,46 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "partial thinking incomplete",
steps: []step{
{
input: "abc<thinking>unfinished</thinking", // when something is ambiguious, we dont emit anything
input: "abc<think>unfinished</think", // when something is ambiguious, we dont emit anything
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
},
},
},
{
desc: "test with split thinking and content",
steps: []step{
{
input: "abc<think>unfinished</th", // when something is ambiguious, we dont emit anything
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
},
{
input: "ink> def",
wantEvents: []qwenEvent{
qwenEventContent{content: "abc"},
qwenEventContent{content: "def"},
},
},
},
},
{
desc: "thinking with no tags",
steps: []step{
{
input: "Hello I am thinking",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "Hello I am thinking"},
},
},
{
input: "Hello I am thinking some more",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "Hello I am thinking some more"},
},
},
{
input: "Hello I am think</think> NOT",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "Hello I am think"},
qwenEventContent{content: "NOT"},
},
},
},
@ -184,42 +232,9 @@ func TestQwen3VLParserStreaming(t *testing.T) {
}
}
// TestQwen3VLComplex feeds a long free-form model output through the parser.
// NOTE(review): the expected events below ("bruh") are a debugging
// placeholder — as written this case cannot pass. Replace wantEvents with
// the real event sequence for the emoji input, or delete the case.
func TestQwen3VLComplex(t *testing.T) {
type step struct {
input string
wantEvents []qwenEvent
}
// NOTE(review): desc and only are declared but never used by the loop
// below — consider t.Run(tc.desc, ...) and honoring the only flag.
cases := []struct {
desc string
steps []step
only bool
}{
{
desc: "simple tool call", // NOTE(review): input contains no tool call — description looks stale
steps: []step{
{
input: "Here are 30 distinct and popular emojis for you! 😊\n\n1. 😂 \n2. ❤️ \n3. 🌟 \n4. 🐶 \n5. 🍕 \n6. ✨ \n7. 🌈 \n8. 🎉 \n9. 🌎 \n10. 🦁 \n11. 💯 \n12. 🥰 \n13. 🌸 \n14. 🚀 \n15. 🌊 \n16. 🍦 \n17. 🌙 \n18. 🌞 \n19. 🌻 \n20. 🦋 \n21. 🍃 \n22. 🏆 \n23. 🌮 \n24. 🧸 \n25. 🎮 \n26. 📚 \n27. ✈️ \n28. 🌟 (sparkles) \n29. 🌈 (rainbow) \n30. 🥳 \n\n*Bonus fun fact:* The 😂 (Face with Tears of Joy) was Oxford Dictionaries' Word of the Year in 2015! 🎉 \nLet me know if you'd like themed emojis (e.g., animals, food, or emotions)! 🐱🍕📚",
wantEvents: []qwenEvent{qwenEventContent{content: "bruh"}},
},
},
},
}
for _, tc := range cases {
// A fresh parser is built per step, so no state carries between
// steps of the same case (unlike the streaming parser test).
for i, step := range tc.steps {
parser := Qwen3VLParser{}
parser.buffer.WriteString(step.input)
gotEvents := parser.parseEvents()
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
}
}
}
}
// TODO: investigate the reported issue where JSON decoding cannot infer
// tool-argument types; decide whether a dedicated test is needed.
func TestQwen3VLToolParser(t *testing.T) {
func TestQwen3VLThinkingToolParser(t *testing.T) {
type step struct {
name string
rawToolCall string

View File

@ -55,7 +55,12 @@ func renderAdditionalKeys(obj any, handledKeys map[string]bool) string {
return sb.String()
}
func Qwen3CoderRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
// Qwen3CoderRenderer renders chat messages into the Qwen3-Coder prompt
// format via its Render method.
type Qwen3CoderRenderer struct {
isThinking bool // presumably toggles reasoning (<think>) rendering — TODO confirm; not visibly read by Render here
}
func (r *Qwen3CoderRenderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
// func Qwen3CoderRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
var sb strings.Builder
// filter out system messages and choose the first (if any) to win

View File

@ -288,7 +288,8 @@ call tool<|im_end|>
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := Qwen3CoderRenderer(tt.msgs, tt.tools, nil)
// rendered, err := Qwen3CoderRenderer(tt.msgs, tt.tools, nil)
rendered, err := (&Qwen3CoderRenderer{false}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}

View File

@ -49,7 +49,12 @@ func marshalWithSpaces(v any) ([]byte, error) {
return out, nil
}
func renderContent(content api.Message, doVisionCount bool) string {
// Qwen3VLRenderer renders chat messages into the Qwen3-VL prompt format
// via its Render method.
type Qwen3VLRenderer struct {
isThinking bool // when true, Render reconstructs <think> reasoning sections and ends the prompt with an open <think>
}
// func renderContent(content api.Message, doVisionCount bool) string {
func (r *Qwen3VLRenderer) renderContent(content api.Message, doVisionCount bool) string {
// This assumes all images are at the front of the message - same assumption as ollama/ollama/runner.go
var subSb strings.Builder
for _ = range content.Images {
@ -64,8 +69,10 @@ func renderContent(content api.Message, doVisionCount bool) string {
return subSb.String()
}
func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
// func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
func (r *Qwen3VLRenderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
var sb strings.Builder
// r.isThinking = false
if len(tools) > 0 {
sb.WriteString(imStartTag + "system\n")
@ -99,29 +106,35 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
}
for i, message := range messages {
content := renderContent(message, true)
content := r.renderContent(message, true)
if message.Role == "user" || message.Role == "system" && i != 0 {
sb.WriteString("<|im_start|>" + message.Role + "\n" + content + "<|im_end|>\n")
} else if message.Role == "assistant" {
contentReasoning := ""
if message.Thinking != "" {
contentReasoning = message.Thinking
} else if strings.Contains(content, "</think>") {
contentReasoning = strings.Split(content, "</think>")[0]
contentReasoning = strings.TrimRight(contentReasoning, "\n")
contentReasoningSplit := strings.Split(contentReasoning, "<think>")
contentReasoning = contentReasoningSplit[len(contentReasoningSplit)-1]
// here we need to reconstruct
if r.isThinking { // we only do this if its a thinking model (i.e contentReasoning != "" if its a thinking model)
if message.Thinking != "" {
contentReasoning = message.Thinking
} else if strings.Contains(content, "</think>") {
contentReasoning = strings.Split(content, "</think>")[0]
contentReasoning = strings.TrimRight(contentReasoning, "\n")
contentReasoning = strings.TrimLeft(contentReasoning, "\n")
contentReasoningSplit := strings.Split(contentReasoning, "<think>")
contentReasoning = contentReasoningSplit[len(contentReasoningSplit)-1]
contentSplit := strings.Split(content, "</think>")
content = contentSplit[len(contentSplit)-1]
content = strings.TrimLeft(content, "\n")
contentReasoning = strings.TrimLeft(contentReasoning, "\n")
contentSplit := strings.Split(content, "</think>")
content = contentSplit[len(contentSplit)-1]
content = strings.TrimLeft(content, "\n")
}
}
// reconstruct the content
if i > lastQueryIndex {
// isThinking && i > lastQueryIndex
if r.isThinking && i > lastQueryIndex { // if it is a thinking model
if i == len(messages)-1 || contentReasoning != "" {
sb.WriteString("<|im_start|>" + message.Role + "\n<think>\n" + strings.Trim(contentReasoning, "\n") + "\n</think>\n\n" + strings.TrimLeft(content, "\n"))
} else {
@ -158,6 +171,10 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
}
sb.WriteString("<|im_start|>assistant\n")
if r.isThinking {
sb.WriteString("<think>\n") // Thinking models end with <|im_start|>assistant\n<think>\n
}
return sb.String(), nil
}

View File

@ -0,0 +1,311 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
// TestQwen3VLNonThinkingRenderer exercises the Qwen3-VL renderer with
// isThinking = false: assistant <think>...</think> text is rendered
// inline verbatim instead of being re-assembled into a leading
// reasoning section.
func TestQwen3VLNonThinkingRenderer(t *testing.T) {
tests := []struct {
name string
msgs []api.Message
images []api.ImageData // NOTE(review): never read by this test body — remove or wire up
tools []api.Tool
expected string
}{
{
name: "basic",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello, how are you?"},
},
expected: `<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Hello, how are you?<|im_end|>
<|im_start|>assistant
`,
},
{ // non-thinking: the <think> span stays inline in the replayed assistant turn
name: "With thinking, end assistant.",
msgs: []api.Message{
// {Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think>"}, // NOTE(review): confirm verbatim passthrough is the intended non-thinking behavior
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><|im_end|>
<|im_start|>assistant
`,
},
{ // non-thinking: multiple <think> spans are also left untouched
name: "Multiple thinking",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
<|im_start|>assistant
`, // both thinking spans survive verbatim; nothing is extracted
},
{ // same passthrough across several turns, including <think> inside a user message
name: "Multiple thinking, multiple messages.",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
{Role: "user", Content: "What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think>"},
{Role: "assistant", Content: "I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think>"},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
<|im_start|>user
What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think><|im_end|>
<|im_start|>assistant
I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think><|im_end|>
<|im_start|>assistant
`,
},
{
name: "Image",
msgs: []api.Message{ // NOTE(review): expects local-runner style [img-N] placeholders — remote rendering may differ; confirm
{Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData(IMAGE2_BASE64)}},
},
expected: `<|im_start|>user
[img-0]Describe this image.<|im_end|>
<|im_start|>assistant
`,
}, // TODO: add coverage for video inputs if/when the renderer supports them
{
name: "Multiple images",
msgs: []api.Message{
{Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData(IMAGE1_BASE64), api.ImageData(IMAGE2_BASE64)}},
},
expected: `<|im_start|>user
[img-0][img-1]Describe these images.<|im_end|>
<|im_start|>assistant
`,
},
{
name: "with tools and response",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant with access to tools."},
{Role: "user", Content: "What's the weather like in New York?"},
{
Role: "assistant",
Content: "I'll check the weather in New York for you.",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get-current-weather",
Arguments: map[string]any{
"location": "New York",
"unit": "fahrenheit",
},
},
},
},
},
{Role: "tool", Content: "80", ToolName: "get-current-weather"},
{Role: "user", Content: "That sounds nice! What about San Francisco?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get-current-weather",
Description: "Get the current weather for a location",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "The city and state, e.g. San Francisco, CA",
},
"unit": {
Type: api.PropertyType{"string"},
Enum: []any{"celsius", "fahrenheit"},
Description: "The temperature unit",
},
},
},
},
},
},
expected: `<|im_start|>system
You are a helpful assistant with access to tools.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get-current-weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
What's the weather like in New York?<|im_end|>
<|im_start|>assistant
I'll check the weather in New York for you.
<tool_call>
{"name": "get-current-weather", "arguments": {"location": "New York", "unit": "fahrenheit"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
80
</tool_response><|im_end|>
<|im_start|>user
That sounds nice! What about San Francisco?<|im_end|>
<|im_start|>assistant
`,
},
{
name: "With tools and response, multiple tool calls",
msgs: []api.Message{
{
Role: "system",
Content: "You are a helpful assistant with access to tools.",
},
{
Role: "user",
Content: "Call two tools for me: add and multiply.",
},
{
Role: "assistant",
Content: "Sure, I'll call both tools for you.",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "add",
Arguments: map[string]any{
"a": 2,
"b": 3,
},
},
},
{
Function: api.ToolCallFunction{
Name: "multiply",
Arguments: map[string]any{
"x": 4,
"y": 5,
},
},
},
},
},
{
Role: "tool",
Content: "5",
ToolName: "add",
},
{
Role: "tool",
Content: "20",
ToolName: "multiply",
},
{
Role: "user",
Content: "Thanks! What are the results?",
},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "add",
Description: "Add two numbers",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"a", "b"},
Properties: map[string]api.ToolProperty{
"a": {Type: api.PropertyType{"integer"}, Description: "First number"},
"b": {Type: api.PropertyType{"integer"}, Description: "Second number"},
},
},
},
},
{
Type: "function",
Function: api.ToolFunction{
Name: "multiply",
Description: "Multiply two numbers",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"x", "y"},
Properties: map[string]api.ToolProperty{
"x": {Type: api.PropertyType{"integer"}, Description: "First factor"},
"y": {Type: api.PropertyType{"integer"}, Description: "Second factor"},
},
},
},
},
},
expected: `<|im_start|>system
You are a helpful assistant with access to tools.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "add", "description": "Add two numbers", "parameters": {"type": "object", "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}}, "required": ["a", "b"]}}}
{"type": "function", "function": {"name": "multiply", "description": "Multiply two numbers", "parameters": {"type": "object", "properties": {"x": {"description": "First factor"}, "y": {"description": "Second factor"}}, "required": ["x", "y"]}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
Call two tools for me: add and multiply.<|im_end|>
<|im_start|>assistant
Sure, I'll call both tools for you.
<tool_call>
{"name": "add", "arguments": {"a": 2, "b": 3}}
</tool_call>
<tool_call>
{"name": "multiply", "arguments": {"x": 4, "y": 5}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
5
</tool_response>
<tool_response>
20
</tool_response><|im_end|>
<|im_start|>user
Thanks! What are the results?<|im_end|>
<|im_start|>assistant
`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Previous function-style API, kept for reference:
// rendered, err := Qwen3VLRenderer(tt.msgs, tt.tools, nil)
// renderer := RendererForName("qwen3-vl")
rendered, err := (&Qwen3VLRenderer{false}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}

View File

@ -17,7 +17,7 @@ var IMAGE2_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAIAAADTED8xAAADMElEQVR4nOz
// - [ ] set descriptions to omitempty?
// - [] images add the auto tag
func TestQwen3VLRenderer(t *testing.T) {
func TestQwen3VLThinkingRenderer(t *testing.T) {
tests := []struct {
name string
msgs []api.Message
@ -327,7 +327,8 @@ Thanks! What are the results?<|im_end|>
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := Qwen3VLRenderer(tt.msgs, tt.tools, nil)
// rendered, err := Qwen3VLRenderer(tt.msgs, tt.tools, nil)
rendered, err := (&Qwen3VLRenderer{true}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}

View File

@ -0,0 +1,163 @@
// package renderers
// import (
// "encoding/json"
// "strings"
// "github.com/ollama/ollama/api"
// )
// var imageCount int
// var videoCount int
// func marshalWithSpaces(v any) ([]byte, error) {
// b, err := json.Marshal(v)
// if err != nil {
// return nil, err
// }
// out := make([]byte, 0, len(b)+len(b)/8)
// inStr, esc := false, false
// for _, c := range b {
// if inStr {
// out = append(out, c)
// if esc {
// esc = false
// continue
// }
// if c == '\\' {
// esc = true
// continue
// }
// if c == '"' {
// inStr = false
// }
// continue
// }
// switch c {
// case '"':
// inStr = true
// out = append(out, c)
// case ':':
// out = append(out, ':', ' ')
// case ',':
// out = append(out, ',', ' ')
// default:
// out = append(out, c)
// }
// }
// return out, nil
// }
// func renderContent(content api.Message, doVisionCount bool) string {
// // This assumes all images are at the front of the message - same assumption as ollama/ollama/runner.go
// var subSb strings.Builder
// for _ = range content.Images {
// if doVisionCount {
// imageCount++
// }
// subSb.WriteString("<|vision_start|><|image_pad|><|vision_end|>")
// }
// // TODO: support videos
// subSb.WriteString(content.Content)
// return subSb.String()
// }
// func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
// var sb strings.Builder
// if len(tools) > 0 {
// sb.WriteString(imStartTag + "system\n")
// if len(messages) > 0 && messages[0].Role == "system" {
// sb.WriteString(messages[0].Content + "\n\n")
// }
// sb.WriteString("# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>")
// for _, tool := range tools {
// sb.WriteString("\n")
// if b, err := marshalWithSpaces(tool); err == nil {
// sb.Write(b)
// }
// }
// sb.WriteString("\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n")
// } else if len(messages) > 0 && messages[0].Role == "system" {
// sb.WriteString("<|im_start|>system\n" + messages[0].Content + "<|im_end|>\n")
// }
// multiStepTool := true
// lastQueryIndex := len(messages) - 1
// for i := len(messages) - 1; i >= 0; i-- {
// message := messages[i]
// if multiStepTool && message.Role == "user" {
// // Check if content starts with <tool_response> and ends with </tool_response>
// content := message.Content
// if !(strings.HasPrefix(content, "<tool_response>") && strings.HasSuffix(content, "</tool_response>")) {
// multiStepTool = false
// lastQueryIndex = i
// }
// }
// }
// for i, message := range messages {
// content := renderContent(message, true)
// if message.Role == "user" || message.Role == "system" && i != 0 {
// sb.WriteString("<|im_start|>" + message.Role + "\n" + content + "<|im_end|>\n")
// } else if message.Role == "assistant" {
// contentReasoning := ""
// if message.Thinking != "" {
// contentReasoning = message.Thinking
// } else if strings.Contains(content, "</think>") {
// contentReasoning = strings.Split(content, "</think>")[0]
// contentReasoning = strings.TrimRight(contentReasoning, "\n")
// contentReasoningSplit := strings.Split(contentReasoning, "<think>")
// contentReasoning = contentReasoningSplit[len(contentReasoningSplit)-1]
// contentReasoning = strings.TrimLeft(contentReasoning, "\n")
// contentSplit := strings.Split(content, "</think>")
// content = contentSplit[len(contentSplit)-1]
// content = strings.TrimLeft(content, "\n")
// }
// if i > lastQueryIndex {
// if i == len(messages)-1 || contentReasoning != "" {
// sb.WriteString("<|im_start|>" + message.Role + "\n<think>\n" + strings.Trim(contentReasoning, "\n") + "\n</think>\n\n" + strings.TrimLeft(content, "\n"))
// } else {
// sb.WriteString("<|im_start|>" + message.Role + "\n" + content)
// }
// } else {
// sb.WriteString("<|im_start|>" + message.Role + "\n" + content)
// }
// if len(message.ToolCalls) > 0 {
// for j, toolCall := range message.ToolCalls {
// if j > 0 || content != "" {
// sb.WriteString("\n")
// }
// sb.WriteString("<tool_call>\n{\"name\": \"" + toolCall.Function.Name + "\", \"arguments\": ")
// if b, err := marshalWithSpaces(toolCall.Function.Arguments); err == nil {
// sb.Write(b)
// }
// sb.WriteString("}\n</tool_call>")
// }
// }
// sb.WriteString("<|im_end|>\n")
// } else if message.Role == "tool" {
// if i == 0 || messages[i-1].Role != "tool" {
// sb.WriteString("<|im_start|>user")
// }
// sb.WriteString("\n<tool_response>\n" + message.Content + "\n</tool_response>")
// if i == len(messages)-1 || messages[i+1].Role != "tool" {
// sb.WriteString("<|im_end|>\n")
// }
// }
// }
// sb.WriteString("<|im_start|>assistant<think>\n")
// return sb.String(), nil
// }

View File

@ -1,27 +1,29 @@
package renderers
import (
"fmt"
import "github.com/ollama/ollama/api"
"github.com/ollama/ollama/api"
)
// type rendererFunc func([]api.Message, []api.Tool, *api.ThinkValue) (string, error)
type rendererFunc func([]api.Message, []api.Tool, *api.ThinkValue) (string, error)
// func RenderWithRenderer(name string, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
// renderer := rendererForName(name)
// if renderer == nil {
// return "", fmt.Errorf("unknown renderer %q", name)
// }
// return renderer(msgs, tools, think)
// }
func RenderWithRenderer(name string, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
renderer := rendererForName(name)
if renderer == nil {
return "", fmt.Errorf("unknown renderer %q", name)
}
return renderer(msgs, tools, think)
type Renderer interface {
Render(messages []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error)
}
func rendererForName(name string) rendererFunc {
func RendererForName(name string) Renderer {
switch name {
case "qwen3-coder":
return Qwen3CoderRenderer
renderer := &Qwen3CoderRenderer{false} // this is not implemented yet
return renderer
case "qwen3-vl":
return Qwen3VLRenderer
renderer := &Qwen3VLRenderer{false} // not a thinking model?
return renderer
default:
return nil
}

View File

@ -106,7 +106,9 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
func renderPrompt(m *Model, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
if m.Config.Renderer != "" {
rendered, err := renderers.RenderWithRenderer(m.Config.Renderer, msgs, tools, think)
// rendered, err := renderers.RenderWithRenderer(m.Config.Renderer, msgs, tools, think)
renderer := renderers.RendererForName(m.Config.Renderer)
rendered, err := renderer.Render(msgs, tools, think)
if err != nil {
return "", err
}