First iteration of the non-thinking qwen3 renderer

This commit is contained in:
Grace Guo 2025-10-07 11:56:10 -07:00
parent 6976917864
commit ec46dc0660
2 changed files with 372 additions and 12 deletions

View File

@ -66,6 +66,7 @@ func renderContent(content api.Message, doVisionCount bool) string {
func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
var sb strings.Builder
isThinking := false
if len(tools) > 0 {
sb.WriteString(imStartTag + "system\n")
@ -105,6 +106,9 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
sb.WriteString("<|im_start|>" + message.Role + "\n" + content + "<|im_end|>\n")
} else if message.Role == "assistant" {
contentReasoning := ""
// here we need to reconstruct
if isThinking { // we only do this if its a thinking model (i.e contentReasoning != "" if its a thinking model)
if message.Thinking != "" {
contentReasoning = message.Thinking
} else if strings.Contains(content, "</think>") {
@ -120,8 +124,11 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
content = contentSplit[len(contentSplit)-1]
content = strings.TrimLeft(content, "\n")
}
}
// reconstruct the content
if i > lastQueryIndex {
// isThinking && i > lastQueryIndex
if isThinking && i > lastQueryIndex { // if it is a thinking model
if i == len(messages)-1 || contentReasoning != "" {
sb.WriteString("<|im_start|>" + message.Role + "\n<think>\n" + strings.Trim(contentReasoning, "\n") + "\n</think>\n\n" + strings.TrimLeft(content, "\n"))
} else {
@ -158,6 +165,10 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
}
sb.WriteString("<|im_start|>assistant\n")
if isThinking {
sb.WriteString("<think>\n") // Thinking models end with <|im_start|>assistant\n<think>\n
}
return sb.String(), nil
}

View File

@ -0,0 +1,349 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
// TestQwen3VLNonThinkingRenderer checks the prompt text returned by
// Qwen3VLRenderer when it is called with a nil think value. Every table entry
// pairs a conversation (plus optional tool definitions) with the exact string
// the renderer is expected to produce; mismatches are reported as a cmp diff.
func TestQwen3VLNonThinkingRenderer(t *testing.T) {
	tests := []struct {
		name     string
		msgs     []api.Message
		tools    []api.Tool
		expected string
	}{
		{
			name: "basic",
			msgs: []api.Message{
				{Role: "system", Content: "You are a helpful assistant."},
				{Role: "user", Content: "Hello, how are you?"},
			},
			expected: `<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Hello, how are you?<|im_end|>
<|im_start|>assistant
`,
		},
		{
			// No system message; the conversation ends on an assistant turn
			// whose content carries literal <think> tags. The expected output
			// keeps those tags verbatim.
			name: "With thinking, end assistant.",
			msgs: []api.Message{
				{Role: "user", Content: "Tell me a story in two sentences."},
				{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think>"},
			},
			expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><|im_end|>
<|im_start|>assistant
`,
		},
		{
			// Two consecutive <think> blocks in one assistant message; both
			// are expected to appear unchanged in the output.
			name: "Multiple thinking",
			msgs: []api.Message{
				{Role: "user", Content: "Tell me a story in two sentences."},
				{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
			},
			expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
<|im_start|>assistant
`,
		},
		{
			// <think> tags appear in both user and assistant content across
			// several turns; every message is expected to be echoed as-is.
			name: "Multiple thinking, multiple messages.",
			msgs: []api.Message{
				{Role: "user", Content: "Tell me a story in two sentences."},
				{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
				{Role: "user", Content: "What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think>"},
				{Role: "assistant", Content: "I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think>"},
			},
			expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
<|im_start|>user
What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think><|im_end|>
<|im_start|>assistant
I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think><|im_end|>
<|im_start|>assistant
`,
		},
		{
			// A single attached image is expected to show up as an [img-0]
			// placeholder in front of the user text.
			// NOTE(review): this expectation was written against a local run;
			// a remote model may need a different form — confirm.
			// TODO: there is no video case yet.
			name: "Image",
			msgs: []api.Message{
				{Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData(IMAGE2_BASE64)}},
			},
			expected: `<|im_start|>user
[img-0]Describe this image.<|im_end|>
<|im_start|>assistant
`,
		},
		{
			// Two attached images are expected to yield [img-0][img-1], in
			// attachment order, ahead of the user text.
			name: "Multiple images",
			msgs: []api.Message{
				{Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData(IMAGE1_BASE64), api.ImageData(IMAGE2_BASE64)}},
			},
			expected: `<|im_start|>user
[img-0][img-1]Describe these images.<|im_end|>
<|im_start|>assistant
`,
		},
		{
			// One tool definition, one assistant tool call and its tool
			// response, followed by a new user question.
			name: "with tools and response",
			msgs: []api.Message{
				{Role: "system", Content: "You are a helpful assistant with access to tools."},
				{Role: "user", Content: "What's the weather like in New York?"},
				{
					Role:    "assistant",
					Content: "I'll check the weather in New York for you.",
					ToolCalls: []api.ToolCall{
						{
							Function: api.ToolCallFunction{
								Name: "get-current-weather",
								Arguments: map[string]any{
									"location": "New York",
									"unit":     "fahrenheit",
								},
							},
						},
					},
				},
				{Role: "tool", Content: "80", ToolName: "get-current-weather"},
				{Role: "user", Content: "That sounds nice! What about San Francisco?"},
			},
			tools: []api.Tool{
				{
					Type: "function",
					Function: api.ToolFunction{
						Name:        "get-current-weather",
						Description: "Get the current weather for a location",
						Parameters: api.ToolFunctionParameters{
							Type:     "object",
							Required: []string{"location"},
							Properties: map[string]api.ToolProperty{
								"location": {
									Type:        api.PropertyType{"string"},
									Description: "The city and state, e.g. San Francisco, CA",
								},
								"unit": {
									Type:        api.PropertyType{"string"},
									Enum:        []any{"celsius", "fahrenheit"},
									Description: "The temperature unit",
								},
							},
						},
					},
				},
			},
			expected: `<|im_start|>system
You are a helpful assistant with access to tools.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get-current-weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
What's the weather like in New York?<|im_end|>
<|im_start|>assistant
I'll check the weather in New York for you.
<tool_call>
{"name": "get-current-weather", "arguments": {"location": "New York", "unit": "fahrenheit"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
80
</tool_response><|im_end|>
<|im_start|>user
That sounds nice! What about San Francisco?<|im_end|>
<|im_start|>assistant
`,
		},
		{
			// Two tool definitions, one assistant message with two tool
			// calls, and two consecutive tool responses that are expected to
			// be grouped into a single user turn.
			name: "With tools and response, multiple tool calls",
			msgs: []api.Message{
				{
					Role:    "system",
					Content: "You are a helpful assistant with access to tools.",
				},
				{
					Role:    "user",
					Content: "Call two tools for me: add and multiply.",
				},
				{
					Role:    "assistant",
					Content: "Sure, I'll call both tools for you.",
					ToolCalls: []api.ToolCall{
						{
							Function: api.ToolCallFunction{
								Name: "add",
								Arguments: map[string]any{
									"a": 2,
									"b": 3,
								},
							},
						},
						{
							Function: api.ToolCallFunction{
								Name: "multiply",
								Arguments: map[string]any{
									"x": 4,
									"y": 5,
								},
							},
						},
					},
				},
				{
					Role:     "tool",
					Content:  "5",
					ToolName: "add",
				},
				{
					Role:     "tool",
					Content:  "20",
					ToolName: "multiply",
				},
				{
					Role:    "user",
					Content: "Thanks! What are the results?",
				},
			},
			tools: []api.Tool{
				{
					Type: "function",
					Function: api.ToolFunction{
						Name:        "add",
						Description: "Add two numbers",
						Parameters: api.ToolFunctionParameters{
							Type:     "object",
							Required: []string{"a", "b"},
							Properties: map[string]api.ToolProperty{
								"a": {Type: api.PropertyType{"integer"}, Description: "First number"},
								"b": {Type: api.PropertyType{"integer"}, Description: "Second number"},
							},
						},
					},
				},
				{
					Type: "function",
					Function: api.ToolFunction{
						Name:        "multiply",
						Description: "Multiply two numbers",
						Parameters: api.ToolFunctionParameters{
							Type:     "object",
							Required: []string{"x", "y"},
							Properties: map[string]api.ToolProperty{
								"x": {Type: api.PropertyType{"integer"}, Description: "First factor"},
								"y": {Type: api.PropertyType{"integer"}, Description: "Second factor"},
							},
						},
					},
				},
			},
			// NOTE(review): the expected schema below renders "a"/"b" with
			// only a type and "x"/"y" with only a description, although all
			// four properties above set both Type and Description. Confirm
			// this against the renderer's actual output.
			expected: `<|im_start|>system
You are a helpful assistant with access to tools.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "add", "description": "Add two numbers", "parameters": {"type": "object", "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}}, "required": ["a", "b"]}}}
{"type": "function", "function": {"name": "multiply", "description": "Multiply two numbers", "parameters": {"type": "object", "properties": {"x": {"description": "First factor"}, "y": {"description": "Second factor"}}, "required": ["x", "y"]}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
Call two tools for me: add and multiply.<|im_end|>
<|im_start|>assistant
Sure, I'll call both tools for you.
<tool_call>
{"name": "add", "arguments": {"a": 2, "b": 3}}
</tool_call>
<tool_call>
{"name": "multiply", "arguments": {"x": 4, "y": 5}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
5
</tool_response>
<tool_response>
20
</tool_response><|im_end|>
<|im_start|>user
Thanks! What are the results?<|im_end|>
<|im_start|>assistant
`,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// nil is passed for the think value in every case.
			rendered, err := Qwen3VLRenderer(tt.msgs, tt.tools, nil)
			if err != nil {
				t.Fatal(err)
			}
			// cmp.Diff(x, y) prefixes x-only lines with "-" and y-only lines
			// with "+", so the rendered output is "-got" and the fixture is
			// "+want".
			if diff := cmp.Diff(rendered, tt.expected); diff != "" {
				t.Errorf("mismatch (-got +want):\n%s", diff)
			}
		})
	}
}
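// TODO(review): the commented-out test below exercises formatToolCallArgument; confirm whether that helper is relevant to this renderer, then either enable the test or delete it.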
// func TestFormatToolCallArgumentVL(t *testing.T) {
// tests := []struct {
// name string
// arg any
// expected string
// }{
// {
// name: "string",
// arg: "foo",
// // notice no quotes around the string
// expected: "foo",
// },
// {
// name: "map",
// arg: map[string]any{"foo": "bar"},
// expected: "{\"foo\":\"bar\"}",
// },
// {
// name: "number",
// arg: 1,
// expected: "1",
// },
// {
// name: "boolean",
// arg: true,
// expected: "true",
// },
// }
// for _, tt := range tests {
// t.Run(tt.name, func(t *testing.T) {
// got := formatToolCallArgument(tt.arg)
// if got != tt.expected {
// t.Errorf("formatToolCallArgument(%v) = %v, want %v", tt.arg, got, tt.expected)
// }
// })
// }
// }