First iteration of non-thinking Qwen3 renderer

2025-10-07 11:56:10 -07:00 · 2025-10-07 11:56:10 -07:00 · ec46dc0660
parent 6976917864
commit ec46dc0660
2 changed files with 372 additions and 12 deletions
--- a/model/renderers/qwen3vl.go
+++ b/model/renderers/qwen3vl.go
@ -66,6 +66,7 @@ func renderContent(content api.Message, doVisionCount bool) string {

 func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
 	var sb strings.Builder
+	isThinking := false

 	if len(tools) > 0 {
 		sb.WriteString(imStartTag + "system\n")
@ -105,6 +106,9 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
 			sb.WriteString("<|im_start|>" + message.Role + "\n" + content + "<|im_end|>\n")
 		} else if message.Role == "assistant" {
 			contentReasoning := ""
+
+			// here we need to reconstruct
+			if isThinking { // we only do this if its a thinking model (i.e contentReasoning != "" if its a thinking model)
 				if message.Thinking != "" {
 					contentReasoning = message.Thinking
 				} else if strings.Contains(content, "</think>") {
@ -120,8 +124,11 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
 					content = contentSplit[len(contentSplit)-1]
 					content = strings.TrimLeft(content, "\n")
 				}
+			}
+			// reconstruct the content

-			if i > lastQueryIndex {
+			// isThinking && i > lastQueryIndex
+			if isThinking && i > lastQueryIndex { // if it is a thinking model
 				if i == len(messages)-1 || contentReasoning != "" {
 					sb.WriteString("<|im_start|>" + message.Role + "\n<think>\n" + strings.Trim(contentReasoning, "\n") + "\n</think>\n\n" + strings.TrimLeft(content, "\n"))
 				} else {
@ -158,6 +165,10 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
 	}

 	sb.WriteString("<|im_start|>assistant\n")
+	if isThinking {
+		sb.WriteString("<think>\n") // Thinking models end with <|im_start|>assistant\n<think>\n
+	}
+
 	return sb.String(), nil

 }
--- a/model/renderers/qwen3vl_nonthinking_test.go
+++ b/model/renderers/qwen3vl_nonthinking_test.go
@ -0,0 +1,349 @@
+package renderers
+
+import (
+	"testing"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/ollama/ollama/api"
+)
+
+func TestQwen3VLNonThinkingRenderer(t *testing.T) {
+	tests := []struct { // each case pairs chat input with the exact prompt string expected from Qwen3VLRenderer
+		name     string
+		msgs     []api.Message
+		images   []api.ImageData // NOTE(review): unused — no case sets it and the loop below never reads it
+		tools    []api.Tool
+		expected string
+	}{
+		{
+			name: "basic",
+			msgs: []api.Message{
+				{Role: "system", Content: "You are a helpful assistant."},
+				{Role: "user", Content: "Hello, how are you?"},
+			},
+			expected: `<|im_start|>system
+You are a helpful assistant.<|im_end|>
+<|im_start|>user
+Hello, how are you?<|im_end|>
+<|im_start|>assistant
+`,
+		},
+		{ // C
+			name: "With thinking, end assistant.",
+			msgs: []api.Message{
+				// {Role: "system", Content: "You are a helpful assistant."},
+				{Role: "user", Content: "Tell me a story in two sentences."},
+				{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think>"}, // inline <think> tags are expected to be rendered verbatim, not extracted as reasoning
+			},
+			expected: `<|im_start|>user
+Tell me a story in two sentences.<|im_end|>
+<|im_start|>assistant
+abc<think>To make this story interesting, I will speak in poetry.</think><|im_end|>
+<|im_start|>assistant
+`,
+		},
+		{ // C
+			name: "Multiple thinking",
+			msgs: []api.Message{
+				{Role: "user", Content: "Tell me a story in two sentences."},
+				{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
+			},
+			expected: `<|im_start|>user
+Tell me a story in two sentences.<|im_end|>
+<|im_start|>assistant
+abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
+<|im_start|>assistant
+`, // both <think> blocks are expected to stay inline in the rendered content
+		},
+		{ // C
+			name: "Multiple thinking, multiple messages.",
+			msgs: []api.Message{
+				{Role: "user", Content: "Tell me a story in two sentences."},
+				{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
+				{Role: "user", Content: "What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think>"},
+				{Role: "assistant", Content: "I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think>"},
+			},
+			expected: `<|im_start|>user
+Tell me a story in two sentences.<|im_end|>
+<|im_start|>assistant
+abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
+<|im_start|>user
+What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think><|im_end|>
+<|im_start|>assistant
+I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think><|im_end|>
+<|im_start|>assistant
+`,
+		},
+		{
+			name: "Image",
+			msgs: []api.Message{ // NOTE(review): original author suspected this case may not go through the renderer — confirm
+				{Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData(IMAGE2_BASE64)}}, // expects an [img-0] placeholder ahead of the text
+			}, // NOTE(review): written against local rendering; a remote model may need a different expectation
+			expected: `<|im_start|>user
+[img-0]Describe this image.<|im_end|>
+<|im_start|>assistant
+`,
+		}, // TODO: no video case yet — unclear whether video input can be expressed here
+		{
+			name: "Multiple images",
+			msgs: []api.Message{
+				{Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData(IMAGE1_BASE64), api.ImageData(IMAGE2_BASE64)}},
+			},
+			expected: `<|im_start|>user
+[img-0][img-1]Describe these images.<|im_end|>
+<|im_start|>assistant
+`,
+		},
+		{
+			name: "with tools and response",
+			msgs: []api.Message{
+				{Role: "system", Content: "You are a helpful assistant with access to tools."},
+				{Role: "user", Content: "What's the weather like in New York?"},
+				{
+					Role:    "assistant",
+					Content: "I'll check the weather in New York for you.",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "get-current-weather",
+								Arguments: map[string]any{
+									"location": "New York",
+									"unit":     "fahrenheit",
+								},
+							},
+						},
+					},
+				},
+				{Role: "tool", Content: "80", ToolName: "get-current-weather"},
+				{Role: "user", Content: "That sounds nice! What about San Francisco?"},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "get-current-weather",
+						Description: "Get the current weather for a location",
+						Parameters: api.ToolFunctionParameters{
+							Type:     "object",
+							Required: []string{"location"},
+							Properties: map[string]api.ToolProperty{
+								"location": {
+									Type:        api.PropertyType{"string"},
+									Description: "The city and state, e.g. San Francisco, CA",
+								},
+								"unit": {
+									Type:        api.PropertyType{"string"},
+									Enum:        []any{"celsius", "fahrenheit"},
+									Description: "The temperature unit",
+								},
+							},
+						},
+					},
+				},
+			},
+			expected: `<|im_start|>system
+You are a helpful assistant with access to tools.
+
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{"type": "function", "function": {"name": "get-current-weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call><|im_end|>
+<|im_start|>user
+What's the weather like in New York?<|im_end|>
+<|im_start|>assistant
+I'll check the weather in New York for you.
+<tool_call>
+{"name": "get-current-weather", "arguments": {"location": "New York", "unit": "fahrenheit"}}
+</tool_call><|im_end|>
+<|im_start|>user
+<tool_response>
+80
+</tool_response><|im_end|>
+<|im_start|>user
+That sounds nice! What about San Francisco?<|im_end|>
+<|im_start|>assistant
+`,
+		},
+		{
+			name: "With tools and response, multiple tool calls",
+			msgs: []api.Message{
+				{
+					Role:    "system",
+					Content: "You are a helpful assistant with access to tools.",
+				},
+				{
+					Role:    "user",
+					Content: "Call two tools for me: add and multiply.",
+				},
+				{
+					Role:    "assistant",
+					Content: "Sure, I'll call both tools for you.",
+					ToolCalls: []api.ToolCall{
+						{
+							Function: api.ToolCallFunction{
+								Name: "add",
+								Arguments: map[string]any{
+									"a": 2,
+									"b": 3,
+								},
+							},
+						},
+						{
+							Function: api.ToolCallFunction{
+								Name: "multiply",
+								Arguments: map[string]any{
+									"x": 4,
+									"y": 5,
+								},
+							},
+						},
+					},
+				},
+				{
+					Role:     "tool",
+					Content:  "5",
+					ToolName: "add",
+				},
+				{
+					Role:     "tool",
+					Content:  "20",
+					ToolName: "multiply",
+				},
+				{
+					Role:    "user",
+					Content: "Thanks! What are the results?",
+				},
+			},
+			tools: []api.Tool{
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "add",
+						Description: "Add two numbers",
+						Parameters: api.ToolFunctionParameters{
+							Type:     "object",
+							Required: []string{"a", "b"},
+							Properties: map[string]api.ToolProperty{
+								"a": {Type: api.PropertyType{"integer"}, Description: "First number"},
+								"b": {Type: api.PropertyType{"integer"}, Description: "Second number"},
+							},
+						},
+					},
+				},
+				{
+					Type: "function",
+					Function: api.ToolFunction{
+						Name:        "multiply",
+						Description: "Multiply two numbers",
+						Parameters: api.ToolFunctionParameters{
+							Type:     "object",
+							Required: []string{"x", "y"},
+							Properties: map[string]api.ToolProperty{
+								"x": {Type: api.PropertyType{"integer"}, Description: "First factor"},
+								"y": {Type: api.PropertyType{"integer"}, Description: "Second factor"},
+							},
+						},
+					},
+				},
+			},
+			expected: `<|im_start|>system
+You are a helpful assistant with access to tools.
+
+# Tools
+
+You may call one or more functions to assist with the user query.
+
+You are provided with function signatures within <tools></tools> XML tags:
+<tools>
+{"type": "function", "function": {"name": "add", "description": "Add two numbers", "parameters": {"type": "object", "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}}, "required": ["a", "b"]}}}
+{"type": "function", "function": {"name": "multiply", "description": "Multiply two numbers", "parameters": {"type": "object", "properties": {"x": {"description": "First factor"}, "y": {"description": "Second factor"}}, "required": ["x", "y"]}}}
+</tools>
+
+For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
+<tool_call>
+{"name": <function-name>, "arguments": <args-json-object>}
+</tool_call><|im_end|>
+<|im_start|>user
+Call two tools for me: add and multiply.<|im_end|>
+<|im_start|>assistant
+Sure, I'll call both tools for you.
+<tool_call>
+{"name": "add", "arguments": {"a": 2, "b": 3}}
+</tool_call>
+<tool_call>
+{"name": "multiply", "arguments": {"x": 4, "y": 5}}
+</tool_call><|im_end|>
+<|im_start|>user
+<tool_response>
+5
+</tool_response>
+<tool_response>
+20
+</tool_response><|im_end|>
+<|im_start|>user
+Thanks! What are the results?<|im_end|>
+<|im_start|>assistant
+`, // NOTE(review): expected tool JSON omits "description" for add's a/b and "type" for multiply's x/y although the tools above set both — confirm against renderer output
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			rendered, err := Qwen3VLRenderer(tt.msgs, tt.tools, nil) // think value is nil; the renderer's signature discards this argument (declared as _)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if diff := cmp.Diff(rendered, tt.expected); diff != "" {
+				t.Errorf("mismatch (-got +want):\n%s", diff)
+			}
+		})
+	}
+}
+
+// TODO: decide what formatToolCallArgument is needed for in this renderer before enabling the test below.
+
+// func TestFormatToolCallArgumentVL(t *testing.T) {
+// 	tests := []struct {
+// 		name     string
+// 		arg      any
+// 		expected string
+// 	}{
+// 		{
+// 			name: "string",
+// 			arg:  "foo",
+// 			// notice no quotes around the string
+// 			expected: "foo",
+// 		},
+// 		{
+// 			name:     "map",
+// 			arg:      map[string]any{"foo": "bar"},
+// 			expected: "{\"foo\":\"bar\"}",
+// 		},
+// 		{
+// 			name:     "number",
+// 			arg:      1,
+// 			expected: "1",
+// 		},
+// 		{
+// 			name:     "boolean",
+// 			arg:      true,
+// 			expected: "true",
+// 		},
+// 	}
+// 	for _, tt := range tests {
+// 		t.Run(tt.name, func(t *testing.T) {
+// 			got := formatToolCallArgument(tt.arg)
+// 			if got != tt.expected {
+// 				t.Errorf("formatToolCallArgument(%v) = %v, want %v", tt.arg, got, tt.expected)
+// 			}
+// 		})
+// 	}
+// }