Compare commits

...

4 Commits

11 changed files with 662 additions and 140 deletions

View File

@ -16,6 +16,8 @@ type Parser interface {
HasThinkingSupport() bool
}
// used like builtinParser := parsers.ParserForName(m.Config.Parser)
func ParserForName(name string) Parser {
switch name {
case "qwen3-coder":

View File

@ -13,17 +13,25 @@ import (
"github.com/ollama/ollama/logutil"
)
// initialState reports which state the parser should begin in: parsers
// with thinking support start out collecting thinking content, all
// others start out collecting plain content.
func (p *Qwen3VLParser) initialState() qwenParserState {
	if !p.HasThinkingSupport() {
		return CollectingContent
	}
	return CollectingThinkingContent
}
// TODO: ensure callers invoke Init so the parser actually starts from initialState().
const (
CollectingContent qwenParserState = iota
CollectingThinkingContent
CollectingContent qwenParserState = iota
CollectingThinkingContent // qwenParserState = iota
CollectingToolContent
)
// Tags delimiting a thinking section in model output. Qwen3-VL closes
// thinking with "</think>" (per the updated parser tests); the explicit
// "<thinking>" open tag is still scanned for by findFirstTag.
//
// NOTE: the previous revision declared thinkingCloseTag twice (old
// "</thinking>" and new "</think>"), which is a redeclaration error;
// this keeps only the new value.
const (
	thinkingOpenTag  = "<thinking>"
	thinkingCloseTag = "</think>"
)
// TODO(gguo): add a field for isThinking
type Qwen3VLParser struct {
state qwenParserState
buffer strings.Builder
@ -34,12 +42,14 @@ func (p *Qwen3VLParser) HasToolSupport() bool {
return true
}
// TODO(gguo): changes this to reference an objects param
// HasThinkingSupport reports whether this parser understands thinking
// output. Always true for Qwen3-VL in this revision (a TODO above notes
// it should eventually come from a per-model parameter).
func (p *Qwen3VLParser) HasThinkingSupport() bool {
return true
}
// Init primes the parser before a generation: it records the available
// tools and resets the state machine to its initial state. The tools
// are returned unchanged (no rewriting needed for this model).
// lastMessage is currently unused — presumably accepted to satisfy the
// Parser interface; TODO confirm.
func (p *Qwen3VLParser) Init(tools []api.Tool, lastMessage *api.Message) []api.Tool {
p.tools = tools
p.state = p.initialState()
return tools
}
@ -98,7 +108,6 @@ func (p *Qwen3VLParser) parseEvents() []qwenEvent {
return all
}
// TODO: consider a clearer name for emitContentBeforeTag (it both emits and consumes).
func emitContentBeforeTag(p *Qwen3VLParser, events []qwenEvent, tag string) []qwenEvent {
split := strings.SplitN(p.buffer.String(), tag, 2)
before := split[0]
@ -112,49 +121,15 @@ func emitContentBeforeTag(p *Qwen3VLParser, events []qwenEvent, tag string) []qw
return events
}
// findFirstTag scans the parser's buffer for each of the given tags and
// returns whichever one occurs earliest. If none of the tags are
// present it returns the empty string.
func findFirstTag(p *Qwen3VLParser, tags []string) string {
	var (
		best    string
		bestIdx = -1
	)
	haystack := p.buffer.String()
	for _, candidate := range tags {
		pos := strings.Index(haystack, candidate)
		if pos == -1 {
			continue
		}
		if bestIdx == -1 || pos < bestIdx {
			bestIdx = pos
			best = candidate
		}
	}
	return best
}
func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
var events []qwenEvent
firstTag := findFirstTag(p, []string{thinkingOpenTag, toolOpenTag})
switch p.state {
case CollectingContent:
if firstTag == thinkingOpenTag {
events = emitContentBeforeTag(p, events, thinkingOpenTag)
p.state = CollectingThinkingContent
return events, true
} else if firstTag == toolOpenTag {
if strings.Contains(p.buffer.String(), toolOpenTag) {
events = emitContentBeforeTag(p, events, toolOpenTag)
p.state = CollectingToolContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), thinkingOpenTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
events = append(events, qwenEventContent{content: unambiguous})
return events, false
} else if overlapLen := overlap(p.buffer.String(), toolOpenTag); overlapLen > 0 {
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
@ -164,11 +139,14 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
events = append(events, qwenEventContent{content: unambiguous})
if len(unambiguous) > 0 { // why does qwen3coder not have this here
events = append(events, qwenEventContent{content: unambiguous})
}
return events, false
} else {
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
ambiguousStart := len(p.buffer.String()) - whitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
@ -195,21 +173,46 @@ func (p *Qwen3VLParser) eat() ([]qwenEvent, bool) {
} else {
return events, false
}
case CollectingThinkingContent:
case CollectingThinkingContent: // so we want to hip the unambiguous stuff
if strings.Contains(p.buffer.String(), thinkingCloseTag) {
split := strings.SplitN(p.buffer.String(), thinkingCloseTag, 2)
fmt.Println("split", split)
// fmt.Println("split", split)
before := split[0]
if len(before) == 0 {
slog.Warn("qwen tool call closing tag found but no content before it")
}
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
events = append(events, qwenEventThinkingContent{content: before})
if len(before) > 0 {
events = append(events, qwenEventThinkingContent{content: before})
}
p.buffer.Reset()
p.buffer.WriteString(after)
p.state = CollectingContent
return events, true
} else if overlapLen := overlap(p.buffer.String(), thinkingCloseTag); overlapLen > 0 { // we see part of a close thinking tag
beforePartialTag := p.buffer.String()[:len(p.buffer.String())-overlapLen]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, qwenEventThinkingContent{content: unambiguous})
}
return events, false
} else {
whitespaceLen := trailingWhitespaceLen(p.buffer.String())
ambiguousStart := len(p.buffer.String()) - whitespaceLen
unambiguous := p.buffer.String()[:ambiguousStart]
ambiguous := p.buffer.String()[ambiguousStart:]
p.buffer.Reset()
p.buffer.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, qwenEventThinkingContent{content: unambiguous})
}
return events, false
}
default:

View File

@ -15,7 +15,7 @@ import (
// return t
// }
func TestQwen3VLParserStreaming(t *testing.T) {
func TestQwen3VLThinkingParserStreaming(t *testing.T) {
type step struct {
input string
wantEvents []qwenEvent
@ -30,21 +30,33 @@ func TestQwen3VLParserStreaming(t *testing.T) {
{
desc: "simple thinking",
steps: []step{
{input: "<thinking>abc</thinking>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
{input: "abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
},
},
{
desc: "simple trip thinking",
steps: []step{
{input: "<think>abc</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "<think>abc"}}},
},
},
{
desc: "thinking with split tags",
steps: []step{
{input: "<thinking>abc", wantEvents: []qwenEvent{}},
{input: "</thinking>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
{input: "abc", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}}},
{input: "</think>", wantEvents: []qwenEvent{}},
},
},
{
desc: "multiple think tags",
steps: []step{
{input: "abc<think>actually, is not thinking</think>", wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>actually, is not thinking"}}},
},
},
{
desc: "thinking and tool call",
steps: []step{
{
input: "<thinking>I'm thinking</thinking><tool_call>I'm tool calling</tool_call>",
input: "I'm thinking</think><tool_call>I'm tool calling</tool_call>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking"},
qwenEventRawToolCall{raw: "I'm tool calling"},
@ -56,7 +68,7 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "thinking and content",
steps: []step{
{
input: "<thinking>I'm thinking</thinking>I'm content",
input: "I'm thinking</think>I'm content",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking"},
qwenEventContent{content: "I'm content"},
@ -71,10 +83,10 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "nested thinking (outside thinking, inside thinking)",
steps: []step{
{
input: "<thinking>I'm thinking<thinking>I'm nested thinking</thinking></thinking>",
input: "I'm thinking<think>I'm nested thinking</think></think>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking<thinking>I'm nested thinking"},
qwenEventContent{content: "</thinking>"},
qwenEventThinkingContent{content: "I'm thinking<think>I'm nested thinking"},
qwenEventContent{content: "</think>"},
},
},
},
@ -83,10 +95,10 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "interleaved thinking",
steps: []step{
{
input: "<thinking>I'm thinking<thinking></thinking>I'm actually content</thinking>",
input: "<think>I'm thinking</think>I'm actually content</think>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking<thinking>"},
qwenEventContent{content: "I'm actually content</thinking>"},
qwenEventThinkingContent{content: "<think>I'm thinking"},
qwenEventContent{content: "I'm actually content</think>"},
},
},
},
@ -95,7 +107,7 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "nested thinking and tool call (outside thinking, inside tool call)",
steps: []step{
{
input: "<thinking>I'm thinking<tool_call>I'm nested tool call</tool_call></thinking>",
input: "I'm thinking<tool_call>I'm nested tool call</tool_call></think>",
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm nested tool call</tool_call>"}},
},
},
@ -104,8 +116,11 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "nested thinking and tool call (outside tool call, inside thinking)",
steps: []step{
{
input: "<tool_call>I'm nested tool call<thinking>I'm thinking</thinking></tool_call>",
wantEvents: []qwenEvent{qwenEventRawToolCall{raw: "I'm nested tool call<thinking>I'm thinking</thinking>"}},
input: "<tool_call>I'm nested tool call<think>I'm thinking</think></tool_call>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "<tool_call>I'm nested tool call<think>I'm thinking"},
qwenEventContent{content: "</tool_call>"},
},
},
},
},
@ -113,12 +128,12 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "interleaved thinking and tool call",
steps: []step{
{
input: "<thinking>I'm thinking<tool_call>I'm NOT a nested tool call</thinking></tool_call><tool_call>I'm nested tool call 2<thinking></tool_call></thinking>",
input: "I'm thinking<tool_call>I'm NOT a nested tool call</think></tool_call><tool_call>I'm nested tool call 2<think></tool_call></think>",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "I'm thinking<tool_call>I'm NOT a nested tool call"},
qwenEventContent{content: "</tool_call>"},
qwenEventRawToolCall{raw: "I'm nested tool call 2<thinking>"},
qwenEventContent{content: "</thinking>"},
qwenEventRawToolCall{raw: "I'm nested tool call 2<think>"},
qwenEventContent{content: "</think>"},
},
},
},
@ -127,16 +142,12 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "partial thinking tag fakeout",
steps: []step{
{
input: "abc<thinking",
wantEvents: []qwenEvent{
qwenEventContent{content: "abc"},
},
input: "abc</think",
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc"}},
},
{
input: " fakeout",
wantEvents: []qwenEvent{
qwenEventContent{content: "<thinking fakeout"},
},
input: " fakeout",
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "</think fakeout"}},
},
},
},
@ -144,9 +155,46 @@ func TestQwen3VLParserStreaming(t *testing.T) {
desc: "partial thinking incomplete",
steps: []step{
{
input: "abc<thinking>unfinished</thinking", // when something is ambiguious, we dont emit anything
input: "abc<think>unfinished</think", // when something is ambiguious, we dont emit anything
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
},
},
},
{
desc: "test with split thinking and content",
steps: []step{
{
input: "abc<think>unfinished</th", // when something is ambiguious, we dont emit anything
wantEvents: []qwenEvent{qwenEventThinkingContent{content: "abc<think>unfinished"}},
},
{
input: "ink> def",
wantEvents: []qwenEvent{
qwenEventContent{content: "abc"},
qwenEventContent{content: "def"},
},
},
},
},
{
desc: "thinking with no tags",
steps: []step{
{
input: "Hello I am thinking",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "Hello I am thinking"},
},
},
{
input: "Hello I am thinking some more",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "Hello I am thinking some more"},
},
},
{
input: "Hello I am think</think> NOT",
wantEvents: []qwenEvent{
qwenEventThinkingContent{content: "Hello I am think"},
qwenEventContent{content: "NOT"},
},
},
},
@ -184,42 +232,9 @@ func TestQwen3VLParserStreaming(t *testing.T) {
}
}
// TestQwen3VLComplex feeds a long free-form model output through the parser.
// NOTE(review): the expected events below ("bruh") are a debugging
// placeholder — as written this case cannot pass. Replace wantEvents with
// the real event sequence for the emoji input, or delete the case.
func TestQwen3VLComplex(t *testing.T) {
type step struct {
input string
wantEvents []qwenEvent
}
// NOTE(review): desc and only are declared but never used by the loop
// below — consider t.Run(tc.desc, ...) and honoring the only flag.
cases := []struct {
desc string
steps []step
only bool
}{
{
desc: "simple tool call", // NOTE(review): input contains no tool call — description looks stale
steps: []step{
{
input: "Here are 30 distinct and popular emojis for you! 😊\n\n1. 😂 \n2. ❤️ \n3. 🌟 \n4. 🐶 \n5. 🍕 \n6. ✨ \n7. 🌈 \n8. 🎉 \n9. 🌎 \n10. 🦁 \n11. 💯 \n12. 🥰 \n13. 🌸 \n14. 🚀 \n15. 🌊 \n16. 🍦 \n17. 🌙 \n18. 🌞 \n19. 🌻 \n20. 🦋 \n21. 🍃 \n22. 🏆 \n23. 🌮 \n24. 🧸 \n25. 🎮 \n26. 📚 \n27. ✈️ \n28. 🌟 (sparkles) \n29. 🌈 (rainbow) \n30. 🥳 \n\n*Bonus fun fact:* The 😂 (Face with Tears of Joy) was Oxford Dictionaries' Word of the Year in 2015! 🎉 \nLet me know if you'd like themed emojis (e.g., animals, food, or emotions)! 🐱🍕📚",
wantEvents: []qwenEvent{qwenEventContent{content: "bruh"}},
},
},
},
}
for _, tc := range cases {
// A fresh parser is built per step, so no state carries between
// steps of the same case (unlike the streaming parser test).
for i, step := range tc.steps {
parser := Qwen3VLParser{}
parser.buffer.WriteString(step.input)
gotEvents := parser.parseEvents()
if !reflect.DeepEqual(gotEvents, step.wantEvents) {
t.Errorf("step %d: input %q: got events %#v, want %#v", i, step.input, gotEvents, step.wantEvents)
}
}
}
}
// TODO: investigate the reported issue where JSON decoding cannot infer
// tool-argument types; decide whether a dedicated test is needed.
func TestQwen3VLToolParser(t *testing.T) {
func TestQwen3VLThinkingToolParser(t *testing.T) {
type step struct {
name string
rawToolCall string

View File

@ -55,7 +55,12 @@ func renderAdditionalKeys(obj any, handledKeys map[string]bool) string {
return sb.String()
}
func Qwen3CoderRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
// Qwen3CoderRenderer renders chat messages into the Qwen3-Coder prompt
// format via its Render method.
type Qwen3CoderRenderer struct {
isThinking bool // presumably toggles reasoning (<think>) rendering — TODO confirm; not visibly read by Render here
}
func (r *Qwen3CoderRenderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
// func Qwen3CoderRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
var sb strings.Builder
// filter out system messages and choose the first (if any) to win

View File

@ -288,7 +288,8 @@ call tool<|im_end|>
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := Qwen3CoderRenderer(tt.msgs, tt.tools, nil)
// rendered, err := Qwen3CoderRenderer(tt.msgs, tt.tools, nil)
rendered, err := (&Qwen3CoderRenderer{false}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}

View File

@ -49,7 +49,12 @@ func marshalWithSpaces(v any) ([]byte, error) {
return out, nil
}
func renderContent(content api.Message, doVisionCount bool) string {
// Qwen3VLRenderer renders chat messages into the Qwen3-VL prompt format
// via its Render method.
type Qwen3VLRenderer struct {
isThinking bool // when true, Render reconstructs <think> reasoning sections and ends the prompt with an open <think>
}
// func renderContent(content api.Message, doVisionCount bool) string {
func (r *Qwen3VLRenderer) renderContent(content api.Message, doVisionCount bool) string {
// This assumes all images are at the front of the message - same assumption as ollama/ollama/runner.go
var subSb strings.Builder
for _ = range content.Images {
@ -64,8 +69,10 @@ func renderContent(content api.Message, doVisionCount bool) string {
return subSb.String()
}
func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
// func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
func (r *Qwen3VLRenderer) Render(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
var sb strings.Builder
// r.isThinking = false
if len(tools) > 0 {
sb.WriteString(imStartTag + "system\n")
@ -99,29 +106,35 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
}
for i, message := range messages {
content := renderContent(message, true)
content := r.renderContent(message, true)
if message.Role == "user" || message.Role == "system" && i != 0 {
sb.WriteString("<|im_start|>" + message.Role + "\n" + content + "<|im_end|>\n")
} else if message.Role == "assistant" {
contentReasoning := ""
if message.Thinking != "" {
contentReasoning = message.Thinking
} else if strings.Contains(content, "</think>") {
contentReasoning = strings.Split(content, "</think>")[0]
contentReasoning = strings.TrimRight(contentReasoning, "\n")
contentReasoningSplit := strings.Split(contentReasoning, "<think>")
contentReasoning = contentReasoningSplit[len(contentReasoningSplit)-1]
// here we need to reconstruct
if r.isThinking { // we only do this if its a thinking model (i.e contentReasoning != "" if its a thinking model)
if message.Thinking != "" {
contentReasoning = message.Thinking
} else if strings.Contains(content, "</think>") {
contentReasoning = strings.Split(content, "</think>")[0]
contentReasoning = strings.TrimRight(contentReasoning, "\n")
contentReasoning = strings.TrimLeft(contentReasoning, "\n")
contentReasoningSplit := strings.Split(contentReasoning, "<think>")
contentReasoning = contentReasoningSplit[len(contentReasoningSplit)-1]
contentSplit := strings.Split(content, "</think>")
content = contentSplit[len(contentSplit)-1]
content = strings.TrimLeft(content, "\n")
contentReasoning = strings.TrimLeft(contentReasoning, "\n")
contentSplit := strings.Split(content, "</think>")
content = contentSplit[len(contentSplit)-1]
content = strings.TrimLeft(content, "\n")
}
}
// reconstruct the content
if i > lastQueryIndex {
// isThinking && i > lastQueryIndex
if r.isThinking && i > lastQueryIndex { // if it is a thinking model
if i == len(messages)-1 || contentReasoning != "" {
sb.WriteString("<|im_start|>" + message.Role + "\n<think>\n" + strings.Trim(contentReasoning, "\n") + "\n</think>\n\n" + strings.TrimLeft(content, "\n"))
} else {
@ -158,6 +171,10 @@ func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue
}
sb.WriteString("<|im_start|>assistant\n")
if r.isThinking {
sb.WriteString("<think>\n") // Thinking models end with <|im_start|>assistant\n<think>\n
}
return sb.String(), nil
}

View File

@ -0,0 +1,311 @@
package renderers
import (
"testing"
"github.com/google/go-cmp/cmp"
"github.com/ollama/ollama/api"
)
// TestQwen3VLNonThinkingRenderer exercises the Qwen3-VL renderer with
// isThinking = false: assistant <think>...</think> text is rendered
// inline verbatim instead of being re-assembled into a leading
// reasoning section.
func TestQwen3VLNonThinkingRenderer(t *testing.T) {
tests := []struct {
name string
msgs []api.Message
images []api.ImageData // NOTE(review): never read by this test body — remove or wire up
tools []api.Tool
expected string
}{
{
name: "basic",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Hello, how are you?"},
},
expected: `<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
Hello, how are you?<|im_end|>
<|im_start|>assistant
`,
},
{ // non-thinking: the <think> span stays inline in the replayed assistant turn
name: "With thinking, end assistant.",
msgs: []api.Message{
// {Role: "system", Content: "You are a helpful assistant."},
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think>"}, // NOTE(review): confirm verbatim passthrough is the intended non-thinking behavior
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><|im_end|>
<|im_start|>assistant
`,
},
{ // non-thinking: multiple <think> spans are also left untouched
name: "Multiple thinking",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
<|im_start|>assistant
`, // both thinking spans survive verbatim; nothing is extracted
},
{ // same passthrough across several turns, including <think> inside a user message
name: "Multiple thinking, multiple messages.",
msgs: []api.Message{
{Role: "user", Content: "Tell me a story in two sentences."},
{Role: "assistant", Content: "abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think>"},
{Role: "user", Content: "What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think>"},
{Role: "assistant", Content: "I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think>"},
},
expected: `<|im_start|>user
Tell me a story in two sentences.<|im_end|>
<|im_start|>assistant
abc<think>To make this story interesting, I will speak in poetry.</think><think>And I will speak in poetry after the first sentence.</think><|im_end|>
<|im_start|>user
What is the weather like in San Francisco? <think>I will check the weather in San Francisco for you.</think><|im_end|>
<|im_start|>assistant
I'll check the weather in San Francisco for you.<think>Speak poetry after the first sentence.</think><think>Speak poetry after the second sentence.</think><|im_end|>
<|im_start|>assistant
`,
},
{
name: "Image",
msgs: []api.Message{ // NOTE(review): expects local-runner style [img-N] placeholders — remote rendering may differ; confirm
{Role: "user", Content: "Describe this image.", Images: []api.ImageData{api.ImageData(IMAGE2_BASE64)}},
},
expected: `<|im_start|>user
[img-0]Describe this image.<|im_end|>
<|im_start|>assistant
`,
}, // TODO: add coverage for video inputs if/when the renderer supports them
{
name: "Multiple images",
msgs: []api.Message{
{Role: "user", Content: "Describe these images.", Images: []api.ImageData{api.ImageData(IMAGE1_BASE64), api.ImageData(IMAGE2_BASE64)}},
},
expected: `<|im_start|>user
[img-0][img-1]Describe these images.<|im_end|>
<|im_start|>assistant
`,
},
{
name: "with tools and response",
msgs: []api.Message{
{Role: "system", Content: "You are a helpful assistant with access to tools."},
{Role: "user", Content: "What's the weather like in New York?"},
{
Role: "assistant",
Content: "I'll check the weather in New York for you.",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "get-current-weather",
Arguments: map[string]any{
"location": "New York",
"unit": "fahrenheit",
},
},
},
},
},
{Role: "tool", Content: "80", ToolName: "get-current-weather"},
{Role: "user", Content: "That sounds nice! What about San Francisco?"},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "get-current-weather",
Description: "Get the current weather for a location",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"location"},
Properties: map[string]api.ToolProperty{
"location": {
Type: api.PropertyType{"string"},
Description: "The city and state, e.g. San Francisco, CA",
},
"unit": {
Type: api.PropertyType{"string"},
Enum: []any{"celsius", "fahrenheit"},
Description: "The temperature unit",
},
},
},
},
},
},
expected: `<|im_start|>system
You are a helpful assistant with access to tools.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "get-current-weather", "description": "Get the current weather for a location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit"}}, "required": ["location"]}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
What's the weather like in New York?<|im_end|>
<|im_start|>assistant
I'll check the weather in New York for you.
<tool_call>
{"name": "get-current-weather", "arguments": {"location": "New York", "unit": "fahrenheit"}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
80
</tool_response><|im_end|>
<|im_start|>user
That sounds nice! What about San Francisco?<|im_end|>
<|im_start|>assistant
`,
},
{
name: "With tools and response, multiple tool calls",
msgs: []api.Message{
{
Role: "system",
Content: "You are a helpful assistant with access to tools.",
},
{
Role: "user",
Content: "Call two tools for me: add and multiply.",
},
{
Role: "assistant",
Content: "Sure, I'll call both tools for you.",
ToolCalls: []api.ToolCall{
{
Function: api.ToolCallFunction{
Name: "add",
Arguments: map[string]any{
"a": 2,
"b": 3,
},
},
},
{
Function: api.ToolCallFunction{
Name: "multiply",
Arguments: map[string]any{
"x": 4,
"y": 5,
},
},
},
},
},
{
Role: "tool",
Content: "5",
ToolName: "add",
},
{
Role: "tool",
Content: "20",
ToolName: "multiply",
},
{
Role: "user",
Content: "Thanks! What are the results?",
},
},
tools: []api.Tool{
{
Type: "function",
Function: api.ToolFunction{
Name: "add",
Description: "Add two numbers",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"a", "b"},
Properties: map[string]api.ToolProperty{
"a": {Type: api.PropertyType{"integer"}, Description: "First number"},
"b": {Type: api.PropertyType{"integer"}, Description: "Second number"},
},
},
},
},
{
Type: "function",
Function: api.ToolFunction{
Name: "multiply",
Description: "Multiply two numbers",
Parameters: api.ToolFunctionParameters{
Type: "object",
Required: []string{"x", "y"},
Properties: map[string]api.ToolProperty{
"x": {Type: api.PropertyType{"integer"}, Description: "First factor"},
"y": {Type: api.PropertyType{"integer"}, Description: "Second factor"},
},
},
},
},
},
expected: `<|im_start|>system
You are a helpful assistant with access to tools.
# Tools
You may call one or more functions to assist with the user query.
You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "add", "description": "Add two numbers", "parameters": {"type": "object", "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}}, "required": ["a", "b"]}}}
{"type": "function", "function": {"name": "multiply", "description": "Multiply two numbers", "parameters": {"type": "object", "properties": {"x": {"description": "First factor"}, "y": {"description": "Second factor"}}, "required": ["x", "y"]}}}
</tools>
For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
<tool_call>
{"name": <function-name>, "arguments": <args-json-object>}
</tool_call><|im_end|>
<|im_start|>user
Call two tools for me: add and multiply.<|im_end|>
<|im_start|>assistant
Sure, I'll call both tools for you.
<tool_call>
{"name": "add", "arguments": {"a": 2, "b": 3}}
</tool_call>
<tool_call>
{"name": "multiply", "arguments": {"x": 4, "y": 5}}
</tool_call><|im_end|>
<|im_start|>user
<tool_response>
5
</tool_response>
<tool_response>
20
</tool_response><|im_end|>
<|im_start|>user
Thanks! What are the results?<|im_end|>
<|im_start|>assistant
`,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Previous function-style API, kept for reference:
// rendered, err := Qwen3VLRenderer(tt.msgs, tt.tools, nil)
// renderer := RendererForName("qwen3-vl")
rendered, err := (&Qwen3VLRenderer{false}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}
if diff := cmp.Diff(rendered, tt.expected); diff != "" {
t.Errorf("mismatch (-got +want):\n%s", diff)
}
})
}
}

View File

@ -17,7 +17,7 @@ var IMAGE2_BASE64 = "iVBORw0KGgoAAAANSUhEUgAAAQAAAAEACAIAAADTED8xAAADMElEQVR4nOz
// - [ ] set descriptions to omitempty?
// - [] images add the auto tag
func TestQwen3VLRenderer(t *testing.T) {
func TestQwen3VLThinkingRenderer(t *testing.T) {
tests := []struct {
name string
msgs []api.Message
@ -327,7 +327,8 @@ Thanks! What are the results?<|im_end|>
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
rendered, err := Qwen3VLRenderer(tt.msgs, tt.tools, nil)
// rendered, err := Qwen3VLRenderer(tt.msgs, tt.tools, nil)
rendered, err := (&Qwen3VLRenderer{true}).Render(tt.msgs, tt.tools, nil)
if err != nil {
t.Fatal(err)
}

View File

@ -0,0 +1,163 @@
// package renderers
// import (
// "encoding/json"
// "strings"
// "github.com/ollama/ollama/api"
// )
// var imageCount int
// var videoCount int
// func marshalWithSpaces(v any) ([]byte, error) {
// b, err := json.Marshal(v)
// if err != nil {
// return nil, err
// }
// out := make([]byte, 0, len(b)+len(b)/8)
// inStr, esc := false, false
// for _, c := range b {
// if inStr {
// out = append(out, c)
// if esc {
// esc = false
// continue
// }
// if c == '\\' {
// esc = true
// continue
// }
// if c == '"' {
// inStr = false
// }
// continue
// }
// switch c {
// case '"':
// inStr = true
// out = append(out, c)
// case ':':
// out = append(out, ':', ' ')
// case ',':
// out = append(out, ',', ' ')
// default:
// out = append(out, c)
// }
// }
// return out, nil
// }
// func renderContent(content api.Message, doVisionCount bool) string {
// // This assumes all images are at the front of the message - same assumption as ollama/ollama/runner.go
// var subSb strings.Builder
// for _ = range content.Images {
// if doVisionCount {
// imageCount++
// }
// subSb.WriteString("<|vision_start|><|image_pad|><|vision_end|>")
// }
// // TODO: support videos
// subSb.WriteString(content.Content)
// return subSb.String()
// }
// func Qwen3VLRenderer(messages []api.Message, tools []api.Tool, _ *api.ThinkValue) (string, error) {
// var sb strings.Builder
// if len(tools) > 0 {
// sb.WriteString(imStartTag + "system\n")
// if len(messages) > 0 && messages[0].Role == "system" {
// sb.WriteString(messages[0].Content + "\n\n")
// }
// sb.WriteString("# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>")
// for _, tool := range tools {
// sb.WriteString("\n")
// if b, err := marshalWithSpaces(tool); err == nil {
// sb.Write(b)
// }
// }
// sb.WriteString("\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n")
// } else if len(messages) > 0 && messages[0].Role == "system" {
// sb.WriteString("<|im_start|>system\n" + messages[0].Content + "<|im_end|>\n")
// }
// multiStepTool := true
// lastQueryIndex := len(messages) - 1
// for i := len(messages) - 1; i >= 0; i-- {
// message := messages[i]
// if multiStepTool && message.Role == "user" {
// // Check if content starts with <tool_response> and ends with </tool_response>
// content := message.Content
// if !(strings.HasPrefix(content, "<tool_response>") && strings.HasSuffix(content, "</tool_response>")) {
// multiStepTool = false
// lastQueryIndex = i
// }
// }
// }
// for i, message := range messages {
// content := renderContent(message, true)
// if message.Role == "user" || message.Role == "system" && i != 0 {
// sb.WriteString("<|im_start|>" + message.Role + "\n" + content + "<|im_end|>\n")
// } else if message.Role == "assistant" {
// contentReasoning := ""
// if message.Thinking != "" {
// contentReasoning = message.Thinking
// } else if strings.Contains(content, "</think>") {
// contentReasoning = strings.Split(content, "</think>")[0]
// contentReasoning = strings.TrimRight(contentReasoning, "\n")
// contentReasoningSplit := strings.Split(contentReasoning, "<think>")
// contentReasoning = contentReasoningSplit[len(contentReasoningSplit)-1]
// contentReasoning = strings.TrimLeft(contentReasoning, "\n")
// contentSplit := strings.Split(content, "</think>")
// content = contentSplit[len(contentSplit)-1]
// content = strings.TrimLeft(content, "\n")
// }
// if i > lastQueryIndex {
// if i == len(messages)-1 || contentReasoning != "" {
// sb.WriteString("<|im_start|>" + message.Role + "\n<think>\n" + strings.Trim(contentReasoning, "\n") + "\n</think>\n\n" + strings.TrimLeft(content, "\n"))
// } else {
// sb.WriteString("<|im_start|>" + message.Role + "\n" + content)
// }
// } else {
// sb.WriteString("<|im_start|>" + message.Role + "\n" + content)
// }
// if len(message.ToolCalls) > 0 {
// for j, toolCall := range message.ToolCalls {
// if j > 0 || content != "" {
// sb.WriteString("\n")
// }
// sb.WriteString("<tool_call>\n{\"name\": \"" + toolCall.Function.Name + "\", \"arguments\": ")
// if b, err := marshalWithSpaces(toolCall.Function.Arguments); err == nil {
// sb.Write(b)
// }
// sb.WriteString("}\n</tool_call>")
// }
// }
// sb.WriteString("<|im_end|>\n")
// } else if message.Role == "tool" {
// if i == 0 || messages[i-1].Role != "tool" {
// sb.WriteString("<|im_start|>user")
// }
// sb.WriteString("\n<tool_response>\n" + message.Content + "\n</tool_response>")
// if i == len(messages)-1 || messages[i+1].Role != "tool" {
// sb.WriteString("<|im_end|>\n")
// }
// }
// }
// sb.WriteString("<|im_start|>assistant<think>\n")
// return sb.String(), nil
// }

View File

@ -1,27 +1,29 @@
package renderers
import (
"fmt"
import "github.com/ollama/ollama/api"
"github.com/ollama/ollama/api"
)
// type rendererFunc func([]api.Message, []api.Tool, *api.ThinkValue) (string, error)
type rendererFunc func([]api.Message, []api.Tool, *api.ThinkValue) (string, error)
// func RenderWithRenderer(name string, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
// renderer := rendererForName(name)
// if renderer == nil {
// return "", fmt.Errorf("unknown renderer %q", name)
// }
// return renderer(msgs, tools, think)
// }
func RenderWithRenderer(name string, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
renderer := rendererForName(name)
if renderer == nil {
return "", fmt.Errorf("unknown renderer %q", name)
}
return renderer(msgs, tools, think)
type Renderer interface {
Render(messages []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error)
}
func rendererForName(name string) rendererFunc {
func RendererForName(name string) Renderer {
switch name {
case "qwen3-coder":
return Qwen3CoderRenderer
renderer := &Qwen3CoderRenderer{false} // this is not implemented yet
return renderer
case "qwen3-vl":
return Qwen3VLRenderer
renderer := &Qwen3VLRenderer{false} // not a thinking model?
return renderer
default:
return nil
}

View File

@ -106,7 +106,9 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
func renderPrompt(m *Model, msgs []api.Message, tools []api.Tool, think *api.ThinkValue) (string, error) {
if m.Config.Renderer != "" {
rendered, err := renderers.RenderWithRenderer(m.Config.Renderer, msgs, tools, think)
// rendered, err := renderers.RenderWithRenderer(m.Config.Renderer, msgs, tools, think)
renderer := renderers.RendererForName(m.Config.Renderer)
rendered, err := renderer.Render(msgs, tools, think)
if err != nil {
return "", err
}