ollama/model/parsers/qwen3coder.go

411 lines
12 KiB
Go

package parsers
import (
"context"
"encoding/json"
"encoding/xml"
"fmt"
"log/slog"
"math"
"regexp"
"strconv"
"strings"
"unicode"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/logutil"
)
type qwenParserState int
const (
toolOpenTag = "<tool_call>"
toolCloseTag = "</tool_call>"
)
const (
qwenParserState_LookingForToolStart qwenParserState = iota
qwenParserState_CollectingToolContent
)
type Qwen3CoderParser struct {
state qwenParserState
acc strings.Builder
}
func (p *Qwen3CoderParser) HasToolSupport() bool {
return true
}
func (p *Qwen3CoderParser) HasThinkingSupport() bool {
return false
}
func (p *Qwen3CoderParser) Add(s string, tools []api.Tool) (content string, thinking string, calls []api.ToolCall, err error) {
p.acc.WriteString(s)
events := p.parseEvents()
var toolCalls []api.ToolCall
var sb strings.Builder
for _, event := range events {
switch event := event.(type) {
case qwenEventRawToolCall:
toolCall, err := parseToolCall(event, tools)
if err != nil {
slog.Warn("qwen tool call parsing failed", "error", err)
return "", "", nil, err
}
toolCalls = append(toolCalls, toolCall)
case qwenEventContent:
// TODO(drifkin): if the same turn contains multiple interleaved content
// events, we naively append them together here. See the note below about
// `qwenEvent`s for more details
sb.WriteString(event.content)
}
}
return sb.String(), "", toolCalls, nil
}
func (p *Qwen3CoderParser) parseEvents() []qwenEvent {
var all []qwenEvent
keepLooping := true
for keepLooping {
var events []qwenEvent
events, keepLooping = eat(p)
if len(events) > 0 {
all = append(all, events...)
}
}
if len(all) > 0 {
slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "acc", p.acc.String())
}
return all
}
// we use some internal event types in order to communicate between `Add` and
// `eat`. We do this to support interleaving content and parallel tool calls in
// the parser, even though qwen3-coder isn't supposed to do this. Our API
// doesn't currently support models outputting multiple messages in a turn, so
// we wouldn't be able to represent it yet, but there's no reason to prevent the
// parser from supporting it, especially for future models if they end up using
// a similar format.
type qwenEvent interface {
isQwenEvent()
}
type qwenEventRawToolCall struct {
raw string
}
type qwenEventContent struct {
content string
}
func (qwenEventContent) isQwenEvent() {}
func (qwenEventRawToolCall) isQwenEvent() {}
// eat consumes the parser's buffer, and returns a list of any unambiguous
// events from the current parser state. If the parser transitions to another
// state, it may have additional events to emit on the next call, which is what
// the second return value indicates
func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
var events []qwenEvent
switch p.state {
case qwenParserState_LookingForToolStart:
if strings.Contains(p.acc.String(), toolOpenTag) {
// we found a full tool open tag, so we can emit the content before the
// tag, being sure to trim any trailing whitespace
split := strings.SplitN(p.acc.String(), toolOpenTag, 2)
before := split[0]
before = strings.TrimRightFunc(before, unicode.IsSpace)
if len(before) > 0 {
events = append(events, qwenEventContent{content: before})
}
after := split[1]
p.acc.Reset()
p.acc.WriteString(after)
p.state = qwenParserState_CollectingToolContent
return events, true
} else if overlap := overlap(p.acc.String(), toolOpenTag); overlap > 0 {
// we found a partial tool open tag, so we can emit the unambiguous part,
// which is the (trailing-whitespace trimmed) content before the partial
// tool open tag
beforePartialTag := p.acc.String()[:len(p.acc.String())-overlap]
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
unambiguous := p.acc.String()[:ambiguousStart]
ambiguous := p.acc.String()[ambiguousStart:]
p.acc.Reset()
p.acc.WriteString(ambiguous)
events = append(events, qwenEventContent{content: unambiguous})
return events, false
} else {
// we found content that is entirely not a tool call. We should withhold
// any trailing whitespace in case this is the end of the content
whitespaceLen := trailingWhitespaceLen(p.acc.String())
ambiguousStart := len(p.acc.String()) - whitespaceLen
unambiguous := p.acc.String()[:ambiguousStart]
ambiguous := p.acc.String()[ambiguousStart:]
p.acc.Reset()
p.acc.WriteString(ambiguous)
if len(unambiguous) > 0 {
events = append(events, qwenEventContent{content: unambiguous})
}
return events, false
}
case qwenParserState_CollectingToolContent:
if strings.Contains(p.acc.String(), toolCloseTag) {
split := strings.SplitN(p.acc.String(), toolCloseTag, 2)
before := split[0]
if len(before) == 0 {
slog.Warn("qwen tool call closing tag found but no content before it")
}
// remove any whitespace between the tool call and any content after it
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
p.acc.Reset()
p.acc.WriteString(after)
events = append(events, qwenEventRawToolCall{raw: before})
p.state = qwenParserState_LookingForToolStart
return events, true
} else {
// note that we don't need to check the overlap here because we only plan
// on parsing the tool call once we see the full closing tag. We don't
// stream back the unparsed tool content, so there's no need to be eager
// here
return events, false
}
default:
panic("unreachable")
}
}
// TODO(drifkin): move this to a shared location
// longest overlap between suffix of s and prefix of delim
func overlap(s, delim string) int {
max := min(len(delim), len(s))
for i := max; i > 0; i-- {
if strings.HasSuffix(s, delim[:i]) {
return i
}
}
return 0
}
func trailingWhitespaceLen(s string) int {
for i := len(s) - 1; i >= 0; i-- {
if !unicode.IsSpace(rune(s[i])) {
return len(s) - i - 1
}
}
return len(s)
}
type XMLFunctionCall struct {
XMLName xml.Name `xml:"function"`
Name string `xml:"name,attr"`
Parameters []XMLParameter `xml:"parameter"`
}
type XMLParameter struct {
Name string `xml:"name,attr"`
Value string `xml:",chardata"`
}
// parseToolCall parses a raw tool call string into an api.ToolCall.
// The raw string follows an xml-like format, here's an example:
//
// <function=get_current_temperature>
// <parameter=location>
// San Francisco
// </parameter>
// <parameter=unit>
// celsius
// </parameter>
// </function>
func parseToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
toolCall := api.ToolCall{}
xmlString := transformToXML(raw.raw)
var functionCall XMLFunctionCall
err := xml.Unmarshal([]byte(xmlString), &functionCall)
if err != nil {
return api.ToolCall{}, err
}
toolCall.Function = api.ToolCallFunction{
Name: functionCall.Name,
}
// Find the matching tool to get parameter types
var matchedTool *api.Tool
for i := range tools {
if tools[i].Function.Name == functionCall.Name {
matchedTool = &tools[i]
break
}
}
toolCall.Function.Arguments = make(api.ToolCallFunctionArguments)
for _, parameter := range functionCall.Parameters {
// Look up the parameter type if we found the tool
var paramType api.PropertyType
if matchedTool != nil && matchedTool.Function.Parameters.Properties != nil {
if prop, ok := matchedTool.Function.Parameters.Properties[parameter.Name]; ok {
paramType = prop.Type
}
}
toolCall.Function.Arguments[parameter.Name] = parseValue(parameter.Value, paramType)
}
return toolCall, nil
}
// parseValue converts a raw string value to the appropriate type based on the parameter type specification.
//
// For union types (multiple types in PropertyType, which we support but doesn't
// seem as though the reference parser does type coercion with those types in
// mind) we use a type precedence approach:
// 1. null - checked first regardless of declared types (matches reference implementation)
// 2. boolean - only "true"/"false" are valid booleans
// 3. integer - must parse as a whole number
// 4. number - must parse as numeric (returns int if no decimal part)
// 5. array - must parse as valid JSON array
// 6. object - must parse as valid JSON object
// 7. string - always succeeds (least specific type)
//
// This precedence ensures we return the most specific type that successfully parses,
// following the principle of least surprise. For example, with PropertyType{"string", "number"},
// "123" becomes 123 (number), while "hello" becomes "hello" (string).
func parseValue(raw string, paramType api.PropertyType) any {
// first remove a single leading newlines, and a single trailing newline (if
// they exist). This follows the reference implementation
raw = strings.TrimPrefix(raw, "\n")
raw = strings.TrimSuffix(raw, "\n")
// Check for null first (case-insensitive) - this takes precedence over any type
if strings.ToLower(raw) == "null" {
return nil
}
// If no type is specified, default to string
if len(paramType) == 0 {
return raw
}
// Check if any of the specified types match, using type precedence
// Order: boolean -> integer -> number -> array -> object -> string
typeSet := make(map[string]bool)
for _, t := range paramType {
typeSet[t] = true
}
// Try boolean first (most restrictive)
if typeSet["boolean"] {
lower := strings.ToLower(raw)
switch lower {
case "true":
return true
case "false":
return false
}
// If not a valid boolean but boolean is the only type, return false (matching reference)
if len(paramType) == 1 {
return false
}
// Otherwise try other types
}
// Try integer
if typeSet["integer"] {
if i, err := strconv.ParseInt(raw, 10, 64); err == nil {
// Return as int if it fits in int32, otherwise int64
if i >= math.MinInt32 && i <= math.MaxInt32 {
return int(i)
}
return i
}
// If integer is the only type and parsing failed, fall back to string
if len(paramType) == 1 {
return raw
}
}
// Try number (float)
if typeSet["number"] {
if f, err := strconv.ParseFloat(raw, 64); err == nil {
// If the number has no decimal part, return as int (matching reference)
if f == math.Trunc(f) {
i := int64(f)
if i >= math.MinInt32 && i <= math.MaxInt32 {
return int(i)
}
return i
}
return f
}
// If number is the only type and parsing failed, fall back to string
if len(paramType) == 1 {
return raw
}
}
// Try array
if typeSet["array"] {
var arr []interface{}
if err := json.Unmarshal([]byte(raw), &arr); err == nil {
return arr
}
// If array is the only type and parsing failed, fall back to string
if len(paramType) == 1 {
return raw
}
}
// Try object
if typeSet["object"] {
var obj map[string]interface{}
if err := json.Unmarshal([]byte(raw), &obj); err == nil {
return obj
}
// If object is the only type and parsing failed, fall back to string
if len(paramType) == 1 {
return raw
}
}
// String always succeeds (or if "string" is in the type set)
if typeSet["string"] {
return raw
}
// If we get here, none of the types matched and string wasn't an option
// We return string as a fallback. The reference implementation will attempt
// to parse the value as a python literal, but we purposefully don't support
// that
return raw
}
var qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`)
// transformToXML transforms a raw qwen tool call with xml-like tags into valid
// xml so that it can be parsed by any xml parser
func transformToXML(raw string) string {
// take the form `<tag=abc>` and transform it to `<tag name="abc">`, taking
// care to properly escape the string that becomes the attribute value
return qwenTagRegex.ReplaceAllStringFunc(raw, func(match string) string {
groups := qwenTagRegex.FindStringSubmatch(match)
tag := groups[1]
var escapedValue strings.Builder
xml.EscapeText(&escapedValue, []byte(groups[2]))
return fmt.Sprintf(`<%s name="%s">`, tag, escapedValue.String())
})
}