mirror of https://github.com/ollama/ollama.git
411 lines
12 KiB
Go
411 lines
12 KiB
Go
package parsers
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"encoding/xml"
|
|
"fmt"
|
|
"log/slog"
|
|
"math"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
|
|
"github.com/ollama/ollama/api"
|
|
"github.com/ollama/ollama/logutil"
|
|
)
|
|
|
|
// qwenParserState identifies which phase of the streaming parse a
// Qwen3CoderParser is currently in.
type qwenParserState int
|
|
|
|
// Literal tags that delimit a raw tool call in the model output. Text between
// toolOpenTag and toolCloseTag is collected and parsed as a single tool call.
const (
	toolOpenTag  = "<tool_call>"
	toolCloseTag = "</tool_call>"
)
|
|
|
|
const (
	// qwenParserState_LookingForToolStart is the zero value, so a freshly
	// declared parser starts here: buffered text is treated as content until a
	// complete toolOpenTag is seen.
	qwenParserState_LookingForToolStart qwenParserState = iota
	// qwenParserState_CollectingToolContent means a toolOpenTag has been
	// consumed and text is being buffered until toolCloseTag arrives.
	qwenParserState_CollectingToolContent
)
|
|
|
|
// Qwen3CoderParser incrementally splits streamed model output into plain
// content and tool calls delimited by toolOpenTag/toolCloseTag.
type Qwen3CoderParser struct {
	// state is the current phase of the parse.
	state qwenParserState
	// acc holds text passed to Add that has not yet been turned into events.
	acc strings.Builder
}
|
|
|
|
// HasToolSupport reports whether the parser can produce tool calls; it always
// returns true.
func (p *Qwen3CoderParser) HasToolSupport() bool {
	return true
}
|
|
|
|
// HasThinkingSupport reports whether the parser can produce thinking output;
// it always returns false.
func (p *Qwen3CoderParser) HasThinkingSupport() bool {
	return false
}
|
|
|
|
func (p *Qwen3CoderParser) Add(s string, tools []api.Tool) (content string, thinking string, calls []api.ToolCall, err error) {
|
|
p.acc.WriteString(s)
|
|
|
|
events := p.parseEvents()
|
|
|
|
var toolCalls []api.ToolCall
|
|
var sb strings.Builder
|
|
for _, event := range events {
|
|
switch event := event.(type) {
|
|
case qwenEventRawToolCall:
|
|
toolCall, err := parseToolCall(event, tools)
|
|
if err != nil {
|
|
slog.Warn("qwen tool call parsing failed", "error", err)
|
|
return "", "", nil, err
|
|
}
|
|
toolCalls = append(toolCalls, toolCall)
|
|
case qwenEventContent:
|
|
// TODO(drifkin): if the same turn contains multiple interleaved content
|
|
// events, we naively append them together here. See the note below about
|
|
// `qwenEvent`s for more details
|
|
sb.WriteString(event.content)
|
|
}
|
|
}
|
|
|
|
return sb.String(), "", toolCalls, nil
|
|
}
|
|
|
|
func (p *Qwen3CoderParser) parseEvents() []qwenEvent {
|
|
var all []qwenEvent
|
|
|
|
keepLooping := true
|
|
for keepLooping {
|
|
var events []qwenEvent
|
|
events, keepLooping = eat(p)
|
|
if len(events) > 0 {
|
|
all = append(all, events...)
|
|
}
|
|
}
|
|
|
|
if len(all) > 0 {
|
|
slog.Log(context.TODO(), logutil.LevelTrace, "qwen events parsed", "events", all, "state", p.state, "acc", p.acc.String())
|
|
}
|
|
|
|
return all
|
|
}
|
|
|
|
// qwenEvent is the internal event type used to communicate between `Add` and
// `eat`. We do this to support interleaving content and parallel tool calls in
// the parser, even though qwen3-coder isn't supposed to do this. Our API
// doesn't currently support models outputting multiple messages in a turn, so
// we wouldn't be able to represent it yet, but there's no reason to prevent the
// parser from supporting it, especially for future models if they end up using
// a similar format.
type qwenEvent interface {
	// isQwenEvent is a marker method; it has no behavior and only restricts
	// which types can be used as a qwenEvent.
	isQwenEvent()
}
|
|
|
|
// qwenEventRawToolCall carries the unparsed text found between toolOpenTag and
// toolCloseTag.
type qwenEventRawToolCall struct {
	// raw is the text between the tags, exactly as buffered.
	raw string
}
|
|
|
|
// qwenEventContent carries a run of plain content found outside of any tool
// call.
type qwenEventContent struct {
	// content is the text to hand back to the caller.
	content string
}
|
|
|
|
// Marker methods that make both event types satisfy qwenEvent.
func (qwenEventContent) isQwenEvent()     {}
func (qwenEventRawToolCall) isQwenEvent() {}
|
|
|
|
// eat consumes the parser's buffer, and returns a list of any unambiguous
|
|
// events from the current parser state. If the parser transitions to another
|
|
// state, it may have additional events to emit on the next call, which is what
|
|
// the second return value indicates
|
|
func eat(p *Qwen3CoderParser) ([]qwenEvent, bool) {
|
|
var events []qwenEvent
|
|
|
|
switch p.state {
|
|
case qwenParserState_LookingForToolStart:
|
|
if strings.Contains(p.acc.String(), toolOpenTag) {
|
|
// we found a full tool open tag, so we can emit the content before the
|
|
// tag, being sure to trim any trailing whitespace
|
|
split := strings.SplitN(p.acc.String(), toolOpenTag, 2)
|
|
before := split[0]
|
|
before = strings.TrimRightFunc(before, unicode.IsSpace)
|
|
if len(before) > 0 {
|
|
events = append(events, qwenEventContent{content: before})
|
|
}
|
|
after := split[1]
|
|
p.acc.Reset()
|
|
p.acc.WriteString(after)
|
|
p.state = qwenParserState_CollectingToolContent
|
|
return events, true
|
|
} else if overlap := overlap(p.acc.String(), toolOpenTag); overlap > 0 {
|
|
// we found a partial tool open tag, so we can emit the unambiguous part,
|
|
// which is the (trailing-whitespace trimmed) content before the partial
|
|
// tool open tag
|
|
beforePartialTag := p.acc.String()[:len(p.acc.String())-overlap]
|
|
trailingWhitespaceLen := trailingWhitespaceLen(beforePartialTag)
|
|
ambiguousStart := len(beforePartialTag) - trailingWhitespaceLen
|
|
unambiguous := p.acc.String()[:ambiguousStart]
|
|
ambiguous := p.acc.String()[ambiguousStart:]
|
|
p.acc.Reset()
|
|
p.acc.WriteString(ambiguous)
|
|
events = append(events, qwenEventContent{content: unambiguous})
|
|
return events, false
|
|
} else {
|
|
// we found content that is entirely not a tool call. We should withhold
|
|
// any trailing whitespace in case this is the end of the content
|
|
whitespaceLen := trailingWhitespaceLen(p.acc.String())
|
|
ambiguousStart := len(p.acc.String()) - whitespaceLen
|
|
unambiguous := p.acc.String()[:ambiguousStart]
|
|
ambiguous := p.acc.String()[ambiguousStart:]
|
|
p.acc.Reset()
|
|
p.acc.WriteString(ambiguous)
|
|
if len(unambiguous) > 0 {
|
|
events = append(events, qwenEventContent{content: unambiguous})
|
|
}
|
|
return events, false
|
|
}
|
|
case qwenParserState_CollectingToolContent:
|
|
if strings.Contains(p.acc.String(), toolCloseTag) {
|
|
split := strings.SplitN(p.acc.String(), toolCloseTag, 2)
|
|
before := split[0]
|
|
if len(before) == 0 {
|
|
slog.Warn("qwen tool call closing tag found but no content before it")
|
|
}
|
|
// remove any whitespace between the tool call and any content after it
|
|
after := strings.TrimLeftFunc(split[1], unicode.IsSpace)
|
|
p.acc.Reset()
|
|
p.acc.WriteString(after)
|
|
events = append(events, qwenEventRawToolCall{raw: before})
|
|
p.state = qwenParserState_LookingForToolStart
|
|
return events, true
|
|
} else {
|
|
// note that we don't need to check the overlap here because we only plan
|
|
// on parsing the tool call once we see the full closing tag. We don't
|
|
// stream back the unparsed tool content, so there's no need to be eager
|
|
// here
|
|
return events, false
|
|
}
|
|
default:
|
|
panic("unreachable")
|
|
}
|
|
}
|
|
|
|
// TODO(drifkin): move this to a shared location
//
// overlap returns the length of the longest suffix of s that is also a prefix
// of delim, or 0 when there is none.
func overlap(s, delim string) int {
	// Try the longest candidate first so the first hit is the answer.
	for n := min(len(s), len(delim)); n > 0; n-- {
		if strings.HasPrefix(delim, s[len(s)-n:]) {
			return n
		}
	}
	return 0
}
|
|
|
|
// trailingWhitespaceLen returns the number of bytes occupied by the run of
// Unicode whitespace at the end of s.
//
// The string is examined rune by rune rather than byte by byte. Testing single
// bytes would misread UTF-8 continuation bytes such as 0xA0 or 0x85 as
// whitespace, so a string ending in "à" (0xC3 0xA0) would be reported as ending
// in whitespace and callers slicing at the returned offset would cut the
// character in half.
func trailingWhitespaceLen(s string) int {
	return len(s) - len(strings.TrimRightFunc(s, unicode.IsSpace))
}
|
|
|
|
// XMLFunctionCall is the decoding target for a tool call after it has been
// rewritten into XML of the form <function name="...">...</function>.
type XMLFunctionCall struct {
	XMLName xml.Name `xml:"function"`
	// Name is read from the element's name attribute.
	Name string `xml:"name,attr"`
	// Parameters holds one entry per nested <parameter> element.
	Parameters []XMLParameter `xml:"parameter"`
}
|
|
|
|
// XMLParameter is a single <parameter name="...">value</parameter> element of
// a tool call.
type XMLParameter struct {
	// Name is read from the element's name attribute.
	Name string `xml:"name,attr"`
	// Value is the element's character data, still as an uncoerced string.
	Value string `xml:",chardata"`
}
|
|
|
|
// parseToolCall parses a raw tool call string into an api.ToolCall.
|
|
// The raw string follows an xml-like format, here's an example:
|
|
//
|
|
// <function=get_current_temperature>
|
|
// <parameter=location>
|
|
// San Francisco
|
|
// </parameter>
|
|
// <parameter=unit>
|
|
// celsius
|
|
// </parameter>
|
|
// </function>
|
|
func parseToolCall(raw qwenEventRawToolCall, tools []api.Tool) (api.ToolCall, error) {
|
|
toolCall := api.ToolCall{}
|
|
|
|
xmlString := transformToXML(raw.raw)
|
|
|
|
var functionCall XMLFunctionCall
|
|
err := xml.Unmarshal([]byte(xmlString), &functionCall)
|
|
if err != nil {
|
|
return api.ToolCall{}, err
|
|
}
|
|
|
|
toolCall.Function = api.ToolCallFunction{
|
|
Name: functionCall.Name,
|
|
}
|
|
|
|
// Find the matching tool to get parameter types
|
|
var matchedTool *api.Tool
|
|
for i := range tools {
|
|
if tools[i].Function.Name == functionCall.Name {
|
|
matchedTool = &tools[i]
|
|
break
|
|
}
|
|
}
|
|
|
|
toolCall.Function.Arguments = make(api.ToolCallFunctionArguments)
|
|
for _, parameter := range functionCall.Parameters {
|
|
// Look up the parameter type if we found the tool
|
|
var paramType api.PropertyType
|
|
if matchedTool != nil && matchedTool.Function.Parameters.Properties != nil {
|
|
if prop, ok := matchedTool.Function.Parameters.Properties[parameter.Name]; ok {
|
|
paramType = prop.Type
|
|
}
|
|
}
|
|
|
|
toolCall.Function.Arguments[parameter.Name] = parseValue(parameter.Value, paramType)
|
|
}
|
|
|
|
return toolCall, nil
|
|
}
|
|
|
|
// parseValue converts a raw string value to the appropriate type based on the parameter type specification.
|
|
//
|
|
// For union types (multiple types in PropertyType, which we support but doesn't
|
|
// seem as though the reference parser does type coercion with those types in
|
|
// mind) we use a type precedence approach:
|
|
// 1. null - checked first regardless of declared types (matches reference implementation)
|
|
// 2. boolean - only "true"/"false" are valid booleans
|
|
// 3. integer - must parse as a whole number
|
|
// 4. number - must parse as numeric (returns int if no decimal part)
|
|
// 5. array - must parse as valid JSON array
|
|
// 6. object - must parse as valid JSON object
|
|
// 7. string - always succeeds (least specific type)
|
|
//
|
|
// This precedence ensures we return the most specific type that successfully parses,
|
|
// following the principle of least surprise. For example, with PropertyType{"string", "number"},
|
|
// "123" becomes 123 (number), while "hello" becomes "hello" (string).
|
|
func parseValue(raw string, paramType api.PropertyType) any {
|
|
// first remove a single leading newlines, and a single trailing newline (if
|
|
// they exist). This follows the reference implementation
|
|
raw = strings.TrimPrefix(raw, "\n")
|
|
raw = strings.TrimSuffix(raw, "\n")
|
|
|
|
// Check for null first (case-insensitive) - this takes precedence over any type
|
|
if strings.ToLower(raw) == "null" {
|
|
return nil
|
|
}
|
|
|
|
// If no type is specified, default to string
|
|
if len(paramType) == 0 {
|
|
return raw
|
|
}
|
|
|
|
// Check if any of the specified types match, using type precedence
|
|
// Order: boolean -> integer -> number -> array -> object -> string
|
|
typeSet := make(map[string]bool)
|
|
for _, t := range paramType {
|
|
typeSet[t] = true
|
|
}
|
|
|
|
// Try boolean first (most restrictive)
|
|
if typeSet["boolean"] {
|
|
lower := strings.ToLower(raw)
|
|
switch lower {
|
|
case "true":
|
|
return true
|
|
case "false":
|
|
return false
|
|
}
|
|
// If not a valid boolean but boolean is the only type, return false (matching reference)
|
|
if len(paramType) == 1 {
|
|
return false
|
|
}
|
|
// Otherwise try other types
|
|
}
|
|
|
|
// Try integer
|
|
if typeSet["integer"] {
|
|
if i, err := strconv.ParseInt(raw, 10, 64); err == nil {
|
|
// Return as int if it fits in int32, otherwise int64
|
|
if i >= math.MinInt32 && i <= math.MaxInt32 {
|
|
return int(i)
|
|
}
|
|
return i
|
|
}
|
|
// If integer is the only type and parsing failed, fall back to string
|
|
if len(paramType) == 1 {
|
|
return raw
|
|
}
|
|
}
|
|
|
|
// Try number (float)
|
|
if typeSet["number"] {
|
|
if f, err := strconv.ParseFloat(raw, 64); err == nil {
|
|
// If the number has no decimal part, return as int (matching reference)
|
|
if f == math.Trunc(f) {
|
|
i := int64(f)
|
|
if i >= math.MinInt32 && i <= math.MaxInt32 {
|
|
return int(i)
|
|
}
|
|
return i
|
|
}
|
|
return f
|
|
}
|
|
// If number is the only type and parsing failed, fall back to string
|
|
if len(paramType) == 1 {
|
|
return raw
|
|
}
|
|
}
|
|
|
|
// Try array
|
|
if typeSet["array"] {
|
|
var arr []interface{}
|
|
if err := json.Unmarshal([]byte(raw), &arr); err == nil {
|
|
return arr
|
|
}
|
|
// If array is the only type and parsing failed, fall back to string
|
|
if len(paramType) == 1 {
|
|
return raw
|
|
}
|
|
}
|
|
|
|
// Try object
|
|
if typeSet["object"] {
|
|
var obj map[string]interface{}
|
|
if err := json.Unmarshal([]byte(raw), &obj); err == nil {
|
|
return obj
|
|
}
|
|
// If object is the only type and parsing failed, fall back to string
|
|
if len(paramType) == 1 {
|
|
return raw
|
|
}
|
|
}
|
|
|
|
// String always succeeds (or if "string" is in the type set)
|
|
if typeSet["string"] {
|
|
return raw
|
|
}
|
|
|
|
// If we get here, none of the types matched and string wasn't an option
|
|
// We return string as a fallback. The reference implementation will attempt
|
|
// to parse the value as a python literal, but we purposefully don't support
|
|
// that
|
|
return raw
|
|
}
|
|
|
|
// qwenTagRegex matches any opening tag of the form `<tag=value>`.
// transformToXML no longer uses it; it is kept so that any code outside this
// block that still refers to it keeps compiling.
var qwenTagRegex = regexp.MustCompile(`<(\w+)=([^>]+)>`)

// qwenStructuralTagRegex matches the tags that make up the structure of a qwen
// tool call: `<function=NAME>`, `<parameter=NAME>` and their closing tags. For
// opening tags, group 1 is the tag and group 2 is the name; closing tags leave
// both groups unset.
var qwenStructuralTagRegex = regexp.MustCompile(`<(function|parameter)=([^>]+)>|</(?:function|parameter)>`)

// qwenTextEscaper escapes the characters that are not allowed to appear
// literally in XML character data.
var qwenTextEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")

// transformToXML transforms a raw qwen tool call with xml-like tags into valid
// xml so that it can be parsed by any xml parser.
//
// Opening tags of the form `<function=abc>` / `<parameter=abc>` become
// `<function name="abc">` / `<parameter name="abc">`, with the name escaped for
// use as an attribute value. All text outside those tags and their closing
// tags is treated as literal character data and has `&`, `<` and `>` escaped,
// so parameter values containing source code or markup survive XML parsing
// unchanged instead of making the whole tool call invalid.
func transformToXML(raw string) string {
	var out strings.Builder
	out.Grow(len(raw))

	textStart := 0
	for _, m := range qwenStructuralTagRegex.FindAllStringSubmatchIndex(raw, -1) {
		// m[0]:m[1] is the whole tag, m[2]:m[3] the tag name and m[4]:m[5] the
		// value after `=`; the group offsets are -1 for closing tags.
		out.WriteString(qwenTextEscaper.Replace(raw[textStart:m[0]]))

		if m[2] >= 0 {
			var name strings.Builder
			// Writes to a strings.Builder never fail, so the error is always nil.
			_ = xml.EscapeText(&name, []byte(raw[m[4]:m[5]]))
			fmt.Fprintf(&out, `<%s name="%s">`, raw[m[2]:m[3]], name.String())
		} else {
			out.WriteString(raw[m[0]:m[1]])
		}
		textStart = m[1]
	}
	out.WriteString(qwenTextEscaper.Replace(raw[textStart:]))

	return out.String()
}
|