ollama/cmd/interactive.go

642 lines
19 KiB
Go
Raw Normal View History

2024-01-05 07:20:26 +08:00
package cmd
import (
"cmp"
2024-01-05 07:20:26 +08:00
"errors"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
2024-01-05 07:20:26 +08:00
"regexp"
2024-05-22 12:30:52 +08:00
"slices"
2024-01-05 07:20:26 +08:00
"strings"
"github.com/spf13/cobra"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
"github.com/ollama/ollama/readline"
"github.com/ollama/ollama/types/errtypes"
"github.com/ollama/ollama/types/model"
2024-01-05 07:20:26 +08:00
)
type MultilineState int
const (
MultilineNone MultilineState = iota
MultilinePrompt
MultilineSystem
)
2024-01-26 04:12:36 +08:00
func generateInteractive(cmd *cobra.Command, opts runOptions) error {
2024-01-05 07:20:26 +08:00
usage := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
2024-01-26 04:12:36 +08:00
fmt.Fprintln(os.Stderr, " /set Set session variables")
fmt.Fprintln(os.Stderr, " /show Show model information")
fmt.Fprintln(os.Stderr, " /load <model> Load a session or model")
fmt.Fprintln(os.Stderr, " /save <model> Save your current session")
fmt.Fprintln(os.Stderr, " /clear Clear session context")
2024-01-26 04:12:36 +08:00
fmt.Fprintln(os.Stderr, " /bye Exit")
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
fmt.Fprintln(os.Stderr, " /? shortcuts Help for keyboard shortcuts")
2024-01-05 07:20:26 +08:00
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
if opts.MultiModal {
fmt.Fprintf(os.Stderr, "Use %s to include .jpg, .png, or .webp images.\n", filepath.FromSlash("/path/to/file"))
}
2024-01-05 07:20:26 +08:00
fmt.Fprintln(os.Stderr, "")
}
usageSet := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
fmt.Fprintln(os.Stderr, " /set system <string> Set system message")
fmt.Fprintln(os.Stderr, " /set history Enable history")
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
fmt.Fprintln(os.Stderr, " /set think Enable thinking")
fmt.Fprintln(os.Stderr, " /set nothink Disable thinking")
2024-01-05 07:20:26 +08:00
fmt.Fprintln(os.Stderr, "")
}
usageShortcuts := func() {
fmt.Fprintln(os.Stderr, "Available keyboard shortcuts:")
fmt.Fprintln(os.Stderr, " Ctrl + a Move to the beginning of the line (Home)")
fmt.Fprintln(os.Stderr, " Ctrl + e Move to the end of the line (End)")
fmt.Fprintln(os.Stderr, " Alt + b Move back (left) one word")
fmt.Fprintln(os.Stderr, " Alt + f Move forward (right) one word")
fmt.Fprintln(os.Stderr, " Ctrl + k Delete the sentence after the cursor")
fmt.Fprintln(os.Stderr, " Ctrl + u Delete the sentence before the cursor")
2024-05-22 07:55:09 +08:00
fmt.Fprintln(os.Stderr, " Ctrl + w Delete the word before the cursor")
2024-01-05 07:20:26 +08:00
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, " Ctrl + l Clear the screen")
fmt.Fprintln(os.Stderr, " Ctrl + c Stop the model from responding")
fmt.Fprintln(os.Stderr, " Ctrl + d Exit ollama (/bye)")
fmt.Fprintln(os.Stderr, "")
}
usageShow := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /show info Show details for this model")
2024-01-05 07:20:26 +08:00
fmt.Fprintln(os.Stderr, " /show license Show model license")
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
fmt.Fprintln(os.Stderr, " /show system Show system message")
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
fmt.Fprintln(os.Stderr, "")
}
// only list out the most common parameters
usageParameters := func() {
fmt.Fprintln(os.Stderr, "Available Parameters:")
fmt.Fprintln(os.Stderr, " /set parameter seed <int> Random number seed")
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
fmt.Fprintln(os.Stderr, " /set parameter min_p <float> Pick token based on top token probability * min_p")
2024-01-05 07:20:26 +08:00
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions")
fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU")
fmt.Fprintln(os.Stderr, " /set parameter stop <string> <string> ... Set the stop parameters")
2024-01-05 07:20:26 +08:00
fmt.Fprintln(os.Stderr, "")
}
scanner, err := readline.New(readline.Prompt{
Prompt: ">>> ",
AltPrompt: "... ",
Placeholder: "Send a message (/? for help)",
AltPlaceholder: `Use """ to end multi-line input`,
})
if err != nil {
return err
}
2024-07-04 08:22:13 +08:00
if envconfig.NoHistory() {
scanner.HistoryDisable()
}
2024-01-05 07:20:26 +08:00
fmt.Print(readline.StartBracketedPaste)
defer fmt.Printf(readline.EndBracketedPaste)
var sb strings.Builder
2024-01-05 07:20:26 +08:00
var multiline MultilineState
var thinkExplicitlySet bool = opts.Think != nil
2024-01-05 07:20:26 +08:00
for {
line, err := scanner.Readline()
switch {
case errors.Is(err, io.EOF):
fmt.Println()
return nil
case errors.Is(err, readline.ErrInterrupt):
if line == "" {
fmt.Println("\nUse Ctrl + d or /bye to exit.")
}
scanner.Prompt.UseAlt = false
sb.Reset()
2024-01-05 07:20:26 +08:00
continue
case err != nil:
return err
}
switch {
case multiline != MultilineNone:
// check if there's a multiline terminating string
before, ok := strings.CutSuffix(line, `"""`)
sb.WriteString(before)
if !ok {
fmt.Fprintln(&sb)
2024-01-05 07:20:26 +08:00
continue
}
switch multiline {
case MultilineSystem:
opts.System = sb.String()
opts.Messages = append(opts.Messages, api.Message{Role: "system", Content: opts.System})
2024-01-05 07:20:26 +08:00
fmt.Println("Set system message.")
sb.Reset()
2024-01-05 07:20:26 +08:00
}
2024-01-05 07:20:26 +08:00
multiline = MultilineNone
scanner.Prompt.UseAlt = false
case strings.HasPrefix(line, `"""`):
line := strings.TrimPrefix(line, `"""`)
line, ok := strings.CutSuffix(line, `"""`)
sb.WriteString(line)
if !ok {
// no multiline terminating string; need more input
fmt.Fprintln(&sb)
multiline = MultilinePrompt
scanner.Prompt.UseAlt = true
}
2024-01-05 07:20:26 +08:00
case scanner.Pasting:
fmt.Fprintln(&sb, line)
2024-01-05 07:20:26 +08:00
continue
case strings.HasPrefix(line, "/list"):
args := strings.Fields(line)
if err := ListHandler(cmd, args[1:]); err != nil {
return err
}
2024-01-26 04:12:36 +08:00
case strings.HasPrefix(line, "/load"):
args := strings.Fields(line)
if len(args) != 2 {
fmt.Println("Usage:\n /load <modelname>")
continue
}
origOpts := opts.Copy()
2024-01-26 04:12:36 +08:00
opts.Model = args[1]
opts.Messages = []api.Message{}
fmt.Printf("Loading model '%s'\n", opts.Model)
opts.Think, err = inferThinkingOption(nil, &opts, thinkExplicitlySet)
if err != nil {
if strings.Contains(err.Error(), "not found") {
fmt.Printf("Couldn't find model '%s'\n", opts.Model)
opts = origOpts.Copy()
continue
}
return err
}
2024-09-12 07:36:21 +08:00
if err := loadOrUnloadModel(cmd, &opts); err != nil {
if strings.Contains(err.Error(), "not found") {
fmt.Printf("Couldn't find model '%s'\n", opts.Model)
opts = origOpts.Copy()
continue
}
if strings.Contains(err.Error(), "does not support thinking") {
fmt.Printf("error: %v\n", err)
continue
}
2024-01-26 04:12:36 +08:00
return err
}
continue
case strings.HasPrefix(line, "/save"):
args := strings.Fields(line)
if len(args) != 2 {
fmt.Println("Usage:\n /save <modelname>")
continue
}
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Println("error: couldn't connect to ollama server")
return err
}
req := NewCreateRequest(args[1], opts)
2024-01-26 04:12:36 +08:00
fn := func(resp api.ProgressResponse) error { return nil }
err = client.Create(cmd.Context(), req, fn)
if err != nil {
if strings.Contains(err.Error(), errtypes.InvalidModelNameErrMsg) {
fmt.Printf("error: The model name '%s' is invalid\n", args[1])
continue
}
2024-01-26 04:12:36 +08:00
return err
}
fmt.Printf("Created new model '%s'\n", args[1])
continue
case strings.HasPrefix(line, "/clear"):
opts.Messages = []api.Message{}
2024-05-15 02:38:20 +08:00
if opts.System != "" {
newMessage := api.Message{Role: "system", Content: opts.System}
opts.Messages = append(opts.Messages, newMessage)
}
fmt.Println("Cleared session context")
continue
2024-01-05 07:20:26 +08:00
case strings.HasPrefix(line, "/set"):
args := strings.Fields(line)
if len(args) > 1 {
switch args[1] {
case "history":
scanner.HistoryEnable()
case "nohistory":
scanner.HistoryDisable()
case "wordwrap":
opts.WordWrap = true
fmt.Println("Set 'wordwrap' mode.")
case "nowordwrap":
opts.WordWrap = false
fmt.Println("Set 'nowordwrap' mode.")
case "verbose":
if err := cmd.Flags().Set("verbose", "true"); err != nil {
return err
}
2024-01-05 07:20:26 +08:00
fmt.Println("Set 'verbose' mode.")
case "quiet":
if err := cmd.Flags().Set("verbose", "false"); err != nil {
return err
}
2024-01-05 07:20:26 +08:00
fmt.Println("Set 'quiet' mode.")
case "think":
gpt-oss (#11672) * bf16 * tests * gpt-oss * enable gptoss for engine * rough estimate * convert to mxfp4 * handle safetensors U8 * clamp glu/linear * update tokenizer * MXFP4 support This implements the Open Compute Microscaling (MX) FP4 format as a tensor type with backend implementations focusing on mulmat and mulmatid on CPU, CUDA, and Metal. * Unit tests for MXFP4 support This exercises various operations and shapes on both CPU and GPU (if detected on the system) * cuda graph * unit test adjustments * cuda: optimize memory access Read 4 bytes at a time (8 elements) when performing mul_mat_vec_mxfp4 * mac: fix crash on old macos versions cblas_sgemm is only supported on v13.3 and up, however bf16 is only supported on v14+ so we were falling back to ggml-blas and crashing on bf16 tensors. Checking for the function being null seems to be the simplest way to condittionally avoid registering the backend. * server: Minimum context length for gptoss This model requires a minimum context length of 8192 to function effectively. Users can set higher values through all normal mechanisms but lower values will be silently reset. * ggml: Multiply by numParallel for gptoss sliding window When computing the graph size estimate, the context size is already multiplied by numParallel so estimates reflect that. However, since sliding window models use a smaller, fixed context size, they need to manually take numParallel into account. * gpt-oss integration includes harmony parser and thinking levels, etc. * fix sync * fix tests * fix lint --------- Co-authored-by: Daniel Hiltgen <daniel@ollama.com> Co-authored-by: Jesse Gross <jesse@ollama.com> Co-authored-by: Devon Rifkin <drifkin@drifkin.net>
2025-08-06 03:21:16 +08:00
thinkValue := api.ThinkValue{Value: true}
var maybeLevel string
if len(args) > 2 {
maybeLevel = args[2]
}
if maybeLevel != "" {
// TODO(drifkin): validate the level, could be model dependent
// though... It will also be validated on the server once a call is
// made.
thinkValue.Value = maybeLevel
}
opts.Think = &thinkValue
thinkExplicitlySet = true
if client, err := api.ClientFromEnvironment(); err == nil {
ensureThinkingSupport(cmd.Context(), client, opts.Model)
}
gpt-oss (#11672) * bf16 * tests * gpt-oss * enable gptoss for engine * rough estimate * convert to mxfp4 * handle safetensors U8 * clamp glu/linear * update tokenizer * MXFP4 support This implements the Open Compute Microscaling (MX) FP4 format as a tensor type with backend implementations focusing on mulmat and mulmatid on CPU, CUDA, and Metal. * Unit tests for MXFP4 support This exercises various operations and shapes on both CPU and GPU (if detected on the system) * cuda graph * unit test adjustments * cuda: optimize memory access Read 4 bytes at a time (8 elements) when performing mul_mat_vec_mxfp4 * mac: fix crash on old macos versions cblas_sgemm is only supported on v13.3 and up, however bf16 is only supported on v14+ so we were falling back to ggml-blas and crashing on bf16 tensors. Checking for the function being null seems to be the simplest way to condittionally avoid registering the backend. * server: Minimum context length for gptoss This model requires a minimum context length of 8192 to function effectively. Users can set higher values through all normal mechanisms but lower values will be silently reset. * ggml: Multiply by numParallel for gptoss sliding window When computing the graph size estimate, the context size is already multiplied by numParallel so estimates reflect that. However, since sliding window models use a smaller, fixed context size, they need to manually take numParallel into account. * gpt-oss integration includes harmony parser and thinking levels, etc. * fix sync * fix tests * fix lint --------- Co-authored-by: Daniel Hiltgen <daniel@ollama.com> Co-authored-by: Jesse Gross <jesse@ollama.com> Co-authored-by: Devon Rifkin <drifkin@drifkin.net>
2025-08-06 03:21:16 +08:00
if maybeLevel != "" {
fmt.Printf("Set 'think' mode to '%s'.\n", maybeLevel)
} else {
fmt.Println("Set 'think' mode.")
}
case "nothink":
gpt-oss (#11672) * bf16 * tests * gpt-oss * enable gptoss for engine * rough estimate * convert to mxfp4 * handle safetensors U8 * clamp glu/linear * update tokenizer * MXFP4 support This implements the Open Compute Microscaling (MX) FP4 format as a tensor type with backend implementations focusing on mulmat and mulmatid on CPU, CUDA, and Metal. * Unit tests for MXFP4 support This exercises various operations and shapes on both CPU and GPU (if detected on the system) * cuda graph * unit test adjustments * cuda: optimize memory access Read 4 bytes at a time (8 elements) when performing mul_mat_vec_mxfp4 * mac: fix crash on old macos versions cblas_sgemm is only supported on v13.3 and up, however bf16 is only supported on v14+ so we were falling back to ggml-blas and crashing on bf16 tensors. Checking for the function being null seems to be the simplest way to condittionally avoid registering the backend. * server: Minimum context length for gptoss This model requires a minimum context length of 8192 to function effectively. Users can set higher values through all normal mechanisms but lower values will be silently reset. * ggml: Multiply by numParallel for gptoss sliding window When computing the graph size estimate, the context size is already multiplied by numParallel so estimates reflect that. However, since sliding window models use a smaller, fixed context size, they need to manually take numParallel into account. * gpt-oss integration includes harmony parser and thinking levels, etc. * fix sync * fix tests * fix lint --------- Co-authored-by: Daniel Hiltgen <daniel@ollama.com> Co-authored-by: Jesse Gross <jesse@ollama.com> Co-authored-by: Devon Rifkin <drifkin@drifkin.net>
2025-08-06 03:21:16 +08:00
opts.Think = &api.ThinkValue{Value: false}
thinkExplicitlySet = true
if client, err := api.ClientFromEnvironment(); err == nil {
ensureThinkingSupport(cmd.Context(), client, opts.Model)
}
fmt.Println("Set 'nothink' mode.")
2024-01-05 07:20:26 +08:00
case "format":
if len(args) < 3 || args[2] != "json" {
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
} else {
opts.Format = args[2]
fmt.Printf("Set format to '%s' mode.\n", args[2])
}
case "noformat":
opts.Format = ""
fmt.Println("Disabled format.")
case "parameter":
if len(args) < 4 {
usageParameters()
continue
}
2023-12-16 06:07:34 +08:00
params := args[3:]
2024-01-05 07:20:26 +08:00
fp, err := api.FormatParams(map[string][]string{args[2]: params})
if err != nil {
fmt.Printf("Couldn't set parameter: %q\n", err)
2024-01-05 07:20:26 +08:00
continue
}
fmt.Printf("Set parameter '%s' to '%s'\n", args[2], strings.Join(params, ", "))
2024-01-05 07:20:26 +08:00
opts.Options[args[2]] = fp[args[2]]
2024-07-14 11:56:24 +08:00
case "system":
2024-01-05 07:20:26 +08:00
if len(args) < 3 {
usageSet()
continue
}
2024-07-14 11:56:24 +08:00
multiline = MultilineSystem
2024-01-05 07:20:26 +08:00
line := strings.Join(args[2:], " ")
line, ok := strings.CutPrefix(line, `"""`)
if !ok {
multiline = MultilineNone
2024-01-05 07:20:26 +08:00
} else {
// only cut suffix if the line is multiline
line, ok = strings.CutSuffix(line, `"""`)
if ok {
multiline = MultilineNone
}
}
sb.WriteString(line)
if multiline != MultilineNone {
scanner.Prompt.UseAlt = true
continue
}
2024-07-14 11:56:24 +08:00
opts.System = sb.String() // for display in modelfile
newMessage := api.Message{Role: "system", Content: sb.String()}
// Check if the slice is not empty and the last message is from 'system'
if len(opts.Messages) > 0 && opts.Messages[len(opts.Messages)-1].Role == "system" {
// Replace the last message
opts.Messages[len(opts.Messages)-1] = newMessage
} else {
opts.Messages = append(opts.Messages, newMessage)
2024-01-05 07:20:26 +08:00
}
2024-07-14 11:56:24 +08:00
fmt.Println("Set system message.")
sb.Reset()
continue
2024-01-05 07:20:26 +08:00
default:
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
}
} else {
usageSet()
}
case strings.HasPrefix(line, "/show"):
args := strings.Fields(line)
if len(args) > 1 {
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Println("error: couldn't connect to ollama server")
return err
}
req := &api.ShowRequest{
Name: opts.Model,
System: opts.System,
Options: opts.Options,
}
resp, err := client.Show(cmd.Context(), req)
2024-01-05 07:20:26 +08:00
if err != nil {
fmt.Println("error: couldn't get model")
return err
}
switch args[1] {
case "info":
_ = showInfo(resp, false, os.Stderr)
2024-01-05 07:20:26 +08:00
case "license":
if resp.License == "" {
fmt.Println("No license was specified for this model.")
2024-01-05 07:20:26 +08:00
} else {
fmt.Println(resp.License)
}
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
fmt.Println("Model defined parameters:")
2024-01-05 07:20:26 +08:00
if resp.Parameters == "" {
fmt.Println(" No additional parameters were specified for this model.")
2024-01-05 07:20:26 +08:00
} else {
for _, l := range strings.Split(resp.Parameters, "\n") {
fmt.Printf(" %s\n", l)
2024-01-05 07:20:26 +08:00
}
}
fmt.Println()
if len(opts.Options) > 0 {
fmt.Println("User defined parameters:")
for k, v := range opts.Options {
fmt.Printf(" %-*s %v\n", 30, k, v)
}
fmt.Println()
2024-01-05 07:20:26 +08:00
}
case "system":
switch {
case opts.System != "":
fmt.Println(opts.System + "\n")
case resp.System != "":
fmt.Println(resp.System + "\n")
default:
fmt.Println("No system message was specified for this model.")
2024-01-05 07:20:26 +08:00
}
case "template":
2024-07-14 11:56:24 +08:00
if resp.Template != "" {
2024-01-05 07:20:26 +08:00
fmt.Println(resp.Template)
2024-07-14 11:56:24 +08:00
} else {
fmt.Println("No prompt template was specified for this model.")
2024-01-05 07:20:26 +08:00
}
default:
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
}
} else {
usageShow()
}
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
args := strings.Fields(line)
if len(args) > 1 {
switch args[1] {
case "set", "/set":
usageSet()
case "show", "/show":
usageShow()
case "shortcut", "shortcuts":
usageShortcuts()
}
} else {
usage()
}
case strings.HasPrefix(line, "/exit"), strings.HasPrefix(line, "/bye"):
2024-01-05 07:20:26 +08:00
return nil
case strings.HasPrefix(line, "/"):
args := strings.Fields(line)
isFile := false
2024-01-26 04:12:36 +08:00
if opts.MultiModal {
2024-01-05 07:20:26 +08:00
for _, f := range extractFileNames(line) {
if strings.HasPrefix(f, args[0]) {
isFile = true
break
}
}
}
if !isFile {
2024-01-05 07:20:26 +08:00
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
continue
}
sb.WriteString(line)
2024-01-05 07:20:26 +08:00
default:
sb.WriteString(line)
2024-01-05 07:20:26 +08:00
}
if sb.Len() > 0 && multiline == MultilineNone {
newMessage := api.Message{Role: "user", Content: sb.String()}
2024-01-26 04:12:36 +08:00
if opts.MultiModal {
msg, images, err := extractFileData(sb.String())
2024-01-05 07:20:26 +08:00
if err != nil {
return err
}
newMessage.Content = msg
newMessage.Images = images
2024-01-05 07:20:26 +08:00
}
opts.Messages = append(opts.Messages, newMessage)
assistant, err := chat(cmd, opts)
if err != nil {
gpt-oss (#11672) * bf16 * tests * gpt-oss * enable gptoss for engine * rough estimate * convert to mxfp4 * handle safetensors U8 * clamp glu/linear * update tokenizer * MXFP4 support This implements the Open Compute Microscaling (MX) FP4 format as a tensor type with backend implementations focusing on mulmat and mulmatid on CPU, CUDA, and Metal. * Unit tests for MXFP4 support This exercises various operations and shapes on both CPU and GPU (if detected on the system) * cuda graph * unit test adjustments * cuda: optimize memory access Read 4 bytes at a time (8 elements) when performing mul_mat_vec_mxfp4 * mac: fix crash on old macos versions cblas_sgemm is only supported on v13.3 and up, however bf16 is only supported on v14+ so we were falling back to ggml-blas and crashing on bf16 tensors. Checking for the function being null seems to be the simplest way to condittionally avoid registering the backend. * server: Minimum context length for gptoss This model requires a minimum context length of 8192 to function effectively. Users can set higher values through all normal mechanisms but lower values will be silently reset. * ggml: Multiply by numParallel for gptoss sliding window When computing the graph size estimate, the context size is already multiplied by numParallel so estimates reflect that. However, since sliding window models use a smaller, fixed context size, they need to manually take numParallel into account. * gpt-oss integration includes harmony parser and thinking levels, etc. * fix sync * fix tests * fix lint --------- Co-authored-by: Daniel Hiltgen <daniel@ollama.com> Co-authored-by: Jesse Gross <jesse@ollama.com> Co-authored-by: Devon Rifkin <drifkin@drifkin.net>
2025-08-06 03:21:16 +08:00
if strings.Contains(err.Error(), "does not support thinking") ||
strings.Contains(err.Error(), "invalid think value") {
fmt.Printf("error: %v\n", err)
sb.Reset()
continue
}
2024-01-05 07:20:26 +08:00
return err
}
if assistant != nil {
opts.Messages = append(opts.Messages, *assistant)
}
2024-01-05 07:20:26 +08:00
sb.Reset()
2024-01-05 07:20:26 +08:00
}
}
}
func NewCreateRequest(name string, opts runOptions) *api.CreateRequest {
parentModel := opts.ParentModel
modelName := model.ParseName(parentModel)
if !modelName.IsValid() {
parentModel = ""
}
req := &api.CreateRequest{
Model: name,
From: cmp.Or(parentModel, opts.Model),
}
2024-01-26 04:12:36 +08:00
if opts.System != "" {
req.System = opts.System
2024-01-26 04:12:36 +08:00
}
if len(opts.Options) > 0 {
req.Parameters = opts.Options
2024-01-26 04:12:36 +08:00
}
if len(opts.Messages) > 0 {
req.Messages = opts.Messages
2024-01-26 04:12:36 +08:00
}
return req
2024-01-26 04:12:36 +08:00
}
2024-01-05 07:20:26 +08:00
func normalizeFilePath(fp string) string {
return strings.NewReplacer(
"\\ ", " ", // Escaped space
"\\(", "(", // Escaped left parenthesis
"\\)", ")", // Escaped right parenthesis
"\\[", "[", // Escaped left square bracket
"\\]", "]", // Escaped right square bracket
"\\{", "{", // Escaped left curly brace
"\\}", "}", // Escaped right curly brace
"\\$", "$", // Escaped dollar sign
"\\&", "&", // Escaped ampersand
"\\;", ";", // Escaped semicolon
"\\'", "'", // Escaped single quote
"\\\\", "\\", // Escaped backslash
"\\*", "*", // Escaped asterisk
"\\?", "?", // Escaped question mark
"\\~", "~", // Escaped tilde
).Replace(fp)
2024-01-05 07:20:26 +08:00
}
func extractFileNames(input string) []string {
// Regex to match file paths starting with optional drive letter, / ./ \ or .\ and include escaped or unescaped spaces (\ or %20)
2024-01-05 07:20:26 +08:00
// and followed by more characters and a file extension
// This will capture non filename strings, but we'll check for file existence to remove mismatches
regexPattern := `(?:[a-zA-Z]:)?(?:\./|/|\\)[\S\\ ]+?\.(?i:jpg|jpeg|png|webp)\b`
2024-01-05 07:20:26 +08:00
re := regexp.MustCompile(regexPattern)
return re.FindAllString(input, -1)
}
func extractFileData(input string) (string, []api.ImageData, error) {
2024-01-05 07:20:26 +08:00
filePaths := extractFileNames(input)
var imgs []api.ImageData
2024-01-05 07:20:26 +08:00
for _, fp := range filePaths {
nfp := normalizeFilePath(fp)
data, err := getImageData(nfp)
if errors.Is(err, os.ErrNotExist) {
continue
} else if err != nil {
fmt.Fprintf(os.Stderr, "Couldn't process image: %q\n", err)
2024-01-05 07:20:26 +08:00
return "", imgs, err
}
fmt.Fprintf(os.Stderr, "Added image '%s'\n", nfp)
input = strings.ReplaceAll(input, "'"+nfp+"'", "")
input = strings.ReplaceAll(input, "'"+fp+"'", "")
2024-01-05 07:20:26 +08:00
input = strings.ReplaceAll(input, fp, "")
imgs = append(imgs, data)
}
return strings.TrimSpace(input), imgs, nil
2024-01-05 07:20:26 +08:00
}
func getImageData(filePath string) ([]byte, error) {
file, err := os.Open(filePath)
if err != nil {
return nil, err
}
defer file.Close()
buf := make([]byte, 512)
_, err = file.Read(buf)
if err != nil {
return nil, err
}
contentType := http.DetectContentType(buf)
allowedTypes := []string{"image/jpeg", "image/jpg", "image/png", "image/webp"}
2024-01-05 07:20:26 +08:00
if !slices.Contains(allowedTypes, contentType) {
return nil, fmt.Errorf("invalid image type: %s", contentType)
}
info, err := file.Stat()
if err != nil {
return nil, err
}
// Check if the file size exceeds 100MB
var maxSize int64 = 100 * 1024 * 1024 // 100MB in bytes
if info.Size() > maxSize {
2024-08-02 05:52:15 +08:00
return nil, errors.New("file size exceeds maximum limit (100MB)")
2024-01-05 07:20:26 +08:00
}
buf = make([]byte, info.Size())
_, err = file.Seek(0, 0)
if err != nil {
return nil, err
}
_, err = io.ReadFull(file, buf)
if err != nil {
return nil, err
}
return buf, nil
}