//go:build integration

package integration

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"math"
	"math/rand"
	"net"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"runtime"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/ollama/ollama/api"
	"github.com/ollama/ollama/app/lifecycle"
	"github.com/ollama/ollama/format"
)

var (
	smol   = "llama3.2:1b"
	stream = false
)

var (
	started = time.Now()

	// Note: add newer models at the top of the list to test them first
	ollamaEngineChatModels = []string{
		"qwen3-coder:30b",
		"gpt-oss:20b",
		"gemma3n:e2b",
		"mistral-small3.2:latest",
		"deepseek-r1:1.5b",
		"llama3.2-vision:latest",
		"qwen2.5-coder:latest",
		"qwen2.5vl:3b",
		"qwen3:0.6b", // dense
		"qwen3:1.7b", // dense
		"qwen3:30b",  // MOE
		"gemma3:1b",
		"llama3.1:latest",
		"llama3.2:latest",
		"gemma2:latest",
		"minicpm-v:latest",    // arch=qwen2
		"granite-code:latest", // arch=llama
	}
	llamaRunnerChatModels = []string{
		"mistral:latest",
		"falcon3:latest",
		"granite3-moe:latest",
		"command-r:latest",
		"nemotron-mini:latest",
		"phi3.5:latest",
		"solar-pro:latest",
		"internlm2:latest",
		"codellama:latest", // arch=llama
		"phi3:latest",
		"falcon2:latest",
		"gemma:latest",
		"llama2:latest",
		"nous-hermes:latest",
		"orca-mini:latest",
		"qwen:latest",
		"stablelm2:latest", // Predictions are off, crashes on small VRAM GPUs
		"falcon:latest",
	}

	// Some library models are quite large - ensure large VRAM and sufficient disk space
	// before running scenarios based on this set
	libraryChatModels = []string{
		"alfred",
		"athene-v2",
		"aya-expanse",
		"aya",
		"bakllava",
		"bespoke-minicheck",
		"codebooga",
		"codegeex4",
		"codegemma",
		"codellama",
		"codeqwen",
		"codestral",
		"codeup",
		"cogito",
		"command-a",
		"command-r-plus",
		"command-r",
		"command-r7b-arabic",
		"command-r7b",
		"dbrx",
		"deepcoder",
		"deepscaler",
		"deepseek-coder-v2",
		"deepseek-coder",
		"deepseek-llm",
		"deepseek-r1",
		// "deepseek-v2.5", // requires 155 GB VRAM
		"deepseek-v2",
		// "deepseek-v3", // requires 482 GB VRAM
		"devstral",
		"dolphin-llama3",
		"dolphin-mistral",
		"dolphin-mixtral",
		"dolphin-phi",
		"dolphin3",
		"dolphincoder",
		"duckdb-nsql",
		"everythinglm",
		"exaone-deep",
		"exaone3.5",
		"falcon",
		"falcon2",
		"falcon3",
		"firefunction-v2",
		"gemma",
		"gemma2",
		"gemma3",
		"gemma3n",
		"glm4",
		"goliath",
		"gpt-oss:20b",
		"granite-code",
		"granite3-dense",
		"granite3-guardian",
		"granite3-moe",
		"granite3.1-dense",
		"granite3.1-moe",
		"granite3.2-vision",
		"granite3.2",
		"granite3.3",
		"hermes3",
		"internlm2",
		"llama-guard3",
		"llama-pro",
		"llama2-chinese",
		"llama2-uncensored",
		"llama2",
		"llama3-chatqa",
		"llama3-gradient",
		"llama3-groq-tool-use",
		"llama3.1",
		"llama3.2-vision",
		"llama3.2",
		"llama3.3",
		"llama3",
		"llama4",
		"llava-llama3",
		"llava-phi3",
		"llava",
		"magicoder",
		"magistral",
		"marco-o1",
		"mathstral",
		"meditron",
		"medllama2",
		"megadolphin",
		"minicpm-v",
		"mistral-large",
		"mistral-nemo",
		"mistral-openorca",
		"mistral-small",
		"mistral-small3.1",
		"mistral-small3.2",
		"mistral",
		"mistrallite",
		"mixtral",
		"moondream",
		"nemotron-mini",
		"nemotron",
		"neural-chat",
		"nexusraven",
		"notus",
		"nous-hermes",
		"nous-hermes2-mixtral",
		"nous-hermes2",
		"nuextract",
		"olmo2",
		"open-orca-platypus2",
		"openchat",
		"opencoder",
		"openhermes",
		"openthinker",
		"orca-mini",
		"orca2",
		// "phi", // unreliable
		"phi3.5",
		"phi3",
		"phi4-mini-reasoning",
		"phi4-mini",
		"phi4-reasoning",
		"phi4",
		"phind-codellama",
		"qwen",
		"qwen2-math",
		"qwen2.5-coder",
		"qwen2.5",
		"qwen2.5vl",
		"qwen2",
		"qwen3:0.6b", // dense
		"qwen3:30b",  // MOE
		"qwq",
		"r1-1776",
		"reader-lm",
		"reflection",
		"sailor2",
		"samantha-mistral",
		"shieldgemma",
		"smallthinker",
		"smollm",
		"smollm2",
		"solar-pro",
		"solar",
		"sqlcoder",
		"stable-beluga",
		"stable-code",
		"stablelm-zephyr",
		"stablelm2",
		"starcoder",
		"starcoder2",
		"starling-lm",
		"tinydolphin",
		"tinyllama",
		"tulu3",
		"vicuna",
		"wizard-math",
		"wizard-vicuna-uncensored",
		"wizard-vicuna",
		"wizardcoder",
		"wizardlm-uncensored",
		"wizardlm2",
		"xwinlm",
		"yarn-llama2",
		"yarn-mistral",
		"yi-coder",
		"yi",
		"zephyr",
	}
	libraryEmbedModels = []string{
		"all-minilm",
		"bge-large",
		"bge-m3",
		"granite-embedding",
		"mxbai-embed-large",
		"nomic-embed-text",
		"paraphrase-multilingual",
		"snowflake-arctic-embed",
		"snowflake-arctic-embed2",
	}

	blueSkyPrompt   = "why is the sky blue? Be brief but factual in your reply"
	blueSkyExpected = []string{"rayleigh", "scatter", "atmosphere", "nitrogen", "oxygen", "wavelength", "interact"}

	rainbowPrompt    = "how do rainbows form? Be brief but factual in your reply"
	rainbowFollowups = []string{
		"Explain the physics involved in them. Be brief in your reply",
		"Explain the chemistry involved in them. Be brief in your reply",
		"What are common myths related to them? Be brief in your reply",
		"What are common fairytales related to them? Be brief in your reply",
		"Can they form if there is no rain? Be brief in your reply",
		"Can they form if there are no clouds? Be brief in your reply",
		"Do they happen on other planets? Be brief in your reply",
	}
	rainbowExpected = []string{"water", "droplet", "mist", "glow", "refract", "reflect", "scatter", "particles", "wave", "color", "spectrum", "raindrop", "atmosphere", "frequency", "end", "gold", "fortune", "blessing", "prosperity", "hope", "bridge", "magic", "shower", "sky", "shimmer", "light", "storm", "sunny", "beauty", "sunburst", "phenomenon", "mars", "venus", "jupiter"}
)

func init() {
	lifecycle.InitLogging()
	custom := os.Getenv("OLLAMA_TEST_DEFAULT_MODEL")
	if custom != "" {
		slog.Info("setting default test model to " + custom)
		smol = custom
	}
}

func FindPort() string {
	port := 0
	if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
		var l *net.TCPListener
		if l, err = net.ListenTCP("tcp", a); err == nil {
			port = l.Addr().(*net.TCPAddr).Port
			l.Close()
		}
	}
	if port == 0 {
		port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
	}
	return strconv.Itoa(port)
}

func GetTestEndpoint() (*api.Client, string) {
	defaultPort := "11434"
	ollamaHost := os.Getenv("OLLAMA_HOST")

	scheme, hostport, ok := strings.Cut(ollamaHost, "://")
	if !ok {
		scheme, hostport = "http", ollamaHost
	}

	// trim trailing slashes
	hostport = strings.TrimRight(hostport, "/")

	host, port, err := net.SplitHostPort(hostport)
	if err != nil {
		host, port = "127.0.0.1", defaultPort
		if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
			host = ip.String()
		} else if hostport != "" {
			host = hostport
		}
	}

	if os.Getenv("OLLAMA_TEST_EXISTING") == "" && port == defaultPort {
		port = FindPort()
	}

	slog.Info("server connection", "host", host, "port", port)

	return api.NewClient(
		&url.URL{
			Scheme: scheme,
			Host:   net.JoinHostPort(host, port),
		},
		http.DefaultClient), fmt.Sprintf("%s:%s", host, port)
}

var serverMutex sync.Mutex
var serverReady bool

func startServer(t *testing.T, ctx context.Context, ollamaHost string) error {
	// Make sure the server has been built
	CLIName, err := filepath.Abs("../ollama")
	if err != nil {
		return err
	}

	if runtime.GOOS == "windows" {
		CLIName += ".exe"
	}
	_, err = os.Stat(CLIName)
	if err != nil {
		return fmt.Errorf("CLI missing, did you forget to build first? %w", err)
	}
	serverMutex.Lock()
	defer serverMutex.Unlock()
	if serverReady {
		return nil
	}

	if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
		slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
		t.Setenv("OLLAMA_HOST", ollamaHost)
	}

	slog.Info("starting server", "url", ollamaHost)
	done, err := lifecycle.SpawnServer(ctx, "../ollama")
	if err != nil {
		return fmt.Errorf("failed to start server: %w", err)
	}

	go func() {
		<-ctx.Done()
		serverMutex.Lock()
		defer serverMutex.Unlock()
		exitCode := <-done
		if exitCode > 0 {
			slog.Warn("server failure", "exit", exitCode)
		}
		serverReady = false
	}()

	// TODO wait only long enough for the server to be responsive...
	time.Sleep(500 * time.Millisecond)

	serverReady = true
	return nil
}

// PullIfMissing pulls the model if it is not already present on the server, returning an error if
// the pull fails or the download stalls.
func PullIfMissing(ctx context.Context, client *api.Client, modelName string) error {
	slog.Info("checking status of model", "model", modelName)
	showReq := &api.ShowRequest{Name: modelName}

	showCtx, cancel := context.WithDeadlineCause(
		ctx,
		time.Now().Add(20*time.Second),
		fmt.Errorf("show for existing model %s took too long", modelName),
	)
	defer cancel()
	_, err := client.Show(showCtx, showReq)
	var statusError api.StatusError
	switch {
	case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
		break
	case err != nil:
		return err
	default:
		slog.Info("model already present", "model", modelName)
		return nil
	}
	slog.Info("model missing", "model", modelName)

	stallDuration := 60 * time.Second // This includes checksum verification, which can take a while on larger models and slower systems
	stallTimer := time.NewTimer(stallDuration)
	fn := func(resp api.ProgressResponse) error {
		// fmt.Print(".")
		if !stallTimer.Reset(stallDuration) {
			return errors.New("stall was detected, aborting status reporting")
		}
		return nil
	}

	stream := true
	pullReq := &api.PullRequest{Name: modelName, Stream: &stream}

	var pullError error

	done := make(chan int)
	go func() {
		pullError = client.Pull(ctx, pullReq, fn)
		done <- 0
	}()

	select {
	case <-stallTimer.C:
		return errors.New("download stalled")
	case <-done:
		return pullError
	}
}

var serverProcMutex sync.Mutex

// InitServerConnection returns a Client, the test endpoint, and a cleanup function; it fails the
// test on errors. The server is started first unless OLLAMA_TEST_EXISTING is set.
func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, string, func()) {
	client, testEndpoint := GetTestEndpoint()
	if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
		serverProcMutex.Lock()
		fp, err := os.CreateTemp("", "ollama-server-*.log")
		if err != nil {
			t.Fatalf("failed to generate log file: %s", err)
		}
		lifecycle.ServerLogFile = fp.Name()
		fp.Close()
		if err := startServer(t, ctx, testEndpoint); err != nil {
			t.Fatal(err)
		}
	}
	// Make sure server is online and healthy before returning
	listCtx, cancel := context.WithDeadlineCause(
		ctx,
		time.Now().Add(120*time.Second),
		fmt.Errorf("list models took too long"),
	)
	defer cancel()
	models, err := client.ListRunning(listCtx)
	if err != nil {
		t.Fatal(err)
	}
	if len(models.Models) > 0 {
		names := make([]string, len(models.Models))
		for i, m := range models.Models {
			names[i] = m.Name
		}
		slog.Info("currently loaded", "models", names)
	}

	return client, testEndpoint, func() {
		if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
			defer serverProcMutex.Unlock()
			if t.Failed() {
				fp, err := os.Open(lifecycle.ServerLogFile)
				if err != nil {
					slog.Error("failed to open server log", "logfile", lifecycle.ServerLogFile, "error", err)
					return
				}
				defer fp.Close()
				data, err := io.ReadAll(fp)
				if err != nil {
					slog.Error("failed to read server log", "logfile", lifecycle.ServerLogFile, "error", err)
					return
				}
				slog.Warn("SERVER LOG FOLLOWS")
				os.Stderr.Write(data)
				slog.Warn("END OF SERVER LOG")
			}
			err := os.Remove(lifecycle.ServerLogFile)
			if err != nil && !os.IsNotExist(err) {
				slog.Warn("failed to cleanup", "logfile", lifecycle.ServerLogFile, "error", err)
			}
		}
	}
}

// GenerateTestHelper connects to (or starts) a server, pulls the requested model if needed, and runs
// a single generate request, verifying the response contains at least one of the anyResp fragments.
func GenerateTestHelper(ctx context.Context, t *testing.T, genReq api.GenerateRequest, anyResp []string) {
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()
	if err := PullIfMissing(ctx, client, genReq.Model); err != nil {
		t.Fatal(err)
	}
	DoGenerate(ctx, t, client, genReq, anyResp, 30*time.Second, 10*time.Second)
}
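
// The helpers above are typically combined along these lines. A minimal sketch, not wired into the
// suite: the function name, prompt choice, and timeout values are illustrative, and a real test
// would usually derive its timeouts from getTimeouts (see the sketch at the end of this file).
func sketchGenerateUsage(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
	defer cancel()
	client, _, cleanup := InitServerConnection(ctx, t) // starts a server unless OLLAMA_TEST_EXISTING is set
	defer cleanup()
	if err := PullIfMissing(ctx, client, smol); err != nil {
		t.Fatal(err)
	}
	req := api.GenerateRequest{
		Model:     smol,
		Prompt:    blueSkyPrompt,
		Stream:    &stream,
		KeepAlive: &api.Duration{Duration: 10 * time.Second},
	}
	// Fails the test unless at least one expected fragment appears in the streamed reply
	DoGenerate(ctx, t, client, req, blueSkyExpected, 120*time.Second, 30*time.Second)
}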

// DoGenerate streams a single generate request, failing the test on errors, stalls, or a response
// that contains none of the anyResp fragments. It returns the response context, if one was reported.
func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq api.GenerateRequest, anyResp []string, initialTimeout, streamTimeout time.Duration) []int {
	stallTimer := time.NewTimer(initialTimeout)
	var buf bytes.Buffer
	var context []int
	fn := func(response api.GenerateResponse) error {
		// fmt.Print(".")
		buf.Write([]byte(response.Response))
		if !stallTimer.Reset(streamTimeout) {
			return errors.New("stall was detected while streaming response, aborting")
		}
		if len(response.Context) > 0 {
			context = response.Context
		}
		return nil
	}

	stream := true
	genReq.Stream = &stream
	done := make(chan int)
	var genErr error
	go func() {
		genErr = client.Generate(ctx, &genReq, fn)
		done <- 0
	}()

	var response string
	verify := func() {
		// Verify the response contains the expected data
		response = buf.String()
		atLeastOne := false
		for _, resp := range anyResp {
			if strings.Contains(strings.ToLower(response), resp) {
				atLeastOne = true
				break
			}
		}
		if !atLeastOne {
			t.Fatalf("%s: none of %v found in %s", genReq.Model, anyResp, response)
		}
	}

	select {
	case <-stallTimer.C:
		if buf.Len() == 0 {
			t.Errorf("generate never started. Timed out after: %s", initialTimeout.String())
		} else {
			t.Errorf("generate stalled. Response so far: %s", buf.String())
		}
	case <-done:
		if genErr != nil && strings.Contains(genErr.Error(), "model requires more system memory") {
			slog.Warn("model is too large for the target test system", "model", genReq.Model, "error", genErr)
			return context
		}
		if genErr != nil {
			t.Fatalf("generate request for %s failed: %s (prompt: %q)", genReq.Model, genErr, genReq.Prompt)
		}
		verify()
		slog.Info("test pass", "model", genReq.Model, "prompt", genReq.Prompt, "contains", anyResp, "response", response)
	case <-ctx.Done():
		// On slow systems, we might time out before some models finish rambling, so check what we have so
		// far to see if it's considered a pass - the stallTimer will detect hangs, but we want to consider
		// slow systems a pass if they are still generating valid responses
		slog.Warn("outer test context done while waiting for generate")
		verify()
	}
	return context
}

// Generate a set of requests
// By default each request uses the smol model (llama3.2:1b unless overridden via OLLAMA_TEST_DEFAULT_MODEL)
func GenerateRequests() ([]api.GenerateRequest, [][]string) {
	return []api.GenerateRequest{
			{
				Model:     smol,
				Prompt:    "why is the ocean blue? Be brief but factual in your reply",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
			}, {
				Model:     smol,
				Prompt:    "why is the color of dirt brown? Be brief but factual in your reply",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
			}, {
				Model:     smol,
				Prompt:    rainbowPrompt,
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
			}, {
				Model:     smol,
				Prompt:    "what is the origin of independence day? Be brief but factual in your reply",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
			}, {
				Model:     smol,
				Prompt:    "what is the composition of air? Be brief but factual in your reply",
				Stream:    &stream,
				KeepAlive: &api.Duration{Duration: 10 * time.Second},
			},
		},
		[][]string{
			{"sunlight", "scatter", "interact", "color", "surface", "depth", "red", "orange", "yellow", "absorb", "wavelength", "water", "molecule"},
			{"soil", "organic", "earth", "black", "tan", "chemical", "processes", "pigment", "particle", "iron oxide", "rust", "air", "water", "wet", "mixture", "mixing", "mineral", "element", "decomposed", "matter", "wavelength"},
			rainbowExpected,
			{"fourth", "july", "declaration", "independence"},
			{"nitrogen", "oxygen", "carbon", "dioxide", "water", "vapor", "fluid", "particles", "gas"},
		}
}

// DoChat streams a single chat request, failing the test on errors, stalls, or a response that
// contains none of the anyResp fragments. It returns the assistant message so callers can build up
// multi-turn conversations, or nil if the model did not fit in memory on the test system.
func DoChat(ctx context.Context, t *testing.T, client *api.Client, req api.ChatRequest, anyResp []string, initialTimeout, streamTimeout time.Duration) *api.Message {
	stallTimer := time.NewTimer(initialTimeout)
	var buf bytes.Buffer
	role := "assistant"
	fn := func(response api.ChatResponse) error {
		// fmt.Print(".")
		role = response.Message.Role
		buf.Write([]byte(response.Message.Content))
		if !stallTimer.Reset(streamTimeout) {
			return errors.New("stall was detected while streaming response, aborting")
		}
		return nil
	}

	stream := true
	req.Stream = &stream
	done := make(chan int)
	var genErr error
	go func() {
		genErr = client.Chat(ctx, &req, fn)
		done <- 0
	}()

	var response string
	verify := func() {
		// Verify the response contains the expected data
		response = buf.String()
		atLeastOne := false
		for _, resp := range anyResp {
			if strings.Contains(strings.ToLower(response), resp) {
				atLeastOne = true
				break
			}
		}
		if !atLeastOne {
			t.Fatalf("%s: none of %v found in %q -- request was: %v", req.Model, anyResp, response, req.Messages)
		}
	}

	select {
	case <-stallTimer.C:
		if buf.Len() == 0 {
			t.Errorf("chat never started. Timed out after: %s", initialTimeout.String())
		} else {
			t.Errorf("chat stalled. Response so far: %s", buf.String())
		}
	case <-done:
		if genErr != nil && strings.Contains(genErr.Error(), "model requires more system memory") {
			slog.Warn("model is too large for the target test system", "model", req.Model, "error", genErr)
			return nil
		}
		if genErr != nil {
			t.Fatalf("chat request for %s failed: %s (messages: %v)", req.Model, genErr, req.Messages)
		}
		verify()
		slog.Info("test pass", "model", req.Model, "messages", req.Messages, "contains", anyResp, "response", response)
	case <-ctx.Done():
		// On slow systems, we might time out before some models finish rambling, so check what we have so
		// far to see if it's considered a pass - the stallTimer will detect hangs, but we want to consider
		// slow systems a pass if they are still generating valid responses
		slog.Warn("outer test context done while waiting for chat")
		verify()
	}
	return &api.Message{Role: role, Content: buf.String()}
}
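
// DoChat hands back the assistant turn so a conversation can be grown one exchange at a time; the
// rainbowFollowups and rainbowExpected vars above exist for this pattern. A minimal sketch, not
// wired into the suite (the function name and timeout values are illustrative):
func sketchMultiTurnChat(ctx context.Context, t *testing.T, client *api.Client) {
	req := api.ChatRequest{
		Model:     smol,
		Messages:  []api.Message{{Role: "user", Content: rainbowPrompt}},
		Stream:    &stream,
		KeepAlive: &api.Duration{Duration: 10 * time.Second},
	}
	for _, followup := range rainbowFollowups {
		assistant := DoChat(ctx, t, client, req, rainbowExpected, 120*time.Second, 30*time.Second)
		if assistant == nil {
			return // model did not fit on this system; DoChat already logged a warning
		}
		// Carry the conversation forward: the prior assistant reply, then the next user question
		req.Messages = append(req.Messages, *assistant, api.Message{Role: "user", Content: followup})
	}
}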

func ChatRequests() ([]api.ChatRequest, [][]string) {
	genReqs, results := GenerateRequests()
	reqs := make([]api.ChatRequest, len(genReqs))
	// think := api.ThinkValue{Value: "low"}
	for i := range reqs {
		reqs[i].Model = genReqs[i].Model
		reqs[i].Stream = genReqs[i].Stream
		reqs[i].KeepAlive = genReqs[i].KeepAlive
		// reqs[i].Think = &think
		reqs[i].Messages = []api.Message{
			{
				Role:    "user",
				Content: genReqs[i].Prompt,
			},
		}
	}
	return reqs, results
}

func skipUnderMinVRAM(t *testing.T, gb uint64) {
	// TODO use info API in the future
	if s := os.Getenv("OLLAMA_MAX_VRAM"); s != "" {
		maxVram, err := strconv.ParseUint(s, 10, 64)
		if err != nil {
			t.Fatal(err)
		}
		// Don't hammer on small VRAM cards...
		if maxVram < gb*format.GibiByte {
			t.Skip("skipping with small VRAM to avoid timeouts")
		}
	}
}

// Skip if the target model isn't X% GPU loaded to avoid excessive runtime
func skipIfNotGPULoaded(ctx context.Context, t *testing.T, client *api.Client, model string, minPercent int) {
	models, err := client.ListRunning(ctx)
	if err != nil {
		t.Fatalf("failed to list running models: %s", err)
	}
	loaded := []string{}
	for _, m := range models.Models {
		loaded = append(loaded, m.Name)
		if m.Name != model {
			continue
		}
		gpuPercent := 0
		switch {
		case m.SizeVRAM == 0:
			gpuPercent = 0
		case m.SizeVRAM == m.Size:
			gpuPercent = 100
		case m.SizeVRAM > m.Size || m.Size == 0:
			t.Logf("unexpected size detected: %d", m.SizeVRAM)
		default:
			// Partial offload: estimate the GPU share from how much of the model is resident in VRAM
			sizeCPU := m.Size - m.SizeVRAM
			cpuPercent := math.Round(float64(sizeCPU) / float64(m.Size) * 100)
			gpuPercent = int(100 - cpuPercent)
		}
		if gpuPercent < minPercent {
			t.Skipf("test requires minimum %d%% GPU load, but model %s only has %d%%", minPercent, model, gpuPercent)
		}
		return
	}
	t.Skipf("model %s not loaded - actually loaded: %v", model, loaded)
}

// getTimeouts returns a soft timeout (stop starting new work) and a hard timeout (bound for the
// overall test context). With no -timeout deadline they default to 8 and 10 minutes; otherwise soft
// is two minutes before the deadline and hard is twenty seconds before it, and the test is skipped
// outright if less than two minutes remain.
func getTimeouts(t *testing.T) (soft time.Duration, hard time.Duration) {
	deadline, hasDeadline := t.Deadline()
	if !hasDeadline {
		return 8 * time.Minute, 10 * time.Minute
	} else if deadline.Compare(time.Now().Add(2*time.Minute)) <= 0 {
		t.Skip("too little time")
		return time.Duration(0), time.Duration(0)
	}
	return -time.Since(deadline.Add(-2 * time.Minute)), -time.Since(deadline.Add(-20 * time.Second))
}
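
// getTimeouts pairs with the package-level started timestamp: the hard timeout bounds the whole run
// through the context, while the soft timeout is checked between models so a sweep winds down before
// the deadline. A minimal sketch of that pattern, not wired into the suite (the function name and
// the 16 GB VRAM threshold are illustrative):
func sketchModelSweep(t *testing.T) {
	skipUnderMinVRAM(t, 16) // skip outright on small-VRAM systems

	softTimeout, hardTimeout := getTimeouts(t)
	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
	defer cancel()
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()

	for _, model := range ollamaEngineChatModels {
		if time.Since(started) > softTimeout {
			t.Log("soft timeout reached, skipping remaining models")
			break
		}
		if err := PullIfMissing(ctx, client, model); err != nil {
			t.Fatal(err)
		}
		req := api.ChatRequest{
			Model:     model,
			Messages:  []api.Message{{Role: "user", Content: blueSkyPrompt}},
			Stream:    &stream,
			KeepAlive: &api.Duration{Duration: 10 * time.Second},
		}
		DoChat(ctx, t, client, req, blueSkyExpected, 120*time.Second, 30*time.Second)
	}
}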
|