mirror of https://github.com/ollama/ollama.git
tests: add single threaded history test (#12295)
* tests: add single threaded history test

  Also tidies up some existing tests to handle more model output variation

* test: add support for testing specific architectures
This commit is contained in:
parent af060eb250
commit c23e6f4cae
@@ -12,3 +12,6 @@ The integration tests have 2 modes of operating.
 
 > [!IMPORTANT]
 > Before running the tests locally without the "test existing" setting, compile ollama from the top of the source tree `go build .` in addition to GPU support with cmake if applicable on your platform. The integration tests expect to find an ollama binary at the top of the tree.
+
+
+Many tests use a default small model suitable to run on many systems. You can override this default model by setting `OLLAMA_TEST_DEFAULT_MODEL`
@@ -22,13 +22,12 @@ func TestAPIGenerate(t *testing.T) {
 	// Set up the test data
 	req := api.GenerateRequest{
 		Model:  smol,
-		Prompt: "why is the sky blue? be brief",
+		Prompt: blueSkyPrompt,
 		Options: map[string]interface{}{
 			"temperature": 0,
 			"seed":        123,
 		},
 	}
-	anyResp := []string{"rayleigh", "scattering"}
 
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()
@@ -120,14 +119,14 @@ func TestAPIGenerate(t *testing.T) {
 			// Verify the response contains the expected data
 			response := buf.String()
 			atLeastOne := false
-			for _, resp := range anyResp {
+			for _, resp := range blueSkyExpected {
 				if strings.Contains(strings.ToLower(response), resp) {
 					atLeastOne = true
 					break
 				}
 			}
 			if !atLeastOne {
-				t.Errorf("none of %v found in %s", anyResp, response)
+				t.Errorf("none of %v found in %s", blueSkyExpected, response)
 			}
 		case <-ctx.Done():
 			t.Error("outer test context done while waiting for generate")
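The verification hunks above switch the inlined any-of loop from a local `anyResp` slice to the shared `blueSkyExpected` list. The pattern itself is a case-insensitive "at least one expected substring" check; a minimal standalone sketch of it (the helper name is hypothetical, the tests inline the loop rather than sharing a function):

```go
package integration

import "strings"

// containsAny reports whether response contains at least one of the
// expected substrings, case-insensitively. Hypothetical helper: the
// tests above inline this exact loop.
func containsAny(response string, expected []string) bool {
	lower := strings.ToLower(response)
	for _, want := range expected {
		if strings.Contains(lower, want) {
			return true
		}
	}
	return false
}
```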
@@ -181,7 +180,7 @@ func TestAPIChat(t *testing.T) {
 		Messages: []api.Message{
 			{
 				Role:    "user",
-				Content: "why is the sky blue? be brief",
+				Content: blueSkyPrompt,
 			},
 		},
 		Options: map[string]interface{}{
@@ -189,7 +188,6 @@ func TestAPIChat(t *testing.T) {
 			"seed":        123,
 		},
 	}
-	anyResp := []string{"rayleigh", "scattering"}
 
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()
@@ -279,14 +277,14 @@ func TestAPIChat(t *testing.T) {
 			// Verify the response contains the expected data
 			response := buf.String()
 			atLeastOne := false
-			for _, resp := range anyResp {
+			for _, resp := range blueSkyExpected {
 				if strings.Contains(strings.ToLower(response), resp) {
 					atLeastOne = true
 					break
 				}
 			}
 			if !atLeastOne {
-				t.Errorf("none of %v found in %s", anyResp, response)
+				t.Errorf("none of %v found in %s", blueSkyExpected, response)
 			}
 		case <-ctx.Done():
 			t.Error("outer test context done while waiting for chat")
@@ -19,14 +19,14 @@ func TestBlueSky(t *testing.T) {
 	// Set up the test data
 	req := api.GenerateRequest{
 		Model:  smol,
-		Prompt: "why is the sky blue?",
+		Prompt: blueSkyPrompt,
 		Stream: &stream,
 		Options: map[string]any{
 			"temperature": 0,
 			"seed":        123,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"rayleigh", "scattering"})
+	GenerateTestHelper(ctx, t, req, blueSkyExpected)
 }
 
 func TestUnicode(t *testing.T) {
@@ -110,12 +110,12 @@ func TestUnicodeModelDir(t *testing.T) {
 
 	req := api.GenerateRequest{
 		Model:  smol,
-		Prompt: "why is the sky blue?",
+		Prompt: blueSkyPrompt,
 		Stream: &stream,
 		Options: map[string]any{
 			"temperature": 0,
 			"seed":        123,
 		},
 	}
-	GenerateTestHelper(ctx, t, req, []string{"rayleigh", "scattering"})
+	GenerateTestHelper(ctx, t, req, blueSkyExpected)
 }
@@ -63,11 +63,11 @@ func TestContextExhaustion(t *testing.T) {
 	if err := PullIfMissing(ctx, client, req.Model); err != nil {
 		t.Fatalf("PullIfMissing failed: %v", err)
 	}
-	DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived", "sunny", "cloudy", "clear", "water"}, 120*time.Second, 10*time.Second)
+	DoGenerate(ctx, t, client, req, []string{"once", "upon", "lived", "sunny", "cloudy", "clear", "water", "time", "travel", "world"}, 120*time.Second, 10*time.Second)
 }
 
 // Send multiple generate requests with prior context and ensure the response is coherent and expected
-func TestGenerateWithHistory(t *testing.T) {
+func TestParallelGenerateWithHistory(t *testing.T) {
 	modelOverride := ollamaEngineChatModels[0] // Most recent ollama engine model
 	req, resp := GenerateRequests()
 	numParallel := 2
@@ -113,8 +113,48 @@ func TestGenerateWithHistory(t *testing.T) {
 	wg.Wait()
 }
 
+// Send generate requests with prior context and ensure the response is coherent and expected
+func TestGenerateWithHistory(t *testing.T) {
+	req := api.GenerateRequest{
+		Model:     smol,
+		Prompt:    rainbowPrompt,
+		Stream:    &stream,
+		KeepAlive: &api.Duration{Duration: 10 * time.Second},
+		Options: map[string]any{
+			"num_ctx": 16384,
+		},
+	}
+
+	softTimeout, hardTimeout := getTimeouts(t)
+	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
+	defer cancel()
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+
+	// Get the server running (if applicable) and warm the model up with a single initial request
+	slog.Info("loading", "model", req.Model)
+	err := client.Generate(ctx,
+		&api.GenerateRequest{Model: req.Model, KeepAlive: &api.Duration{Duration: 10 * time.Second}, Options: req.Options},
+		func(response api.GenerateResponse) error { return nil },
+	)
+	if err != nil {
+		t.Fatalf("failed to load model %s: %s", req.Model, err)
+	}
+
+	req.Context = DoGenerate(ctx, t, client, req, rainbowExpected, 30*time.Second, 20*time.Second)
+
+	for i := 0; i < len(rainbowFollowups); i++ {
+		req.Prompt = rainbowFollowups[i]
+		if time.Now().Sub(started) > softTimeout {
+			slog.Info("exceeded soft timeout, winding down test")
+			return
+		}
+		req.Context = DoGenerate(ctx, t, client, req, rainbowExpected, 30*time.Second, 20*time.Second)
+	}
+}
+
 // Send multiple chat requests with prior context and ensure the response is coherent and expected
-func TestChatWithHistory(t *testing.T) {
+func TestParallelChatWithHistory(t *testing.T) {
 	modelOverride := ollamaEngineChatModels[0] // Most recent ollama engine model
 	req, resp := ChatRequests()
 	numParallel := 2
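The new single-threaded test drives conversation history through `/api/generate` by feeding the `Context` token slice returned from one call into the next request. A minimal sketch of that pattern against the public client, assuming a locally running server; the model name and prompts are illustrative:

```go
package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}
	stream := false
	req := &api.GenerateRequest{Model: "llama3.2", Stream: &stream} // illustrative model
	prompts := []string{
		"how do rainbows form?",
		"Explain the physics involved in them.",
		"Do they happen on other planets?",
	}
	for _, p := range prompts {
		req.Prompt = p
		err := client.Generate(context.Background(), req, func(r api.GenerateResponse) error {
			fmt.Println(r.Response)
			req.Context = r.Context // carry the conversation into the next turn
			return nil
		})
		if err != nil {
			panic(err)
		}
	}
}
```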
@@ -164,3 +204,55 @@ func TestChatWithHistory(t *testing.T) {
 	}
 	wg.Wait()
 }
+
+// Send chat requests with prior context and ensure the response is coherent and expected
+func TestChatWithHistory(t *testing.T) {
+	req := api.ChatRequest{
+		Model:     smol,
+		Stream:    &stream,
+		KeepAlive: &api.Duration{Duration: 10 * time.Second},
+		Options: map[string]any{
+			"num_ctx": 16384,
+		},
+		Messages: []api.Message{
+			{
+				Role:    "user",
+				Content: rainbowPrompt,
+			},
+		},
+	}
+
+	softTimeout, hardTimeout := getTimeouts(t)
+	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
+	defer cancel()
+	client, _, cleanup := InitServerConnection(ctx, t)
+	defer cleanup()
+
+	// Get the server running (if applicable) and warm the model up with a single initial request
+	slog.Info("loading", "model", req.Model)
+	err := client.Generate(ctx,
+		&api.GenerateRequest{Model: req.Model, KeepAlive: &api.Duration{Duration: 10 * time.Second}, Options: req.Options},
+		func(response api.GenerateResponse) error { return nil },
+	)
+	if err != nil {
+		t.Fatalf("failed to load model %s: %s", req.Model, err)
+	}
+
+	assistant := DoChat(ctx, t, client, req, rainbowExpected, 30*time.Second, 20*time.Second)
+
+	for i := 0; i < len(rainbowFollowups); i++ {
+		if time.Now().Sub(started) > softTimeout {
+			slog.Info("exceeded soft timeout, winding down test")
+			return
+		}
+		req.Messages = append(req.Messages,
+			*assistant,
+			api.Message{Role: "user", Content: rainbowFollowups[i]},
+		)
+
+		assistant = DoChat(ctx, t, client, req, rainbowExpected, 30*time.Second, 20*time.Second)
+		if assistant == nil {
+			t.Fatalf("didn't get an assistant response for context")
+		}
+	}
+}
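The chat counterpart carries history as explicit messages rather than context tokens: each turn appends the assistant's reply plus the next user message, which is what the loop above does via `DoChat`. A rough sketch of the same pattern under the same assumptions (illustrative model and prompts):

```go
package main

import (
	"context"
	"fmt"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}
	stream := false
	req := &api.ChatRequest{
		Model:    "llama3.2", // illustrative model
		Stream:   &stream,
		Messages: []api.Message{{Role: "user", Content: "how do rainbows form?"}},
	}
	for _, followup := range []string{"Explain the physics involved.", "Do they happen on other planets?"} {
		var assistant api.Message
		if err := client.Chat(context.Background(), req, func(r api.ChatResponse) error {
			assistant = r.Message // with Stream=false this is the full reply
			return nil
		}); err != nil {
			panic(err)
		}
		fmt.Println(assistant.Content)
		// Grow the transcript: assistant reply, then the next user turn.
		req.Messages = append(req.Messages, assistant, api.Message{Role: "user", Content: followup})
	}
}
```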
@@ -4,7 +4,9 @@ package integration
 
 import (
 	"context"
+	"fmt"
 	"log/slog"
+	"os"
 	"testing"
 	"time"
 

@@ -20,6 +22,7 @@ func TestLibraryModelsGenerate(t *testing.T) {
 	defer cancel()
 	client, _, cleanup := InitServerConnection(ctx, t)
 	defer cleanup()
+	targetArch := os.Getenv("OLLAMA_TEST_ARCHITECTURE")
 
 	chatModels := libraryChatModels
 	for _, model := range chatModels {
@@ -30,16 +33,26 @@ func TestLibraryModelsGenerate(t *testing.T) {
 		if err := PullIfMissing(ctx, client, model); err != nil {
 			t.Fatalf("pull failed %s", err)
 		}
+		if targetArch != "" {
+			resp, err := client.Show(ctx, &api.ShowRequest{Name: model})
+			if err != nil {
+				t.Fatalf("unable to show model: %s", err)
+			}
+			arch := resp.ModelInfo["general.architecture"].(string)
+			if arch != targetArch {
+				t.Skip(fmt.Sprintf("Skipping %s architecture %s != %s", model, arch, targetArch))
+			}
+		}
 		req := api.GenerateRequest{
 			Model:     model,
-			Prompt:    "why is the sky blue?",
+			Prompt:    blueSkyPrompt,
 			KeepAlive: &api.Duration{Duration: 10 * time.Second},
 			Options: map[string]interface{}{
 				"temperature": 0.1,
 				"seed":        123,
 			},
 		}
-		anyResp := []string{"rayleigh", "scatter", "atmosphere", "nitrogen", "oxygen", "wavelength"}
+		anyResp := blueSkyExpected
 		// Special cases
 		if model == "duckdb-nsql" {
 			anyResp = []string{"select", "from"}
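The `OLLAMA_TEST_ARCHITECTURE` gate introduced above reads the model's GGUF metadata over `/api/show` and skips mismatching models. A condensed sketch of that check (the helper name is hypothetical; the test inlines it):

```go
package integration

import (
	"context"
	"os"
	"testing"

	"github.com/ollama/ollama/api"
)

// skipUnlessArch mirrors the inline check above: skip this model unless its
// architecture matches OLLAMA_TEST_ARCHITECTURE. Hypothetical helper.
func skipUnlessArch(ctx context.Context, t *testing.T, client *api.Client, model string) {
	t.Helper()
	target := os.Getenv("OLLAMA_TEST_ARCHITECTURE")
	if target == "" {
		return // no architecture filter requested
	}
	resp, err := client.Show(ctx, &api.ShowRequest{Name: model})
	if err != nil {
		t.Fatalf("unable to show model: %s", err)
	}
	// ModelInfo is a map[string]any of GGUF metadata keys.
	if arch, _ := resp.ModelInfo["general.architecture"].(string); arch != target {
		t.Skipf("skipping %s: architecture %s != %s", model, arch, target)
	}
}
```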
@@ -68,14 +68,13 @@ func TestModelsGenerate(t *testing.T) {
 			// TODO - fiddle with context size
 			req := api.GenerateRequest{
 				Model:  model,
-				Prompt: "why is the sky blue?",
+				Prompt: blueSkyPrompt,
 				Options: map[string]interface{}{
 					"temperature": 0,
 					"seed":        123,
 				},
 			}
-			anyResp := []string{"rayleigh", "scattering", "atmosphere", "nitrogen", "oxygen"}
-			DoGenerate(ctx, t, client, req, anyResp, 120*time.Second, 30*time.Second)
+			DoGenerate(ctx, t, client, req, blueSkyExpected, 120*time.Second, 30*time.Second)
 		})
 	}
 }
@@ -40,6 +40,18 @@ var (
 // cat int.log | grep MODEL_PERF_HEADER | head -1| cut -f2- -d: > perf.csv
 // cat int.log | grep MODEL_PERF_DATA | cut -f2- -d: >> perf.csv
 func TestModelsPerf(t *testing.T) {
+	if s := os.Getenv("OLLAMA_NEW_ENGINE"); s != "" {
+		doModelPerfTest(t, ollamaEngineChatModels)
+	} else {
+		doModelPerfTest(t, append(ollamaEngineChatModels, llamaRunnerChatModels...))
+	}
+}
+
+func TestLibraryModelsPerf(t *testing.T) {
+	doModelPerfTest(t, libraryChatModels)
+}
+
+func doModelPerfTest(t *testing.T, chatModels []string) {
 	softTimeout, hardTimeout := getTimeouts(t)
 	slog.Info("Setting timeouts", "soft", softTimeout, "hard", hardTimeout)
 	ctx, cancel := context.WithTimeout(context.Background(), hardTimeout)
@@ -65,14 +77,12 @@ func TestModelsPerf(t *testing.T) {
 	}
 	longPrompt := "summarize the following: " + string(data)
 
-	var chatModels []string
-	if s := os.Getenv("OLLAMA_NEW_ENGINE"); s != "" {
-		chatModels = ollamaEngineChatModels
-	} else {
-		chatModels = append(ollamaEngineChatModels, llamaRunnerChatModels...)
-	}
+	targetArch := os.Getenv("OLLAMA_TEST_ARCHITECTURE")
 
 	for _, model := range chatModels {
+		if !strings.Contains(model, ":") {
+			model = model + ":latest"
+		}
 		t.Run(model, func(t *testing.T) {
 			if time.Now().Sub(started) > softTimeout {
 				t.Skip("skipping remaining tests to avoid excessive runtime")
@@ -88,6 +98,9 @@ func TestModelsPerf(t *testing.T) {
 			}
 			arch := resp.ModelInfo["general.architecture"].(string)
 			maxContext = int(resp.ModelInfo[fmt.Sprintf("%s.context_length", arch)].(float64))
+			if targetArch != "" && arch != targetArch {
+				t.Skip(fmt.Sprintf("Skipping %s architecture %s != %s", model, arch, targetArch))
+			}
 
 			if maxVram > 0 {
 				resp, err := client.List(ctx)
@@ -151,8 +164,8 @@ func TestModelsPerf(t *testing.T) {
 			prompt  string
 			anyResp []string
 		}{
-			{"why is the sky blue?", []string{"rayleigh", "scattering", "atmosphere", "nitrogen", "oxygen"}},
-			{maxPrompt, []string{"shakespeare", "oppression", "sorrows", "gutenberg", "child", "license", "sonnet", "melancholy"}},
+			{blueSkyPrompt, blueSkyExpected},
+			{maxPrompt, []string{"shakespeare", "oppression", "sorrows", "gutenberg", "child", "license", "sonnet", "melancholy", "love", "sorrow", "beauty"}},
 		}
 		var gpuPercent int
 		for _, tc := range testCases {
@@ -241,11 +254,12 @@ func TestModelsPerf(t *testing.T) {
 					}
 				}
 			}
+			// Round the logged prompt count for comparisons across versions/configurations which can vary slightly
 			fmt.Fprintf(os.Stderr, "MODEL_PERF_HEADER:%s,%s,%s,%s,%s,%s,%s\n",
 				"MODEL",
 				"CONTEXT",
 				"GPU PERCENT",
-				"PROMPT COUNT",
+				"APPROX PROMPT COUNT",
 				"LOAD TIME",
 				"PROMPT EVAL TPS",
 				"EVAL TPS",
@@ -254,7 +268,7 @@ func TestModelsPerf(t *testing.T) {
 				model,
 				numCtx,
 				gpuPercent,
-				resp.PromptEvalCount,
+				(resp.PromptEvalCount/10)*10,
 				float64(resp.LoadDuration)/1000000000.0,
 				float64(resp.PromptEvalCount)/(float64(resp.PromptEvalDuration)/1000000000.0),
 				float64(resp.EvalCount)/(float64(resp.EvalDuration)/1000000000.0),
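The `(resp.PromptEvalCount/10)*10` expression relies on Go's truncating integer division, flooring the count to the nearest multiple of ten so perf CSV rows stay comparable when tokenization drifts by a few tokens between versions. A quick illustration:

```go
package main

import "fmt"

func main() {
	// Integer division truncates, so (n/10)*10 floors to a multiple of 10.
	for _, n := range []int{1234, 1239, 1240} {
		fmt.Printf("%d -> %d\n", n, (n/10)*10) // 1230, 1230, 1240
	}
}
```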
@@ -76,7 +76,7 @@ func TestQuantization(t *testing.T) {
 			stream := true
 			genReq := api.GenerateRequest{
 				Model:     newName,
-				Prompt:    "why is the sky blue?",
+				Prompt:    blueSkyPrompt,
 				KeepAlive: &api.Duration{Duration: 3 * time.Second},
 				Options: map[string]any{
 					"seed": 42,
@@ -88,14 +88,13 @@ func TestQuantization(t *testing.T) {
 
 			// Some smaller quantizations can cause models to have poor quality
 			// or get stuck in repetition loops, so we stop as soon as we have any matches
-			anyResp := []string{"rayleigh", "scattering", "day", "sun", "moon", "color", "nitrogen", "oxygen"}
 			reqCtx, reqCancel := context.WithCancel(ctx)
 			atLeastOne := false
 			var buf bytes.Buffer
 			genfn := func(response api.GenerateResponse) error {
 				buf.Write([]byte(response.Response))
 				fullResp := strings.ToLower(buf.String())
-				for _, resp := range anyResp {
+				for _, resp := range blueSkyExpected {
 					if strings.Contains(fullResp, resp) {
 						atLeastOne = true
 						t.Log(fullResp)
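The quantization test's early-stop idea is worth calling out: it streams under a child context and cancels the request the moment any expected substring appears, so a low-bit model stuck in a repetition loop cannot burn the whole timeout. A loose sketch of the pattern (illustrative model and prompt, error handling simplified):

```go
package main

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"strings"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		panic(err)
	}
	stream := true
	req := &api.GenerateRequest{Model: "llama3.2", Prompt: "why is the sky blue?", Stream: &stream}

	reqCtx, reqCancel := context.WithCancel(context.Background())
	defer reqCancel()

	var buf bytes.Buffer
	matched := false
	err = client.Generate(reqCtx, req, func(r api.GenerateResponse) error {
		buf.WriteString(r.Response)
		if strings.Contains(strings.ToLower(buf.String()), "rayleigh") {
			matched = true
			reqCancel() // stop the stream as soon as we have a match
		}
		return nil
	})
	// Cancellation is the expected way out once a match is found.
	if err != nil && !matched && !errors.Is(err, context.Canceled) {
		panic(err)
	}
	fmt.Println("matched:", matched)
}
```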
@@ -256,13 +256,29 @@ var (
 		"snowflake-arctic-embed",
 		"snowflake-arctic-embed2",
 	}
+
+	blueSkyPrompt   = "why is the sky blue? Be brief but factual in your reply"
+	blueSkyExpected = []string{"rayleigh", "scatter", "atmosphere", "nitrogen", "oxygen", "wavelength", "interact"}
+
+	rainbowPrompt    = "how do rainbows form? Be brief but factual in your reply"
+	rainbowFollowups = []string{
+		"Explain the physics involved in them. Be brief in your reply",
+		"Explain the chemistry involved in them. Be brief in your reply",
+		"Explain the quantum mechanics involved in them. Be brief in your reply",
+		"What are common myths related to them? Be brief in your reply",
+		"What are common fairytales related to them? Be brief in your reply",
+		"Can they form if there is no rain? Be brief in your reply",
+		"Can they form if there are no clouds? Be brief in your reply",
+		"Do they happen on other planets? Be brief in your reply",
+	}
+	rainbowExpected = []string{"water", "droplet", "mist", "glow", "refracted", "reflect", "color", "spectrum", "frequency", "end", "gold", "fortune", "blessing", "prosperity"}
 )
 
 func init() {
 	lifecycle.InitLogging()
-	custom := os.Getenv("OLLAMA_TEST_SMOL_MODEL")
+	custom := os.Getenv("OLLAMA_TEST_DEFAULT_MODEL")
 	if custom != "" {
-		slog.Info("setting smol test model to " + custom)
+		slog.Info("setting default test model to " + custom)
 		smol = custom
 	}
 }
@@ -577,11 +593,11 @@ func GenerateRequests() ([]api.GenerateRequest, [][]string) {
 			},
 		},
 		[][]string{
-			{"sunlight", "scattering", "interact", "color", "surface", "depth", "red", "orange", "yellow", "absorbs", "wavelength"},
-			{"soil", "organic", "earth", "black", "tan", "chemical", "processes", "pigments", "particles", "iron oxide", "rust", "air", "water", "mixture", "mixing"},
-			{"water", "droplet", "refracted", "reflect", "color", "spectrum"},
+			{"sunlight", "scatter", "interact", "color", "surface", "depth", "red", "orange", "yellow", "absorb", "wavelength", "water", "molecule"},
+			{"soil", "organic", "earth", "black", "tan", "chemical", "processes", "pigment", "particle", "iron oxide", "rust", "air", "water", "wet", "mixture", "mixing", "mineral", "element", "decomposed", "matter", "wavelength"},
+			{"water", "droplet", "refract", "reflect", "color", "spectrum", "raindrop"},
 			{"fourth", "july", "declaration", "independence"},
-			{"nitrogen", "oxygen", "carbon", "dioxide", "water", "vapor"},
+			{"nitrogen", "oxygen", "carbon", "dioxide", "water", "vapor", "fluid", "particles", "gas"},
 		}
 }
 