| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | package server | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"bytes" | 
					
						
							|  |  |  | 	"context" | 
					
						
							| 
									
										
										
										
											2024-08-02 05:52:15 +08:00
										 |  |  | 	"errors" | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	"log/slog" | 
					
						
							|  |  |  | 	"os" | 
					
						
							|  |  |  | 	"testing" | 
					
						
							|  |  |  | 	"time" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-02 05:52:15 +08:00
										 |  |  | 	"github.com/stretchr/testify/require" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	"github.com/ollama/ollama/api" | 
					
						
							|  |  |  | 	"github.com/ollama/ollama/app/lifecycle" | 
					
						
							|  |  |  | 	"github.com/ollama/ollama/format" | 
					
						
							|  |  |  | 	"github.com/ollama/ollama/gpu" | 
					
						
							|  |  |  | 	"github.com/ollama/ollama/llm" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-02 05:52:15 +08:00
// TestMain configures logging for the whole package test run before
// delegating to the standard test runner.
func TestMain(m *testing.M) {
	// Force verbose scheduler logging so test failures are diagnosable.
	os.Setenv("OLLAMA_DEBUG", "1")
	lifecycle.InitLogging()
	os.Exit(m.Run())
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func TestInitScheduler(t *testing.T) { | 
					
						
							|  |  |  | 	ctx, done := context.WithCancel(context.Background()) | 
					
						
							|  |  |  | 	defer done() | 
					
						
							|  |  |  | 	s := InitScheduler(ctx) | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	require.NotNil(t, s.loaded) | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
// TestLoad exercises Scheduler.load directly (bypassing Run) across three
// situations: server-creation failure, a successful load, and a server that
// is created but then fails while waiting to become ready.
func TestLoad(t *testing.T) {
	ctx, done := context.WithTimeout(context.Background(), 20*time.Millisecond)
	defer done()
	s := InitScheduler(ctx)
	var ggml *llm.GGML // value not used in tests
	req := &LlmRequest{
		ctx:             ctx,
		model:           &Model{ModelPath: "foo"},
		opts:            api.DefaultOptions(),
		successCh:       make(chan *runnerRef, 1),
		errCh:           make(chan error, 1),
		sessionDuration: &api.Duration{Duration: 2 * time.Second},
	}
	// Fail to load model first
	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
		return nil, errors.New("something failed to load model blah")
	}
	gpus := gpu.GpuInfoList{}
	s.load(req, ggml, gpus, 0)
	// The failure must surface only on errCh, and no runner may be recorded.
	require.Empty(t, req.successCh)
	require.Len(t, req.errCh, 1)
	s.loadedMu.Lock()
	require.Empty(t, s.loaded)
	s.loadedMu.Unlock()
	err := <-req.errCh
	// load wraps the factory error with a model-compatibility hint.
	require.Contains(t, err.Error(), "this model may be incompatible")

	// Second case: the factory succeeds and returns a mock server.
	server := &mockLlm{estimatedVRAM: 10, estimatedVRAMByGPU: map[string]uint64{}}
	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) {
		return server, nil
	}
	s.load(req, ggml, gpus, 0)
	select {
	case err := <-req.errCh:
		require.NoError(t, err)
	case resp := <-req.successCh:
		// A successful load registers exactly one runner holding one reference.
		require.Equal(t, uint64(10), resp.estimatedVRAM)
		require.Equal(t, uint(1), resp.refCount)
		s.loadedMu.Lock()
		require.Len(t, s.loaded, 1)
		s.loadedMu.Unlock()
	}

	// Third case: the server is created but its wait-for-ready step fails.
	req.model.ModelPath = "dummy_model_path"
	server.waitResp = errors.New("wait failure")
	s.load(req, ggml, gpus, 0)
	select {
	case err := <-req.errCh:
		require.Contains(t, err.Error(), "wait failure")
	case resp := <-req.successCh:
		t.Fatalf("unexpected success %v", resp)
	}
	// The failed runner remains tracked but its refcount drops back to zero...
	s.loadedMu.Lock()
	runner := s.loaded["dummy_model_path"]
	s.loadedMu.Unlock()
	require.NotNil(t, runner)
	require.Equal(t, uint(0), runner.refCount)
	// ...and it is queued for expiration shortly afterwards.
	time.Sleep(1 * time.Millisecond)
	require.Len(t, s.expiredCh, 1)
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
// reqBundle groups everything one simulated scheduler request needs: its own
// cancellable context, the request itself, a mock LLM server to hand out,
// and the parsed GGUF metadata for the temp model file backing the request.
type reqBundle struct {
	ctx     context.Context //nolint:containedctx
	ctxDone func()          // cancels ctx, simulating the client going away
	srv     *mockLlm        // mock server returned by newServer
	req     *LlmRequest     // request to feed into the scheduler
	ggml    *llm.GGML       // parsed model metadata for req.model
}
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | func (scenario *reqBundle) newServer(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) { | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	return scenario.srv, nil | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, estimatedVRAM uint64, duration *api.Duration) *reqBundle { | 
					
						
							|  |  |  | 	b := &reqBundle{} | 
					
						
							|  |  |  | 	b.ctx, b.ctxDone = context.WithCancel(ctx) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	t.Helper() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	f, err := os.CreateTemp(t.TempDir(), modelName) | 
					
						
							| 
									
										
										
										
											2024-05-22 13:21:04 +08:00
										 |  |  | 	require.NoError(t, err) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	defer f.Close() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-01 11:00:49 +08:00
										 |  |  | 	require.NoError(t, llm.WriteGGUF(f, llm.KV{ | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 		"general.architecture":          "llama", | 
					
						
							|  |  |  | 		"general.name":                  "name", | 
					
						
							|  |  |  | 		"llama.context_length":          uint32(32), | 
					
						
							|  |  |  | 		"llama.embedding_length":        uint32(4096), | 
					
						
							|  |  |  | 		"llama.block_count":             uint32(1), | 
					
						
							|  |  |  | 		"llama.attention.head_count":    uint32(32), | 
					
						
							|  |  |  | 		"llama.attention.head_count_kv": uint32(32), | 
					
						
							|  |  |  | 		"tokenizer.ggml.tokens":         []string{" "}, | 
					
						
							|  |  |  | 		"tokenizer.ggml.scores":         []float32{0}, | 
					
						
							|  |  |  | 		"tokenizer.ggml.token_type":     []int32{0}, | 
					
						
							| 
									
										
										
										
											2024-07-09 07:59:48 +08:00
										 |  |  | 	}, []llm.Tensor{ | 
					
						
							| 
									
										
										
										
											2024-06-25 12:47:52 +08:00
										 |  |  | 		{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, | 
					
						
							|  |  |  | 		{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))}, | 
					
						
							| 
									
										
										
										
											2024-06-01 11:00:49 +08:00
										 |  |  | 	})) | 
					
						
							| 
									
										
										
										
											2024-05-22 13:21:04 +08:00
										 |  |  | 	require.NoError(t, err) | 
					
						
							| 
									
										
										
										
											2024-04-25 07:17:24 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	fname := f.Name() | 
					
						
							|  |  |  | 	model := &Model{Name: modelName, ModelPath: fname} | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	b.ggml, err = llm.LoadModel(model.ModelPath, 0) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	require.NoError(t, err) | 
					
						
							| 
									
										
										
										
											2024-04-25 07:17:24 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	if duration == nil { | 
					
						
							|  |  |  | 		duration = &api.Duration{Duration: 5 * time.Millisecond} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	b.req = &LlmRequest{ | 
					
						
							|  |  |  | 		ctx:             b.ctx, | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 		model:           model, | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 		opts:            api.DefaultOptions(), | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 		sessionDuration: duration, | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 		successCh:       make(chan *runnerRef, 1), | 
					
						
							|  |  |  | 		errCh:           make(chan error, 1), | 
					
						
							|  |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	b.srv = &mockLlm{estimatedVRAM: estimatedVRAM, estimatedVRAMByGPU: map[string]uint64{"": estimatedVRAM}} | 
					
						
							|  |  |  | 	return b | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | func getGpuFn() gpu.GpuInfoList { | 
					
						
							|  |  |  | 	g := gpu.GpuInfo{Library: "metal"} | 
					
						
							|  |  |  | 	g.TotalMemory = 24 * format.GigaByte | 
					
						
							|  |  |  | 	g.FreeMemory = 12 * format.GigaByte | 
					
						
							|  |  |  | 	return []gpu.GpuInfo{g} | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | func getCpuFn() gpu.GpuInfoList { | 
					
						
							|  |  |  | 	g := gpu.GpuInfo{Library: "cpu"} | 
					
						
							|  |  |  | 	g.TotalMemory = 32 * format.GigaByte | 
					
						
							|  |  |  | 	g.FreeMemory = 26 * format.GigaByte | 
					
						
							|  |  |  | 	return []gpu.GpuInfo{g} | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
// TestRequestsSameModelSameRequest drives the scheduler's Run loop with two
// requests for the same model object and verifies the second is served by
// the already-loaded runner (no reload).
func TestRequestsSameModelSameRequest(t *testing.T) {
	ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer done()
	s := InitScheduler(ctx)
	s.getGpuFn = getGpuFn
	s.getCpuFn = getCpuFn
	a := newScenarioRequest(t, ctx, "ollama-model-1", 10, &api.Duration{Duration: 5 * time.Millisecond})
	b := newScenarioRequest(t, ctx, "ollama-model-1", 11, &api.Duration{Duration: 0})
	// Share the same model and metadata so b is indistinguishable from a.
	b.req.model = a.req.model
	b.ggml = a.ggml

	s.newServerFn = a.newServer
	slog.Info("a")
	s.pendingReqCh <- a.req
	require.Len(t, s.pendingReqCh, 1)
	s.Run(ctx)
	// First request must succeed and drain the pending queue.
	select {
	case resp := <-a.req.successCh:
		require.Equal(t, resp.llama, a.srv)
		require.Empty(t, s.pendingReqCh)
		require.Empty(t, a.req.errCh)
	case err := <-a.req.errCh:
		t.Fatal(err.Error())
	case <-ctx.Done():
		t.Fatal("timeout")
	}

	// Same runner as first request due to not needing a reload
	s.newServerFn = b.newServer
	slog.Info("b")
	s.pendingReqCh <- b.req
	select {
	case resp := <-b.req.successCh:
		// Served by a's mock server: b's factory was never invoked.
		require.Equal(t, resp.llama, a.srv)
		require.Empty(t, s.pendingReqCh)
		require.Empty(t, b.req.errCh)
	case err := <-b.req.errCh:
		t.Fatal(err.Error())
	case <-ctx.Done():
		t.Fatal("timeout")
	}
}
					
						
							|  |  |  | 
 | 
					
						
// TestRequestsSimpleReloadSameModel verifies that a second request for the
// same model with different settings (new adapter paths) forces a reload
// once the first request's context is cancelled.
func TestRequestsSimpleReloadSameModel(t *testing.T) {
	ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer done()
	s := InitScheduler(ctx)
	s.getGpuFn = getGpuFn
	s.getCpuFn = getCpuFn
	a := newScenarioRequest(t, ctx, "ollama-model-1", 10, &api.Duration{Duration: 5 * time.Millisecond})
	b := newScenarioRequest(t, ctx, "ollama-model-1", 20, &api.Duration{Duration: 5 * time.Millisecond})
	// Copy a's model so b can be mutated independently below.
	tmpModel := *a.req.model
	b.req.model = &tmpModel
	b.ggml = a.ggml

	s.newServerFn = a.newServer
	slog.Info("a")
	s.pendingReqCh <- a.req
	require.Len(t, s.pendingReqCh, 1)
	s.Run(ctx)
	select {
	case resp := <-a.req.successCh:
		require.Equal(t, resp.llama, a.srv)
		require.Empty(t, s.pendingReqCh)
		require.Empty(t, a.req.errCh)
	case err := <-a.req.errCh:
		t.Fatal(err.Error())
	case <-ctx.Done():
		t.Fatal("timeout")
	}

	// Trigger a reload
	s.newServerFn = b.newServer
	b.req.model.AdapterPaths = []string{"new"}
	slog.Info("b")
	s.pendingReqCh <- b.req
	// finish first two requests, so model can reload
	time.Sleep(1 * time.Millisecond)
	a.ctxDone()
	select {
	case resp := <-b.req.successCh:
		// The reload produced b's mock server this time.
		require.Equal(t, resp.llama, b.srv)
		require.Empty(t, s.pendingReqCh)
		require.Empty(t, b.req.errCh)
	case err := <-b.req.errCh:
		t.Fatal(err.Error())
	case <-ctx.Done():
		t.Fatal("timeout")
	}
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func TestRequestsMultipleLoadedModels(t *testing.T) { | 
					
						
							|  |  |  | 	ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) | 
					
						
							|  |  |  | 	defer done() | 
					
						
							|  |  |  | 	s := InitScheduler(ctx) | 
					
						
							|  |  |  | 	s.getGpuFn = getGpuFn | 
					
						
							|  |  |  | 	s.getCpuFn = getCpuFn | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Multiple loaded models
 | 
					
						
							|  |  |  | 	a := newScenarioRequest(t, ctx, "ollama-model-3a", 1*format.GigaByte, nil) | 
					
						
							|  |  |  | 	b := newScenarioRequest(t, ctx, "ollama-model-3b", 24*format.GigaByte, nil) | 
					
						
							|  |  |  | 	c := newScenarioRequest(t, ctx, "ollama-model-4a", 30, nil) | 
					
						
							|  |  |  | 	c.req.opts.NumGPU = 0                                       // CPU load, will be allowed
 | 
					
						
							|  |  |  | 	d := newScenarioRequest(t, ctx, "ollama-model-3c", 30, nil) // Needs prior unloaded
 | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 10:41:17 +08:00
										 |  |  | 	t.Setenv("OLLAMA_MAX_LOADED_MODELS", "1") | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	s.newServerFn = a.newServer | 
					
						
							|  |  |  | 	slog.Info("a") | 
					
						
							|  |  |  | 	s.pendingReqCh <- a.req | 
					
						
							|  |  |  | 	s.Run(ctx) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	select { | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	case resp := <-a.req.successCh: | 
					
						
							|  |  |  | 		require.Equal(t, resp.llama, a.srv) | 
					
						
							| 
									
										
										
										
											2024-05-22 13:21:04 +08:00
										 |  |  | 		require.Empty(t, s.pendingReqCh) | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 		require.Empty(t, a.req.errCh) | 
					
						
							|  |  |  | 	case err := <-a.req.errCh: | 
					
						
							| 
									
										
										
										
											2024-07-04 05:47:42 +08:00
										 |  |  | 		t.Fatal(err.Error()) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	case <-ctx.Done(): | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 		t.Fatal("timeout") | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	require.Len(t, s.loaded, 1) | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-04 10:41:17 +08:00
										 |  |  | 	t.Setenv("OLLAMA_MAX_LOADED_MODELS", "0") | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	s.newServerFn = b.newServer | 
					
						
							|  |  |  | 	slog.Info("b") | 
					
						
							|  |  |  | 	s.pendingReqCh <- b.req | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	select { | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	case resp := <-b.req.successCh: | 
					
						
							|  |  |  | 		require.Equal(t, resp.llama, b.srv) | 
					
						
							| 
									
										
										
										
											2024-05-22 13:21:04 +08:00
										 |  |  | 		require.Empty(t, s.pendingReqCh) | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 		require.Empty(t, b.req.errCh) | 
					
						
							|  |  |  | 	case err := <-b.req.errCh: | 
					
						
							| 
									
										
										
										
											2024-07-04 05:47:42 +08:00
										 |  |  | 		t.Fatal(err.Error()) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	case <-ctx.Done(): | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 		t.Fatal("timeout") | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	require.Len(t, s.loaded, 2) | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	// This is a CPU load with NumGPU = 0 so it should load
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	s.newServerFn = c.newServer | 
					
						
							|  |  |  | 	slog.Info("c") | 
					
						
							|  |  |  | 	s.pendingReqCh <- c.req | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	select { | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	case resp := <-c.req.successCh: | 
					
						
							|  |  |  | 		require.Equal(t, resp.llama, c.srv) | 
					
						
							| 
									
										
										
										
											2024-05-22 13:21:04 +08:00
										 |  |  | 		require.Empty(t, s.pendingReqCh) | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 		require.Empty(t, c.req.errCh) | 
					
						
							|  |  |  | 	case err := <-c.req.errCh: | 
					
						
							| 
									
										
										
										
											2024-07-04 05:47:42 +08:00
										 |  |  | 		t.Fatal(err.Error()) | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	case <-ctx.Done(): | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 		t.Fatal("timeout") | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	s.loadedMu.Lock() | 
					
						
							|  |  |  | 	require.Len(t, s.loaded, 3) | 
					
						
							|  |  |  | 	s.loadedMu.Unlock() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	// Try to load a model that wont fit
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	s.newServerFn = d.newServer | 
					
						
							|  |  |  | 	slog.Info("d") | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							|  |  |  | 	require.Len(t, s.loaded, 3) | 
					
						
							|  |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	a.ctxDone() // Won't help since this one isn't big enough to make room
 | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	time.Sleep(2 * time.Millisecond) | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	s.pendingReqCh <- d.req | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	// finish prior request, so new model can load
 | 
					
						
							|  |  |  | 	time.Sleep(6 * time.Millisecond) | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							|  |  |  | 	require.Len(t, s.loaded, 2) | 
					
						
							|  |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	b.ctxDone() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	select { | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	case resp := <-d.req.successCh: | 
					
						
							|  |  |  | 		require.Equal(t, resp.llama, d.srv) | 
					
						
							| 
									
										
										
										
											2024-05-22 13:21:04 +08:00
										 |  |  | 		require.Empty(t, s.pendingReqCh) | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 		require.Empty(t, d.req.errCh) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	case <-ctx.Done(): | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 		t.Fatal("timeout") | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	} | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							|  |  |  | 	require.Len(t, s.loaded, 2) | 
					
						
							|  |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
func TestGetRunner(t *testing.T) {
	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer done()

	// Three distinct models, each with a 2ms session duration so they
	// expire quickly once their request context is done.
	a := newScenarioRequest(t, ctx, "ollama-model-1a", 10, &api.Duration{Duration: 2 * time.Millisecond})
	b := newScenarioRequest(t, ctx, "ollama-model-1b", 10, &api.Duration{Duration: 2 * time.Millisecond})
	c := newScenarioRequest(t, ctx, "ollama-model-1c", 10, &api.Duration{Duration: 2 * time.Millisecond})
	t.Setenv("OLLAMA_MAX_QUEUE", "1") // only one request may sit in the pending queue
	s := InitScheduler(ctx)
	s.getGpuFn = getGpuFn
	s.getCpuFn = getCpuFn
	s.newServerFn = a.newServer
	slog.Info("a")
	successCh1a, errCh1a := s.GetRunner(a.ctx, a.req.model, a.req.opts, a.req.sessionDuration)
	require.Len(t, s.pendingReqCh, 1)
	slog.Info("b")
	// The queue already holds "a" and OLLAMA_MAX_QUEUE is 1, so this
	// request must be rejected immediately with a busy error.
	successCh1b, errCh1b := s.GetRunner(b.ctx, b.req.model, b.req.opts, b.req.sessionDuration)
	require.Len(t, s.pendingReqCh, 1)
	require.Empty(t, successCh1b)
	require.Len(t, errCh1b, 1)
	err := <-errCh1b
	require.Contains(t, err.Error(), "server busy")
	s.Run(ctx) // start the scheduler loop; "a" should now be processed and load
	select {
	case resp := <-successCh1a:
		require.Equal(t, resp.llama, a.srv)
		require.Empty(t, s.pendingReqCh)
		require.Empty(t, errCh1a)
	case err := <-errCh1a:
		t.Fatal(err.Error())
	case <-ctx.Done():
		t.Fatal("timeout")
	}
	a.ctxDone() // Set "a" model to idle so it can unload
	s.loadedMu.Lock()
	require.Len(t, s.loaded, 1)
	s.loadedMu.Unlock()

	// A request whose model path is invalid should surface the load
	// failure on its error channel rather than succeeding.
	c.req.model.ModelPath = "bad path"
	slog.Info("c")
	successCh1c, errCh1c := s.GetRunner(c.ctx, c.req.model, c.req.opts, c.req.sessionDuration)
	// Starts in pending channel, then should be quickly processed to return an error
	time.Sleep(20 * time.Millisecond) // Long enough for the "a" model to expire and unload
	require.Empty(t, successCh1c)
	s.loadedMu.Lock()
	require.Empty(t, s.loaded)
	s.loadedMu.Unlock()
	require.Len(t, errCh1c, 1)
	err = <-errCh1c
	require.Contains(t, err.Error(), "bad path")
	b.ctxDone()
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // TODO - add one scenario that triggers the bogus finished event with positive ref count
 | 
					
						
// TestPrematureExpired checks that the scheduler tolerates bogus lifecycle
// events: an expired event delivered while the runner is still referenced,
// and a finished event delivered after the runner has already unloaded.
func TestPrematureExpired(t *testing.T) {
	ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond)
	defer done()

	// Same model, same request
	scenario1a := newScenarioRequest(t, ctx, "ollama-model-1a", 10, nil)
	s := InitScheduler(ctx)
	// Report one metal GPU with 12G free so the scenario model can load.
	s.getGpuFn = func() gpu.GpuInfoList {
		g := gpu.GpuInfo{Library: "metal"}
		g.TotalMemory = 24 * format.GigaByte
		g.FreeMemory = 12 * format.GigaByte
		return []gpu.GpuInfo{g}
	}
	s.newServerFn = scenario1a.newServer
	successCh1a, errCh1a := s.GetRunner(scenario1a.ctx, scenario1a.req.model, scenario1a.req.opts, scenario1a.req.sessionDuration)
	require.Len(t, s.pendingReqCh, 1)
	s.Run(ctx)
	select {
	case resp := <-successCh1a:
		require.Equal(t, resp.llama, scenario1a.srv)
		require.Empty(t, s.pendingReqCh)
		require.Empty(t, errCh1a)
		s.loadedMu.Lock()
		require.Len(t, s.loaded, 1)
		s.loadedMu.Unlock()
		slog.Info("sending premature expired event now")
		s.expiredCh <- resp // Shouldn't happen in real life, but make sure its safe
	case err := <-errCh1a:
		t.Fatal(err.Error())
	case <-ctx.Done():
		t.Fatal("timeout")
	}
	// Let the session lapse, end the request, and give the scheduler time
	// to process the resulting finished/unload events.
	time.Sleep(scenario1a.req.sessionDuration.Duration)
	scenario1a.ctxDone()
	time.Sleep(20 * time.Millisecond)
	require.LessOrEqual(t, len(s.finishedReqCh), 1)
	time.Sleep(10 * time.Millisecond)
	// Finished events drained and the runner fully unloaded.
	require.Empty(t, s.finishedReqCh)
	s.loadedMu.Lock()
	require.Empty(t, s.loaded)
	s.loadedMu.Unlock()

	// also shouldn't happen in real life
	s.finishedReqCh <- scenario1a.req
	time.Sleep(5 * time.Millisecond)
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func TestUseLoadedRunner(t *testing.T) { | 
					
						
							| 
									
										
										
										
											2024-05-04 00:08:33 +08:00
										 |  |  | 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	req := &LlmRequest{ | 
					
						
							|  |  |  | 		ctx:             ctx, | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 		opts:            api.DefaultOptions(), | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 		successCh:       make(chan *runnerRef, 1), | 
					
						
							| 
									
										
										
										
											2024-07-03 06:12:43 +08:00
										 |  |  | 		sessionDuration: &api.Duration{Duration: 2}, | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	finished := make(chan *LlmRequest) | 
					
						
							| 
									
										
										
										
											2024-06-04 10:09:23 +08:00
										 |  |  | 	llm1 := &mockLlm{estimatedVRAMByGPU: map[string]uint64{}} | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 	r1 := &runnerRef{llama: llm1, sessionDuration: 1, numParallel: 1} | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	req.useLoadedRunner(r1, finished) | 
					
						
							|  |  |  | 	require.Equal(t, uint(1), r1.refCount) | 
					
						
							|  |  |  | 	require.Equal(t, time.Duration(2), r1.sessionDuration) | 
					
						
							|  |  |  | 	select { | 
					
						
							|  |  |  | 	case success := <-req.successCh: | 
					
						
							|  |  |  | 		require.Equal(t, r1, success) | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	case err := <-req.errCh: | 
					
						
							|  |  |  | 		t.Fatal(err.Error()) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	case <-ctx.Done(): | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 		t.Fatal("timeout") | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	done() | 
					
						
							|  |  |  | 	fin := <-finished | 
					
						
							|  |  |  | 	require.Equal(t, req, fin) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func TestUpdateFreeSpace(t *testing.T) { | 
					
						
							| 
									
										
										
										
											2024-05-04 00:08:33 +08:00
										 |  |  | 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	defer done() | 
					
						
							|  |  |  | 	gpus := gpu.GpuInfoList{ | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Library: "a", | 
					
						
							|  |  |  | 			ID:      "1", | 
					
						
							|  |  |  | 		}, | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Library: "a", | 
					
						
							|  |  |  | 			ID:      "2", | 
					
						
							|  |  |  | 		}, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	gpus[0].TotalMemory = 1000 | 
					
						
							|  |  |  | 	gpus[0].FreeMemory = 900 | 
					
						
							|  |  |  | 	gpus[1].TotalMemory = 2000 | 
					
						
							|  |  |  | 	gpus[1].FreeMemory = 1900 | 
					
						
							| 
									
										
										
										
											2024-06-04 10:09:23 +08:00
										 |  |  | 	llm1 := &mockLlm{estimatedVRAMByGPU: map[string]uint64{"1": 50, "2": 50}} | 
					
						
							|  |  |  | 	llm2 := &mockLlm{estimatedVRAMByGPU: map[string]uint64{"1": 125, "2": 75}} | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 	r1 := &runnerRef{llama: llm1, gpus: gpus, numParallel: 1} | 
					
						
							|  |  |  | 	r2 := &runnerRef{llama: llm2, gpus: gpus, numParallel: 1} | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	s := InitScheduler(ctx) | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	s.loaded["a"] = r1 | 
					
						
							|  |  |  | 	s.loaded["b"] = r2 | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	s.updateFreeSpace(gpus) | 
					
						
							| 
									
										
										
										
											2024-06-04 10:09:23 +08:00
										 |  |  | 	require.Equal(t, uint64(1000-50-125), gpus[0].FreeMemory) | 
					
						
							|  |  |  | 	require.Equal(t, uint64(2000-50-75), gpus[1].FreeMemory) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-06 03:07:20 +08:00
										 |  |  | func TestFilterGPUsWithoutLoadingModels(t *testing.T) { | 
					
						
							|  |  |  | 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) | 
					
						
							|  |  |  | 	defer done() | 
					
						
							|  |  |  | 	gpus := gpu.GpuInfoList{ | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Library: "cuda", | 
					
						
							|  |  |  | 			ID:      "0", | 
					
						
							|  |  |  | 		}, | 
					
						
							|  |  |  | 		{ | 
					
						
							|  |  |  | 			Library: "cuda", | 
					
						
							|  |  |  | 			ID:      "1", | 
					
						
							|  |  |  | 		}, | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	r1 := &runnerRef{gpus: gpu.GpuInfoList{gpus[0]}, loading: true} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	s := InitScheduler(ctx) | 
					
						
							|  |  |  | 	s.loadedMu.Lock() | 
					
						
							|  |  |  | 	s.loaded["a"] = r1 | 
					
						
							|  |  |  | 	s.loadedMu.Unlock() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	tmp := s.filterGPUsWithoutLoadingModels(gpus) | 
					
						
							|  |  |  | 	require.Len(t, tmp, 1) | 
					
						
							|  |  |  | 	require.Equal(t, "1", tmp[0].ID) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	r1.gpus = gpu.GpuInfoList{gpus[1]} | 
					
						
							|  |  |  | 	tmp = s.filterGPUsWithoutLoadingModels(gpus) | 
					
						
							|  |  |  | 	require.Len(t, tmp, 1) | 
					
						
							|  |  |  | 	require.Equal(t, "0", tmp[0].ID) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	r1.gpus = gpu.GpuInfoList{} | 
					
						
							|  |  |  | 	tmp = s.filterGPUsWithoutLoadingModels(gpus) | 
					
						
							|  |  |  | 	require.Len(t, tmp, 2) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func TestFindRunnerToUnload(t *testing.T) { | 
					
						
							| 
									
										
										
										
											2024-05-04 00:08:33 +08:00
										 |  |  | 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	defer done() | 
					
						
							| 
									
										
										
										
											2024-05-06 08:18:27 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 	r1 := &runnerRef{refCount: 1, sessionDuration: 1, numParallel: 1} | 
					
						
							|  |  |  | 	r2 := &runnerRef{sessionDuration: 2, numParallel: 1} | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	s := InitScheduler(ctx) | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	s.loaded["a"] = r1 | 
					
						
							|  |  |  | 	s.loaded["b"] = r2 | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-06 08:18:27 +08:00
										 |  |  | 	resp := s.findRunnerToUnload() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	require.Equal(t, r2, resp) | 
					
						
							|  |  |  | 	r2.refCount = 1 | 
					
						
							| 
									
										
										
										
											2024-05-06 08:18:27 +08:00
										 |  |  | 	resp = s.findRunnerToUnload() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	require.Equal(t, r1, resp) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func TestNeedsReload(t *testing.T) { | 
					
						
							| 
									
										
										
										
											2024-05-04 00:08:33 +08:00
										 |  |  | 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	defer done() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 10:09:23 +08:00
										 |  |  | 	llm := &mockLlm{estimatedVRAMByGPU: map[string]uint64{}} | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	do := api.DefaultOptions() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	runner := &runnerRef{ | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 		model: &Model{ | 
					
						
							|  |  |  | 			AdapterPaths:   []string{"adapter1"}, | 
					
						
							|  |  |  | 			ProjectorPaths: []string{"projector1"}, | 
					
						
							|  |  |  | 		}, | 
					
						
							|  |  |  | 		Options:     &do, | 
					
						
							|  |  |  | 		llama:       llm, | 
					
						
							|  |  |  | 		numParallel: 1, | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	req := &LlmRequest{ | 
					
						
							|  |  |  | 		model: &Model{ | 
					
						
							|  |  |  | 			AdapterPaths:   []string{"adapter2"}, | 
					
						
							|  |  |  | 			ProjectorPaths: []string{"projector2"}, | 
					
						
							|  |  |  | 		}, | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 		opts: api.DefaultOptions(), | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	} | 
					
						
							|  |  |  | 	resp := runner.needsReload(ctx, req) | 
					
						
							|  |  |  | 	require.True(t, resp) | 
					
						
							| 
									
										
										
										
											2024-05-14 08:17:36 +08:00
										 |  |  | 	req.model.AdapterPaths = runner.model.AdapterPaths | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	resp = runner.needsReload(ctx, req) | 
					
						
							|  |  |  | 	require.True(t, resp) | 
					
						
							| 
									
										
										
										
											2024-05-14 08:17:36 +08:00
										 |  |  | 	req.model.ProjectorPaths = runner.model.ProjectorPaths | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	runner.loading = true | 
					
						
							|  |  |  | 	req.opts.NumBatch = 1234 | 
					
						
							|  |  |  | 	resp = runner.needsReload(ctx, req) | 
					
						
							|  |  |  | 	require.True(t, resp) | 
					
						
							|  |  |  | 	req.opts.NumBatch = runner.Options.NumBatch | 
					
						
							| 
									
										
										
										
											2024-08-02 05:52:15 +08:00
										 |  |  | 	llm.pingResp = errors.New("foo") | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	resp = runner.needsReload(ctx, req) | 
					
						
							|  |  |  | 	require.True(t, resp) | 
					
						
							|  |  |  | 	llm.pingResp = nil | 
					
						
							|  |  |  | 	resp = runner.needsReload(ctx, req) | 
					
						
							|  |  |  | 	require.False(t, resp) | 
					
						
							|  |  |  | 	req.opts.NumGPU = 99 | 
					
						
							|  |  |  | 	resp = runner.needsReload(ctx, req) | 
					
						
							| 
									
										
										
										
											2024-04-26 07:02:40 +08:00
										 |  |  | 	require.True(t, resp) | 
					
						
							|  |  |  | 	req.opts.NumGPU = -1 | 
					
						
							|  |  |  | 	resp = runner.needsReload(ctx, req) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	require.False(t, resp) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func TestUnloadAllRunners(t *testing.T) { | 
					
						
							| 
									
										
										
										
											2024-05-04 00:08:33 +08:00
										 |  |  | 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	defer done() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-04 10:09:23 +08:00
										 |  |  | 	llm1 := &mockLlm{estimatedVRAMByGPU: map[string]uint64{}} | 
					
						
							|  |  |  | 	llm2 := &mockLlm{estimatedVRAMByGPU: map[string]uint64{}} | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	s := InitScheduler(ctx) | 
					
						
							|  |  |  | 	s.unloadAllRunners() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 	r1 := &runnerRef{llama: llm1, numParallel: 1} | 
					
						
							|  |  |  | 	r2 := &runnerRef{llama: llm2, numParallel: 1} | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Lock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	s.loaded["a"] = r1 | 
					
						
							|  |  |  | 	s.loaded["b"] = r2 | 
					
						
							| 
									
										
										
										
											2024-04-29 04:40:31 +08:00
										 |  |  | 	s.loadedMu.Unlock() | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	s.unloadAllRunners() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	require.True(t, llm1.closeCalled) | 
					
						
							|  |  |  | 	require.True(t, llm2.closeCalled) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func TestUnload(t *testing.T) { | 
					
						
							| 
									
										
										
										
											2024-06-04 10:09:23 +08:00
										 |  |  | 	llm1 := &mockLlm{estimatedVRAMByGPU: map[string]uint64{}} | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 	r1 := &runnerRef{llama: llm1, numParallel: 1} | 
					
						
							|  |  |  | 	r2 := &runnerRef{model: &Model{AdapterPaths: []string{"A"}}, numParallel: 1} | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | 	r1.unload() | 
					
						
							|  |  |  | 	require.True(t, llm1.closeCalled) | 
					
						
							|  |  |  | 	r2.unload() | 
					
						
							| 
									
										
										
										
											2024-05-14 08:17:36 +08:00
										 |  |  | 	require.Nil(t, r2.model) | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | func TestAlreadyCanceled(t *testing.T) { | 
					
						
							|  |  |  | 	ctx, done := context.WithTimeout(context.Background(), 500*time.Millisecond) | 
					
						
							|  |  |  | 	defer done() | 
					
						
							|  |  |  | 	dctx, done2 := context.WithCancel(ctx) | 
					
						
							|  |  |  | 	done2() | 
					
						
							| 
									
										
										
										
											2024-07-06 06:30:06 +08:00
										 |  |  | 	scenario1a := newScenarioRequest(t, dctx, "ollama-model-1", 10, &api.Duration{Duration: 0}) | 
					
						
							| 
									
										
										
										
											2024-05-07 08:47:52 +08:00
										 |  |  | 	s := InitScheduler(ctx) | 
					
						
							|  |  |  | 	slog.Info("scenario1a") | 
					
						
							|  |  |  | 	s.pendingReqCh <- scenario1a.req | 
					
						
							|  |  |  | 	require.Len(t, s.pendingReqCh, 1) | 
					
						
							|  |  |  | 	s.Run(ctx) | 
					
						
							|  |  |  | 	time.Sleep(5 * time.Millisecond) | 
					
						
							|  |  |  | 	require.Empty(t, s.pendingReqCh) | 
					
						
							|  |  |  | 	require.Empty(t, scenario1a.req.errCh) | 
					
						
							|  |  |  | 	require.Empty(t, scenario1a.req.successCh) | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-23 02:57:26 +08:00
										 |  |  | func TestHomogeneousGPUs(t *testing.T) { | 
					
						
							|  |  |  | 	ctx, done := context.WithTimeout(context.Background(), 100*time.Millisecond) | 
					
						
							|  |  |  | 	defer done() | 
					
						
							|  |  |  | 	s := InitScheduler(ctx) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	s.getGpuFn = func() gpu.GpuInfoList { | 
					
						
							|  |  |  | 		// Set memory values to require the model to be spread
 | 
					
						
							|  |  |  | 		gpus := []gpu.GpuInfo{ | 
					
						
							|  |  |  | 			{Library: "cuda"}, | 
					
						
							|  |  |  | 			{Library: "rocm"}, | 
					
						
							|  |  |  | 		} | 
					
						
							|  |  |  | 		gpus[0].TotalMemory = 1 * format.GibiByte | 
					
						
							|  |  |  | 		gpus[0].FreeMemory = 256 * format.MebiByte | 
					
						
							|  |  |  | 		gpus[1].TotalMemory = 1 * format.GibiByte | 
					
						
							|  |  |  | 		gpus[1].FreeMemory = 256 * format.MebiByte | 
					
						
							|  |  |  | 		return gpus | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	s.getCpuFn = getCpuFn | 
					
						
							|  |  |  | 	a := newScenarioRequest(t, ctx, "ollama-model-1", 10, &api.Duration{Duration: 5 * time.Millisecond}) | 
					
						
							|  |  |  | 	s.newServerFn = func(gpus gpu.GpuInfoList, model string, ggml *llm.GGML, adapters []string, projectors []string, opts api.Options, numParallel int) (llm.LlamaServer, error) { | 
					
						
							|  |  |  | 		require.Len(t, gpus, 1) | 
					
						
							|  |  |  | 		return a.newServer(gpus, model, ggml, adapters, projectors, opts, numParallel) | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 	slog.Info("a") | 
					
						
							|  |  |  | 	s.pendingReqCh <- a.req | 
					
						
							|  |  |  | 	require.Len(t, s.pendingReqCh, 1) | 
					
						
							|  |  |  | 	s.Run(ctx) | 
					
						
							|  |  |  | 	select { | 
					
						
							|  |  |  | 	case resp := <-a.req.successCh: | 
					
						
							|  |  |  | 		require.Equal(t, resp.llama, a.srv) | 
					
						
							|  |  |  | 		require.Empty(t, s.pendingReqCh) | 
					
						
							|  |  |  | 		require.Empty(t, a.req.errCh) | 
					
						
							|  |  |  | 	case err := <-a.req.errCh: | 
					
						
							|  |  |  | 		t.Fatal(err.Error()) | 
					
						
							|  |  |  | 	case <-ctx.Done(): | 
					
						
							|  |  |  | 		t.Fatal("timeout") | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
// mockLlm is a scriptable test double for an llm server: each field holds
// the canned value the corresponding method returns, so tests can steer
// scheduler behavior without starting a real runner.
type mockLlm struct {
	// Canned errors returned by Ping, WaitUntilRunning, and Completion.
	pingResp           error
	waitResp           error
	completionResp     error
	// Canned result and error returned by Embed.
	embedResp          *llm.EmbedResponse
	embedRespErr       error
	// Canned results and errors returned by Tokenize and Detokenize.
	tokenizeResp       []int
	tokenizeRespErr    error
	detokenizeResp     string
	// NOTE(review): field name has a typo ("detonekize"); renaming requires
	// updating every test that sets it, so it is left as-is here.
	detonekizeRespErr  error
	// closeResp is returned by Close; closeCalled records that Close ran,
	// letting tests assert that unload paths actually shut the server down.
	closeResp          error
	closeCalled        bool
	// Values reported by the Estimated* accessors.
	estimatedVRAM      uint64
	estimatedTotal     uint64
	estimatedVRAMByGPU map[string]uint64
}
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (s *mockLlm) Ping(ctx context.Context) error             { return s.pingResp } | 
					
						
							|  |  |  | func (s *mockLlm) WaitUntilRunning(ctx context.Context) error { return s.waitResp } | 
					
						
							|  |  |  | func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error { | 
					
						
							|  |  |  | 	return s.completionResp | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-08-02 05:52:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-31 04:12:21 +08:00
										 |  |  | func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) { | 
					
						
							| 
									
										
										
										
											2024-07-16 03:14:24 +08:00
										 |  |  | 	return s.embedResp, s.embedRespErr | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2024-08-02 05:52:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) { | 
					
						
							|  |  |  | 	return s.tokenizeResp, s.tokenizeRespErr | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-08-02 05:52:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | func (s *mockLlm) Detokenize(ctx context.Context, tokens []int) (string, error) { | 
					
						
							|  |  |  | 	return s.detokenizeResp, s.detonekizeRespErr | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-08-02 05:52:15 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-31 00:50:05 +08:00
										 |  |  | func (s *mockLlm) Close() error { | 
					
						
							|  |  |  | 	s.closeCalled = true | 
					
						
							|  |  |  | 	return s.closeResp | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2024-06-04 10:09:23 +08:00
										 |  |  | func (s *mockLlm) EstimatedVRAM() uint64                  { return s.estimatedVRAM } | 
					
						
							|  |  |  | func (s *mockLlm) EstimatedTotal() uint64                 { return s.estimatedTotal } | 
					
						
							| 
									
										
										
										
											2024-06-06 03:07:20 +08:00
										 |  |  | func (s *mockLlm) EstimatedVRAMByGPU(gpuid string) uint64 { return s.estimatedVRAMByGPU[gpuid] } |