mirror of https://github.com/ollama/ollama.git
avoid context overflow (#11175)
For models with a small context window, make sure the requested context size does not exceed the context length the model was trained with.
This commit is contained in:
parent
1c6669e64c
commit
10a8e04a8d
|
@@ -139,6 +139,13 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
|
||||||
gpus = discover.GetCPUInfo()
|
gpus = discover.GetCPUInfo()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify the requested context size is <= the model training size
|
||||||
|
trainCtx := f.KV().ContextLength()
|
||||||
|
if opts.NumCtx/numParallel > int(trainCtx) && trainCtx > 0 {
|
||||||
|
slog.Warn("requested context size too large for model", "num_ctx", opts.NumCtx, "num_parallel", numParallel, "n_ctx_train", trainCtx)
|
||||||
|
opts.NumCtx = int(trainCtx) * numParallel
|
||||||
|
}
|
||||||
|
|
||||||
estimate := EstimateGPULayers(gpus, f, projectors, opts, numParallel)
|
estimate := EstimateGPULayers(gpus, f, projectors, opts, numParallel)
|
||||||
if len(gpus) > 1 || gpus[0].Library != "cpu" {
|
if len(gpus) > 1 || gpus[0].Library != "cpu" {
|
||||||
switch {
|
switch {
|
||||||
|
|
Loading…
Reference in New Issue