mirror of https://github.com/ollama/ollama.git
sched: fix runner leak during reloading unload (#10819)
When the same model is reloaded rapidly and client connections are canceled before the model finishes loading, a queued unload event could leak runners by deleting a different runner from the loaded list.
parent adff143bcd
commit d950ff12c0
@@ -387,6 +387,17 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
 				s.loadedMu.Unlock()
 				runner.refMu.Unlock()
 				slog.Debug("duplicate expired event, ignoring", "runner", runner)
+			} else if runner.pid != runnerToUnload.pid {
+				// If the pids do not match, we likely had multiple load
+				// failures for the same model in quick succession due to
+				// request context canceled and are draining the queue of
+				// events. Ensure the orphaned runner is properly shut down, but
+				// do not delete the mismatched loaded runner, or wait for VRAM
+				// convergence.
+				slog.Debug("orphaned runner shutting down", "orphan", runner, "loaded", runnerToUnload)
+				runner.unload()
+				s.loadedMu.Unlock()
+				runner.refMu.Unlock()
 			} else {
 				slog.Debug("starting background wait for VRAM recovery", "runner", runner)
 				finished := runner.waitForVRAMRecovery()
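To make the fix easier to follow outside the full scheduler, here is a minimal, self-contained sketch of the expired-event handling this hunk changes. The runnerRef fields, the handleExpired helper, and the main scenario are illustrative stand-ins rather than the actual ollama types; only the pid comparison and the branch structure mirror the diff above, and locking granularity and VRAM recovery are elided.

package main

import (
	"log/slog"
	"sync"
)

// runnerRef is a simplified stand-in carrying only what the branch logic needs.
type runnerRef struct {
	pid       int
	modelPath string
}

func (r *runnerRef) unload() {
	slog.Debug("unloading runner", "pid", r.pid, "model", r.modelPath)
}

type Scheduler struct {
	loadedMu sync.Mutex
	loaded   map[string]*runnerRef // loaded runners keyed by model path
}

// handleExpired processes one queued unload event for runner. The pid
// comparison is the essence of the fix: a stale event for an already-replaced
// runner must shut down the orphan but leave the newly loaded runner in the map.
func (s *Scheduler) handleExpired(runner *runnerRef) {
	s.loadedMu.Lock()
	defer s.loadedMu.Unlock()

	runnerToUnload := s.loaded[runner.modelPath]
	switch {
	case runnerToUnload == nil:
		// Already unloaded by an earlier event; nothing to do.
		slog.Debug("duplicate expired event, ignoring", "pid", runner.pid)
	case runner.pid != runnerToUnload.pid:
		// Stale event for an orphaned runner: shut it down, but do not
		// delete the different runner currently loaded for this model.
		slog.Debug("orphaned runner shutting down", "orphan", runner.pid, "loaded", runnerToUnload.pid)
		runner.unload()
	default:
		// Normal path: unload and remove from the loaded list.
		runner.unload()
		delete(s.loaded, runner.modelPath)
	}
}

func main() {
	s := &Scheduler{loaded: map[string]*runnerRef{}}

	old := &runnerRef{pid: 100, modelPath: "llama3"}
	s.loaded[old.modelPath] = old

	// The model is reloaded before the queued unload for old is processed.
	replacement := &runnerRef{pid: 200, modelPath: "llama3"}
	s.loaded[replacement.modelPath] = replacement

	// The stale event for pid 100 must not evict pid 200 from the loaded list.
	s.handleExpired(old)
	if s.loaded["llama3"] == replacement {
		slog.Info("replacement runner preserved", "pid", replacement.pid)
	}
}

Without the pid guard, the stale event would fall through to the default branch and delete the replacement's entry from the loaded list while its process kept running, which is the runner leak this commit closes.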