mirror of https://github.com/ollama/ollama.git
sched: fix runner leak during reloading unload (#10819)
When the same model is reloaded rapidly and client connections are canceled before the model finishes loading, a queued unload event could leak runners by deleting a different runner from the loaded list.
parent adff143bcd
commit d950ff12c0
@@ -387,6 +387,17 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
 				s.loadedMu.Unlock()
 				runner.refMu.Unlock()
 				slog.Debug("duplicate expired event, ignoring", "runner", runner)
+			} else if runner.pid != runnerToUnload.pid {
+				// If the pids do not match, we likely had multiple load
+				// failures for the same model in quick succession due to
+				// request context canceled and are draining the queue of
+				// events. Ensure the orphaned runner is properly shut down, but
+				// do not delete the mismatched loaded runner, or wait for VRAM
+				// convergence.
+				slog.Debug("orphaned runner shutting down", "orphan", runner, "loaded", runnerToUnload)
+				runner.unload()
+				s.loadedMu.Unlock()
+				runner.refMu.Unlock()
 			} else {
 				slog.Debug("starting background wait for VRAM recovery", "runner", runner)
 				finished := runner.waitForVRAMRecovery()
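To make the fix easier to follow outside the full scheduler, here is a minimal, self-contained sketch of the expired-event handling this hunk changes. The runnerRef fields, the handleExpired helper, and the main scenario are illustrative stand-ins rather than the actual ollama types; only the pid comparison and the branch structure mirror the diff above, and locking granularity and VRAM recovery are elided.

package main

import (
	"log/slog"
	"sync"
)

// runnerRef is a simplified stand-in carrying only what the branch logic needs.
type runnerRef struct {
	pid       int
	modelPath string
}

func (r *runnerRef) unload() {
	slog.Debug("unloading runner", "pid", r.pid, "model", r.modelPath)
}

type Scheduler struct {
	loadedMu sync.Mutex
	loaded   map[string]*runnerRef // loaded runners keyed by model path
}

// handleExpired processes one queued unload event for runner. The pid
// comparison is the essence of the fix: a stale event for an already-replaced
// runner must shut down the orphan but leave the newly loaded runner in the map.
func (s *Scheduler) handleExpired(runner *runnerRef) {
	s.loadedMu.Lock()
	defer s.loadedMu.Unlock()

	runnerToUnload := s.loaded[runner.modelPath]
	switch {
	case runnerToUnload == nil:
		// Already unloaded by an earlier event; nothing to do.
		slog.Debug("duplicate expired event, ignoring", "pid", runner.pid)
	case runner.pid != runnerToUnload.pid:
		// Stale event for an orphaned runner: shut it down, but do not
		// delete the different runner currently loaded for this model.
		slog.Debug("orphaned runner shutting down", "orphan", runner.pid, "loaded", runnerToUnload.pid)
		runner.unload()
	default:
		// Normal path: unload and remove from the loaded list.
		runner.unload()
		delete(s.loaded, runner.modelPath)
	}
}

func main() {
	s := &Scheduler{loaded: map[string]*runnerRef{}}

	old := &runnerRef{pid: 100, modelPath: "llama3"}
	s.loaded[old.modelPath] = old

	// The model is reloaded before the queued unload for old is processed.
	replacement := &runnerRef{pid: 200, modelPath: "llama3"}
	s.loaded[replacement.modelPath] = replacement

	// The stale event for pid 100 must not evict pid 200 from the loaded list.
	s.handleExpired(old)
	if s.loaded["llama3"] == replacement {
		slog.Info("replacement runner preserved", "pid", replacement.pid)
	}
}

Without the pid guard, the stale event would fall through to the default branch and delete the replacement's entry from the loaded list while its process kept running, which is the runner leak this commit closes.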