From 42ecb9f13896c5329764e1946ec3ab1aad2de0a1 Mon Sep 17 00:00:00 2001
From: Ire Gaddr <130914610+IreGaddr@users.noreply.github.com>
Date: Wed, 9 Apr 2025 18:01:02 -0500
Subject: [PATCH] fix(scheduler): make model unload order deterministic (#10185)

---
 server/sched.go | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/server/sched.go b/server/sched.go
index 8082680be..f3978796c 100644
--- a/server/sched.go
+++ b/server/sched.go
@@ -667,13 +667,19 @@ func (runner *runnerRef) waitForVRAMRecovery() chan any {
 	return finished
 }
 
-type ByDuration []*runnerRef
+type ByDurationAndName []*runnerRef
 
-func (a ByDuration) Len() int      { return len(a) }
-func (a ByDuration) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a ByDuration) Less(i, j int) bool {
-	// uint64 to turn negative time (never unload) to largest
-	return uint64(a[i].sessionDuration) < uint64(a[j].sessionDuration)
+func (a ByDurationAndName) Len() int      { return len(a) }
+func (a ByDurationAndName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a ByDurationAndName) Less(i, j int) bool {
+	// Primary sort by session duration; the uint64 cast turns a negative duration (never unload) into the largest value
+	d1 := uint64(a[i].sessionDuration)
+	d2 := uint64(a[j].sessionDuration)
+	if d1 != d2 {
+		return d1 < d2
+	}
+	// Secondary sort: break duration ties by model path (byte-wise lexicographic order) so the result is repeatable
+	return a[i].modelPath < a[j].modelPath
 }
 
 // TODO - future consideration to pick runners based on size
@@ -775,7 +781,7 @@ func (s *Scheduler) findRunnerToUnload() *runnerRef {
 
 	// In the future we can enhance the algorithm to be smarter about picking the optimal runner to unload
 	// e.g., if we have multiple options, will one make room for the request?
-	sort.Sort(ByDuration(runnerList))
+	sort.Sort(ByDurationAndName(runnerList))
 
 	// First try to find a runner that's already idle
 	for _, runner := range runnerList {