fix(scheduler): make model unload order deterministic (#10185)

This commit is contained in:
Ire Gaddr 2025-04-09 18:01:02 -05:00 committed by GitHub
parent 5c0331fd83
commit 42ecb9f138
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -667,13 +667,19 @@ func (runner *runnerRef) waitForVRAMRecovery() chan any {
return finished
}
// ByDuration is a list of runners that is ordered by session duration alone.
type ByDuration []*runnerRef
// ByDurationAndName is a list of runners that is ordered by session duration,
// with the model path used as a tie-breaker.
type ByDurationAndName []*runnerRef
// Len returns the number of runners in the list.
func (a ByDuration) Len() int { return len(a) }
// Swap exchanges the runners at indices i and j.
func (a ByDuration) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a ByDuration) Less(i, j int) bool {
// uint64 to turn negative time (never unload) to largest
return uint64(a[i].sessionDuration) < uint64(a[j].sessionDuration)
// Len returns the number of runners in the list.
func (a ByDurationAndName) Len() int { return len(a) }
// Swap exchanges the runners at indices i and j.
func (a ByDurationAndName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// Less reports whether the runner at index i must sort before the runner at
// index j. Runners are ordered by session duration first; runners with equal
// durations are ordered by model path so the result does not depend on the
// input order.
func (a ByDurationAndName) Less(i, j int) bool {
// Primary sort by session duration. The uint64 conversion turns a negative
// duration (never unload) into the largest values, so those runners sort last.
d1 := uint64(a[i].sessionDuration)
d2 := uint64(a[j].sessionDuration)
if d1 != d2 {
return d1 < d2
}
// Secondary sort by model path (lexicographic order) to break ties.
return a[i].modelPath < a[j].modelPath
}
// TODO: as a future enhancement, consider picking runners based on size
@ -775,7 +781,7 @@ func (s *Scheduler) findRunnerToUnload() *runnerRef {
// In the future we can enhance the algorithm to be smarter about picking the optimal runner to unload
// e.g., if we have multiple options, will one make room for the request?
sort.Sort(ByDuration(runnerList))
sort.Sort(ByDurationAndName(runnerList))
// First try to find a runner that's already idle
for _, runner := range runnerList {