fix(scheduler): make model unload order deterministic (#10185)
This commit is contained in:
parent
5c0331fd83
commit
42ecb9f138
@ -667,13 +667,19 @@ func (runner *runnerRef) waitForVRAMRecovery() chan any {
|
|||||||
return finished
|
return finished
|
||||||
}
|
}
|
||||||
|
|
||||||
type ByDuration []*runnerRef
|
type ByDurationAndName []*runnerRef
|
||||||
|
|
||||||
func (a ByDuration) Len() int { return len(a) }
|
func (a ByDurationAndName) Len() int { return len(a) }
|
||||||
func (a ByDuration) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
func (a ByDurationAndName) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
||||||
func (a ByDuration) Less(i, j int) bool {
|
func (a ByDurationAndName) Less(i, j int) bool {
|
||||||
// uint64 to turn negative time (never unload) to largest
|
// Primary sort by session duration; the uint64 cast makes a negative duration (never unload) compare as the largest value
|
||||||
return uint64(a[i].sessionDuration) < uint64(a[j].sessionDuration)
|
d1 := uint64(a[i].sessionDuration)
|
||||||
|
d2 := uint64(a[j].sessionDuration)
|
||||||
|
if d1 != d2 {
|
||||||
|
return d1 < d2
|
||||||
|
}
|
||||||
|
// Secondary sort by model path in lexicographic order, so runners with equal durations are ordered deterministically
|
||||||
|
return a[i].modelPath < a[j].modelPath
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO - future consideration to pick runners based on size
|
// TODO - future consideration to pick runners based on size
|
||||||
@ -775,7 +781,7 @@ func (s *Scheduler) findRunnerToUnload() *runnerRef {
|
|||||||
|
|
||||||
// In the future we can enhance the algorithm to be smarter about picking the optimal runner to unload
|
// In the future we can enhance the algorithm to be smarter about picking the optimal runner to unload
|
||||||
// e.g., if we have multiple options, will one make room for the request?
|
// e.g., if we have multiple options, will one make room for the request?
|
||||||
sort.Sort(ByDuration(runnerList))
|
sort.Sort(ByDurationAndName(runnerList))
|
||||||
|
|
||||||
// First try to find a runner that's already idle
|
// First try to find a runner that's already idle
|
||||||
for _, runner := range runnerList {
|
for _, runner := range runnerList {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user