diff --git a/server/sched.go b/server/sched.go index b4600dbf7..091833c7c 100644 --- a/server/sched.go +++ b/server/sched.go @@ -194,11 +194,6 @@ func (s *Scheduler) processPending(ctx context.Context) { break } - // Embedding models should always be loaded with parallel=1 - if pending.model.CheckCapabilities(CapabilityCompletion) != nil { - numParallel = 1 - } - // Evaluate if the model will fit in the available system memory, or if we should unload a model first if len(gpus) == 1 && gpus[0].Library == "cpu" { // simplifying assumption of defaultParallel when in CPU mode