runner: default to greedy sampler for performance (#9407)
As we are adding support for weighted sampling, we have seen some performance regressions. This change bypasses the sampler logic for now and defaults to greedy sampling until we can benchmark the new sampler logic.
This commit is contained in:
parent
c245b0406f
commit
0c1041ad85
@ -575,23 +575,11 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
}
|
||||
|
||||
sampler, err := sample.NewSampler(
|
||||
req.Temperature,
|
||||
req.TopK,
|
||||
req.TopP,
|
||||
req.MinP,
|
||||
req.Seed,
|
||||
)
|
||||
if err != nil {
|
||||
http.Error(w, fmt.Sprintf("Failed to create sampler: %v", err), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
|
||||
seq, err := s.NewSequence(req.Prompt, req.Images, NewSequenceParams{
|
||||
numPredict: req.NumPredict,
|
||||
stop: req.Stop,
|
||||
numKeep: int32(req.NumKeep),
|
||||
sampler: sampler,
|
||||
sampler: sample.Greedy(), // TODO: add support for different samplers when performance is optimized
|
||||
embedding: false,
|
||||
})
|
||||
if err != nil {
|
||||
|
Loading…
x
Reference in New Issue
Block a user