diff --git a/runner/llamarunner/runner.go b/runner/llamarunner/runner.go index 83802d604..ee5d47f6e 100644 --- a/runner/llamarunner/runner.go +++ b/runner/llamarunner/runner.go @@ -599,7 +599,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) { if errors.Is(err, context.Canceled) { slog.Info("aborting completion request due to client closing the connection") } else { - slog.Error("Failed to acquire semaphore", "error", err) + http.Error(w, fmt.Sprintf("Failed to acquire semaphore: %v", err), http.StatusInternalServerError) } return } @@ -611,6 +611,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) { seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs, true) if err != nil { s.mu.Unlock() + s.seqsSem.Release(1) http.Error(w, fmt.Sprintf("Failed to load cache: %v", err), http.StatusInternalServerError) return } @@ -626,6 +627,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) { s.mu.Unlock() if !found { + s.seqsSem.Release(1) http.Error(w, "could not find an available sequence", http.StatusInternalServerError) return } @@ -691,7 +693,7 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) { if errors.Is(err, context.Canceled) { slog.Info("aborting embeddings request due to client closing the connection") } else { - slog.Error("Failed to acquire semaphore", "error", err) + http.Error(w, fmt.Sprintf("Failed to acquire semaphore: %v", err), http.StatusInternalServerError) } return } @@ -703,6 +705,7 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) { seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs, false) if err != nil { s.mu.Unlock() + s.seqsSem.Release(1) http.Error(w, fmt.Sprintf("Failed to load cache: %v", err), http.StatusInternalServerError) return } @@ -715,6 +718,7 @@ func (s *Server) embeddings(w http.ResponseWriter, r *http.Request) { s.mu.Unlock() if !found { + s.seqsSem.Release(1) http.Error(w, "could not find an available sequence", http.StatusInternalServerError) return } diff --git a/runner/ollamarunner/runner.go b/runner/ollamarunner/runner.go index 6d20fa85b..bc7a07ed6 100644 --- a/runner/ollamarunner/runner.go +++ b/runner/ollamarunner/runner.go @@ -609,7 +609,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) { if errors.Is(err, context.Canceled) { slog.Info("aborting completion request due to client closing the connection") } else { - slog.Error("Failed to acquire semaphore", "error", err) + http.Error(w, fmt.Sprintf("Failed to acquire semaphore: %v", err), http.StatusInternalServerError) } return } @@ -621,6 +621,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) { seq.cache, seq.inputs, err = s.cache.LoadCacheSlot(seq.inputs) if err != nil { s.mu.Unlock() + s.seqsSem.Release(1) http.Error(w, fmt.Sprintf("Failed to load cache: %v", err), http.StatusInternalServerError) return } @@ -634,6 +635,7 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) { s.mu.Unlock() if !found { + s.seqsSem.Release(1) http.Error(w, "could not find an available sequence", http.StatusInternalServerError) return }