WIP updated routes

parent f0a5f7994b
commit e679885733

api/types.go

@@ -297,10 +297,6 @@ type EmbeddingResponse struct {
 type TokenizeRequest struct {
 	Model string `json:"model"`
 	Text  string `json:"text"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // TokenizeResponse is the response from [Client.Tokenize].
@@ -312,10 +308,6 @@ type TokenizeResponse struct {
 type DetokenizeRequest struct {
 	Model  string `json:"model"`
 	Tokens []int  `json:"tokens"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // DetokenizeResponse is the response from [Client.Detokenize].
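The doc comments reference [Client.Tokenize] and [Client.Detokenize], whose signatures are not part of this diff. A minimal sketch of how a caller might use the updated request type, assuming the client methods follow the api package's usual (ctx, *Request) → (*Response, error) shape and that TokenizeResponse carries a `Tokens []int` field matching the JSON shown in docs/api.md:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// KeepAlive is gone from the request: per this commit, tokenization
	// no longer schedules (or pins) a loaded runner.
	resp, err := client.Tokenize(context.Background(), &api.TokenizeRequest{
		Model: "llama3.2",
		Text:  "Why is the sky blue?",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Tokens)
}
```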
|
docs/api.md
@@ -1506,8 +1506,8 @@ POST /api/tokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/tokenize -d '{
-  "model": "llama3.1:8b",
-  "text": "Why the sky is blue?"
+  "model": "llama3.2",
+  "text": "Why is the sky blue?"
 }'
 ```
 
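The response for this example isn't shown in the hunk; since the detokenize example below maps these token IDs back to the same sentence, the tokenize call above would presumably return:

```json
{"tokens":[10445,374,279,13180,6437,30]}
```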
@@ -1538,15 +1538,15 @@ POST /api/detokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/detokenize -d '{
-  "model": "llama3.1:8b",
-  "tokens": [10445,279,13180,374,6437,30]
+  "model": "llama3.2",
+  "tokens": [10445,374,279,13180,6437,30]
 }'
 ```
 
 #### Response
 
 ```json
-{"text":"Why the sky is blue?"}
+{"text":"Why is the sky blue?"}
 ```
 
 
server/routes.go

@@ -30,6 +30,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
@@ -569,15 +570,43 @@ func (s *Server) TokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for tokenization", http.StatusBadRequest)
 		return
 	}
 
-	tokens, err := runner.Tokenize(r.Context(), req.Text)
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+	name, err := getExistingName(name)
 	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	// Tokenize the text
+	tokens, err := model.Tokenize(req.Text, false, true)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to tokenize text: %v", err), http.StatusInternalServerError)
 		return
 	}
 
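The substance of the change: TokenizeHandler no longer calls s.scheduleRunner, which loads the full model weights and holds them for the KeepAlive window. Instead it resolves the model name locally and loads only the vocabulary. A standalone sketch of that vocab-only pattern, reusing exactly the llama calls from the hunk above; the GGUF path is a hypothetical stand-in for what GetModel resolves, and the meaning of the two Tokenize booleans (addSpecial=false, parseSpecial=true) is an assumption inferred from their use here:

```go
package main

import (
	"fmt"
	"log"

	"github.com/ollama/ollama/llama"
)

func main() {
	// Hypothetical path; the handler gets this from GetModel(name.String()).
	m, err := llama.LoadModelFromFile("/path/to/model.gguf", llama.ModelParams{
		VocabOnly: true, // load only the tokenizer vocabulary, skipping the weight tensors
		UseMmap:   true,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer llama.FreeModel(m)

	// Same call as the handler above.
	tokens, err := m.Tokenize("Why is the sky blue?", false, true)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(tokens)
}
```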
@@ -611,17 +640,43 @@ func (s *Server) DetokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for detokenization", http.StatusBadRequest)
 		return
 	}
 
-	text, err := runner.Detokenize(r.Context(), req.Tokens)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
 		return
 	}
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	var text string
+	for _, token := range req.Tokens {
+		text += model.TokenToPiece(token)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	if err := json.NewEncoder(w).Encode(api.DetokenizeResponse{
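DetokenizeHandler follows the same shape, but rebuilds the text by concatenating model.TokenToPiece output per token instead of calling into a runner. One nit worth flagging on a WIP commit: `text +=` copies the whole accumulated string on every iteration, so a strings.Builder does the same work in linear time (a suggested alternative, not what the commit does):

```go
var sb strings.Builder
for _, token := range req.Tokens {
	sb.WriteString(model.TokenToPiece(token))
}
text := sb.String()
```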