diff --git a/api/types.go b/api/types.go
index 2fe46f0ce..3281eea2d 100644
--- a/api/types.go
+++ b/api/types.go
@@ -297,10 +297,6 @@ type EmbeddingResponse struct {
 type TokenizeRequest struct {
 	Model string `json:"model"`
 	Text  string `json:"text"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // TokenizeResponse is the response from [Client.Tokenize].
@@ -312,10 +308,6 @@ type TokenizeResponse struct {
 type DetokenizeRequest struct {
 	Model  string `json:"model"`
 	Tokens []int  `json:"tokens"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // DetokenizeResponse is the response from [Client.Detokenize].
diff --git a/docs/api.md b/docs/api.md
index 59af2a284..aa16ab2fe 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -1506,8 +1506,8 @@ POST /api/tokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/tokenize -d '{
-  "model": "llama3.1:8b",
-  "text": "Why the sky is blue?"
+  "model": "llama3.2",
+  "text": "Why is the sky blue?"
 }'
 ```
 
@@ -1538,15 +1538,15 @@ POST /api/detokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/detokenize -d '{
-  "model": "llama3.1:8b",
-  "tokens": [10445,279,13180,374,6437,30]
+  "model": "llama3.2",
+  "tokens": [10445,374,279,13180,6437,30]
 }'
 ```
 
 #### Response
 
 ```json
-{"text":"Why the sky is blue?"}
+{"text":"Why is the sky blue?"}
 ```
 
diff --git a/server/routes.go b/server/routes.go
index 949368d07..6788b6d9a 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -30,6 +30,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
@@ -569,15 +570,43 @@ func (s *Server) TokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for tokenization", http.StatusBadRequest)
 		return
 	}
 
-	tokens, err := runner.Tokenize(r.Context(), req.Text)
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+	name, err := getExistingName(name)
 	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	// Tokenize the text
+	tokens, err := model.Tokenize(req.Text, false, true)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to tokenize text: %v", err), http.StatusInternalServerError)
 		return
 	}
 
@@ -611,17 +640,43 @@ func (s *Server) DetokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for detokenization", http.StatusBadRequest)
 		return
 	}
 
-	text, err := runner.Detokenize(r.Context(), req.Tokens)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
 		return
 	}
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	var text string
+	for _, token := range req.Tokens {
+		text += model.TokenToPiece(token)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	if err := json.NewEncoder(w).Encode(api.DetokenizeResponse{
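With this change both handlers open the GGUF with `VocabOnly: true` instead of scheduling a runner, so `keep_alive` no longer applies to tokenize/detokenize. Below is a minimal client-side sketch of the resulting HTTP API, not part of the patch itself; it assumes a local server on the default port, a pulled `llama3.2` model, and that the tokenize response has the shape `{"tokens": [...]}`, mirroring the detokenize response shown in docs/api.md above.

```go
// Round-trip a string through /api/tokenize and /api/detokenize using only
// the standard library. Endpoint shapes follow the docs/api.md examples in
// the diff above; the tokenize response field name is an assumption.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

// post marshals req, POSTs it to the given path, and decodes the JSON reply.
func post(path string, req, resp any) error {
	body, err := json.Marshal(req)
	if err != nil {
		return err
	}
	r, err := http.Post("http://localhost:11434"+path, "application/json", bytes.NewReader(body))
	if err != nil {
		return err
	}
	defer r.Body.Close()
	if r.StatusCode != http.StatusOK {
		return fmt.Errorf("%s: unexpected status %s", path, r.Status)
	}
	return json.NewDecoder(r.Body).Decode(resp)
}

func main() {
	// Tokenize: text in, token IDs out. No keep_alive field anymore.
	var tok struct {
		Tokens []int `json:"tokens"`
	}
	if err := post("/api/tokenize", map[string]any{
		"model": "llama3.2",
		"text":  "Why is the sky blue?",
	}, &tok); err != nil {
		panic(err)
	}
	fmt.Println("tokens:", tok.Tokens)

	// Detokenize: token IDs back to text.
	var detok struct {
		Text string `json:"text"`
	}
	if err := post("/api/detokenize", map[string]any{
		"model":  "llama3.2",
		"tokens": tok.Tokens,
	}, &detok); err != nil {
		panic(err)
	}
	fmt.Println("text:", detok.Text)
}
```

Because the server opens the model with `VocabOnly` and mmap, these requests should not trigger a full weight load or count against the loaded-model pool, which is why the `keep_alive` field was dropped from both request types.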