WIP updated routes

ParthSareen 2024-12-16 15:49:28 -08:00
parent f0a5f7994b
commit e679885733
3 changed files with 71 additions and 24 deletions

View File

@@ -297,10 +297,6 @@ type EmbeddingResponse struct {
 type TokenizeRequest struct {
 	Model string `json:"model"`
 	Text  string `json:"text"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // TokenizeResponse is the response from [Client.Tokenize].
@@ -312,10 +308,6 @@ type TokenizeResponse struct {
 type DetokenizeRequest struct {
 	Model  string `json:"model"`
 	Tokens []int  `json:"tokens"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // DetokenizeResponse is the response from [Client.Detokenize].
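
For context, a minimal client-side sketch of the trimmed request types in use. `Client.Tokenize` and `Client.Detokenize` are only referenced by the doc comments above; the signatures used below are assumptions, not part of this commit.

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}
	ctx := context.Background()

	// KeepAlive is gone from both request types; only Model and Text/Tokens remain.
	tok, err := client.Tokenize(ctx, &api.TokenizeRequest{
		Model: "llama3.2",
		Text:  "Why is the sky blue?",
	}) // assumed signature, mirroring the other client methods
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%+v\n", tok)

	detok, err := client.Detokenize(ctx, &api.DetokenizeRequest{
		Model:  "llama3.2",
		Tokens: []int{10445, 374, 279, 13180, 6437, 30},
	}) // assumed signature, mirroring Tokenize
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%+v\n", detok)
}
```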

View File

@@ -1506,8 +1506,8 @@ POST /api/tokenize
 ```shell
 curl -X POST http://localhost:11434/api/tokenize -d '{
-  "model": "llama3.1:8b",
-  "text": "Why the sky is blue?"
+  "model": "llama3.2",
+  "text": "Why is the sky blue?"
 }'
 ```
@@ -1538,15 +1538,15 @@ POST /api/detokenize
 ```shell
 curl -X POST http://localhost:11434/api/detokenize -d '{
-  "model": "llama3.1:8b",
-  "tokens": [10445,279,13180,374,6437,30]
+  "model": "llama3.2",
+  "tokens": [10445,374,279,13180,6437,30]
 }'
 ```
 
 #### Response
 
 ```json
-{"text":"Why the sky is blue?"}
+{"text":"Why is the sky blue?"}
 ```

View File

@@ -30,6 +30,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
@@ -569,15 +570,43 @@ func (s *Server) TokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
-		return
-	}
-
-	tokens, err := runner.Tokenize(r.Context(), req.Text)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
+	if req.Model == "" {
+		http.Error(w, "missing `model` for tokenization", http.StatusBadRequest)
+		return
+	}
+
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	// Tokenize the text
+	tokens, err := model.Tokenize(req.Text, false, true)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to tokenize text: %v", err), http.StatusInternalServerError)
+		return
+	}
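
For reference, a minimal standalone sketch of the vocab-only load the new handler relies on; the GGUF path is a placeholder, and the calls mirror those shown in the diff above.

```go
package main

import (
	"fmt"
	"log"

	"github.com/ollama/ollama/llama"
)

func main() {
	// Load only the tokenizer/vocabulary from a local GGUF file; no runner is
	// scheduled and no inference state is set up.
	m, err := llama.LoadModelFromFile("/path/to/model.gguf", llama.ModelParams{
		VocabOnly: true,
		UseMmap:   true,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer llama.FreeModel(m)

	// Same call the handler makes: addSpecial=false, parseSpecial=true.
	tokens, err := m.Tokenize("Why is the sky blue?", false, true)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(tokens)
}
```
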
@@ -611,17 +640,43 @@ func (s *Server) DetokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
-		return
-	}
-
-	text, err := runner.Detokenize(r.Context(), req.Tokens)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
+	if req.Model == "" {
+		http.Error(w, "missing `model` for detokenization", http.StatusBadRequest)
+		return
+	}
+
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	var text string
+	for _, token := range req.Tokens {
+		text += model.TokenToPiece(token)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	if err := json.NewEncoder(w).Encode(api.DetokenizeResponse{