WIP updated routes

parent f0a5f7994b
commit e679885733
api/types.go
@@ -297,10 +297,6 @@ type EmbeddingResponse struct {
 type TokenizeRequest struct {
 	Model string `json:"model"`
 	Text  string `json:"text"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // TokenizeResponse is the response from [Client.Tokenize].
@@ -312,10 +308,6 @@ type TokenizeResponse struct {
 type DetokenizeRequest struct {
 	Model  string `json:"model"`
 	Tokens []int  `json:"tokens"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // DetokenizeResponse is the response from [Client.Detokenize].
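For reference, a small sketch (not part of this commit) of what the trimmed request bodies look like on the wire, assuming these types are the exported ones in the `api` package that the `[Client.Tokenize]`/`[Client.Detokenize]` doc links point at; `keep_alive` is no longer part of either struct.

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/ollama/ollama/api" // assumed package location for these types
)

func main() {
	// Tokenize request: only model and text remain after this change.
	tok, _ := json.Marshal(api.TokenizeRequest{Model: "llama3.2", Text: "Why is the sky blue?"})
	fmt.Println(string(tok)) // {"model":"llama3.2","text":"Why is the sky blue?"}

	// Detokenize request: only model and tokens remain.
	detok, _ := json.Marshal(api.DetokenizeRequest{Model: "llama3.2", Tokens: []int{10445, 374, 279, 13180, 6437, 30}})
	fmt.Println(string(detok)) // {"model":"llama3.2","tokens":[10445,374,279,13180,6437,30]}
}
```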
docs/api.md
@@ -1506,8 +1506,8 @@ POST /api/tokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/tokenize -d '{
-  "model": "llama3.1:8b",
-  "text": "Why the sky is blue?"
+  "model": "llama3.2",
+  "text": "Why is the sky blue?"
 }'
 ```
 
@@ -1538,15 +1538,15 @@ POST /api/detokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/detokenize -d '{
-  "model": "llama3.1:8b",
-  "tokens": [10445,279,13180,374,6437,30]
+  "model": "llama3.2",
+  "tokens": [10445,374,279,13180,6437,30]
 }'
 ```
 
 #### Response
 
 ```json
-{"text":"Why the sky is blue?"}
+{"text":"Why is the sky blue?"}
 ```
 
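A rough Go equivalent of the curl example above, included only as an illustration: it uses just the standard library, and the `tokens` field name in the tokenize response is an assumption inferred from the detokenize request shape.

```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

func main() {
	// Same request body as the curl example in docs/api.md.
	body := []byte(`{"model": "llama3.2", "text": "Why is the sky blue?"}`)
	resp, err := http.Post("http://localhost:11434/api/tokenize", "application/json", bytes.NewReader(body))
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	// Assumed response shape: {"tokens":[...]}
	var out struct {
		Tokens []int `json:"tokens"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		log.Fatal(err)
	}
	fmt.Println(out.Tokens)
}
```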
server/routes.go
@@ -30,6 +30,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
@@ -569,15 +570,43 @@ func (s *Server) TokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for tokenization", http.StatusBadRequest)
 		return
 	}
 
-	tokens, err := runner.Tokenize(r.Context(), req.Text)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	// Tokenize the text
+	tokens, err := model.Tokenize(req.Text, false, true)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to tokenize text: %v", err), http.StatusInternalServerError)
 		return
 	}
 
@@ -611,17 +640,43 @@ func (s *Server) DetokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for detokenization", http.StatusBadRequest)
 		return
 	}
 
-	text, err := runner.Detokenize(r.Context(), req.Tokens)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
-		return
-	}
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	var text string
+	for _, token := range req.Tokens {
+		text += model.TokenToPiece(token)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	if err := json.NewEncoder(w).Encode(api.DetokenizeResponse{
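A self-contained sketch (not part of this commit) of the vocab-only flow both handlers now share: load only the vocabulary from a local GGUF file, tokenize a string, then rebuild it with `TokenToPiece`. The model path is a placeholder (the handlers resolve it via `GetModel`), and the boolean flags passed to `Tokenize` are simply copied from the handler above.

```go
package main

import (
	"fmt"
	"log"

	"github.com/ollama/ollama/llama"
)

func main() {
	// Placeholder path to a local GGUF blob; in the server this comes from
	// GetModel(name.String()).ModelPath.
	modelPath := "/path/to/model.gguf"

	m, err := llama.LoadModelFromFile(modelPath, llama.ModelParams{
		VocabOnly: true, // only the tokenizer/vocab is needed, not the weights
		UseMmap:   true,
	})
	if err != nil {
		log.Fatalf("failed to load model: %v", err)
	}
	defer llama.FreeModel(m)

	// Same flags the TokenizeHandler passes to Tokenize.
	tokens, err := m.Tokenize("Why is the sky blue?", false, true)
	if err != nil {
		log.Fatalf("failed to tokenize text: %v", err)
	}
	fmt.Println(tokens)

	// DetokenizeHandler rebuilds the text piece by piece.
	var text string
	for _, token := range tokens {
		text += m.TokenToPiece(token)
	}
	fmt.Println(text)
}
```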