WIP updated routes

parent f0a5f7994b
commit e679885733

api/types.go

@@ -297,10 +297,6 @@ type EmbeddingResponse struct {
 type TokenizeRequest struct {
 	Model string `json:"model"`
 	Text  string `json:"text"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // TokenizeResponse is the response from [Client.Tokenize].
@@ -312,10 +308,6 @@ type TokenizeResponse struct {
 type DetokenizeRequest struct {
 	Model  string `json:"model"`
 	Tokens []int  `json:"tokens"`
-
-	// KeepAlive controls how long the model will stay loaded in memory following
-	// this request.
-	KeepAlive *Duration `json:"keep_alive,omitempty"`
 }
 
 // DetokenizeResponse is the response from [Client.Detokenize].
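The doc comments reference [Client.Tokenize] and [Client.Detokenize], whose signatures are not part of this diff. A minimal sketch of how a caller might use the updated request type, assuming the client methods follow the api package's usual (ctx, *Request) → (*Response, error) shape and that TokenizeResponse carries a `Tokens []int` field matching the JSON shown in docs/api.md:

```go
package main

import (
	"context"
	"fmt"
	"log"

	"github.com/ollama/ollama/api"
)

func main() {
	client, err := api.ClientFromEnvironment()
	if err != nil {
		log.Fatal(err)
	}

	// KeepAlive is gone from the request: per this commit, tokenization
	// no longer schedules (or pins) a loaded runner.
	resp, err := client.Tokenize(context.Background(), &api.TokenizeRequest{
		Model: "llama3.2",
		Text:  "Why is the sky blue?",
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(resp.Tokens)
}
```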
|
docs/api.md
@@ -1506,8 +1506,8 @@ POST /api/tokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/tokenize -d '{
-  "model": "llama3.1:8b",
-  "text": "Why the sky is blue?"
+  "model": "llama3.2",
+  "text": "Why is the sky blue?"
 }'
 ```
 
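The response for this example isn't shown in the hunk; since the detokenize example below maps these token IDs back to the same sentence, the tokenize call above would presumably return:

```json
{"tokens":[10445,374,279,13180,6437,30]}
```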
@@ -1538,15 +1538,15 @@ POST /api/detokenize
 
 ```shell
 curl -X POST http://localhost:11434/api/detokenize -d '{
-  "model": "llama3.1:8b",
-  "tokens": [10445,279,13180,374,6437,30]
+  "model": "llama3.2",
+  "tokens": [10445,374,279,13180,6437,30]
 }'
 ```
 
 #### Response
 
 ```json
-{"text":"Why the sky is blue?"}
+{"text":"Why is the sky blue?"}
 ```
 
 
server/routes.go

@@ -30,6 +30,7 @@ import (
 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
+	"github.com/ollama/ollama/llama"
 	"github.com/ollama/ollama/llm"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
@@ -569,15 +570,43 @@ func (s *Server) TokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for tokenization", http.StatusBadRequest)
 		return
 	}
 
-	tokens, err := runner.Tokenize(r.Context(), req.Text)
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
+		return
+	}
+	name, err := getExistingName(name)
 	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	// Tokenize the text
+	tokens, err := model.Tokenize(req.Text, false, true)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to tokenize text: %v", err), http.StatusInternalServerError)
 		return
 	}
 
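The substance of the change: TokenizeHandler no longer calls s.scheduleRunner, which loads the full model weights and holds them for the KeepAlive window. Instead it resolves the model name locally and loads only the vocabulary. A standalone sketch of that vocab-only pattern, reusing exactly the llama calls from the hunk above; the GGUF path is a hypothetical stand-in for what GetModel resolves, and the meaning of the two Tokenize booleans (addSpecial=false, parseSpecial=true) is an assumption inferred from their use here:

```go
package main

import (
	"fmt"
	"log"

	"github.com/ollama/ollama/llama"
)

func main() {
	// Hypothetical path; the handler gets this from GetModel(name.String()).
	m, err := llama.LoadModelFromFile("/path/to/model.gguf", llama.ModelParams{
		VocabOnly: true, // load only the tokenizer vocabulary, skipping the weight tensors
		UseMmap:   true,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer llama.FreeModel(m)

	// Same call as the handler above.
	tokens, err := m.Tokenize("Why is the sky blue?", false, true)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(tokens)
}
```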
@@ -611,17 +640,43 @@ func (s *Server) DetokenizeHandler(w http.ResponseWriter, r *http.Request) {
 		return
 	}
 
-	runner, _, _, err := s.scheduleRunner(r.Context(), req.Model, []Capability{}, nil, req.KeepAlive)
-	if err != nil {
-		http.Error(w, fmt.Sprintf("model '%s' not found", req.Model), http.StatusNotFound)
+	if req.Model == "" {
+		http.Error(w, "missing `model` for detokenization", http.StatusBadRequest)
 		return
 	}
 
-	text, err := runner.Detokenize(r.Context(), req.Tokens)
-	if err != nil {
-		http.Error(w, err.Error(), http.StatusInternalServerError)
+	name := model.ParseName(req.Model)
+	if !name.IsValid() {
+		http.Error(w, fmt.Sprintf("model name `%q` is invalid", req.Model), http.StatusBadRequest)
 		return
 	}
+	name, err := getExistingName(name)
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	// Get local model path
+	modelPath, err := GetModel(name.String())
+	if err != nil {
+		http.Error(w, fmt.Sprintf("model `%s` not found", req.Model), http.StatusNotFound)
+		return
+	}
+
+	model, err := llama.LoadModelFromFile(modelPath.ModelPath, llama.ModelParams{
+		VocabOnly: true,
+		UseMmap:   true,
+	})
+	if err != nil {
+		http.Error(w, fmt.Sprintf("failed to load model: %v", err), http.StatusInternalServerError)
+		return
+	}
+	defer llama.FreeModel(model)
+
+	var text string
+	for _, token := range req.Tokens {
+		text += model.TokenToPiece(token)
+	}
 
 	w.Header().Set("Content-Type", "application/json")
 	if err := json.NewEncoder(w).Encode(api.DetokenizeResponse{
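DetokenizeHandler follows the same shape, but rebuilds the text by concatenating model.TokenToPiece output per token instead of calling into a runner. One nit worth flagging on a WIP commit: `text +=` copies the whole accumulated string on every iteration, so a strings.Builder does the same work in linear time (a suggested alternative, not what the commit does):

```go
var sb strings.Builder
for _, token := range req.Tokens {
	sb.WriteString(model.TokenToPiece(token))
}
text := sb.String()
```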