Compare commits
18 Commits
mattw/pyth
...
v0.1.13
Author | SHA1 | Date | |
---|---|---|---|
![]() |
cedae0d17a | ||
![]() |
bb80a597db | ||
![]() |
6681d37861 | ||
![]() |
0409c1fa59 | ||
![]() |
b56e92470a | ||
![]() |
5687f1a0cf | ||
![]() |
7eda3d0c55 | ||
![]() |
7194a07d4d | ||
![]() |
13efd5f218 | ||
![]() |
c4bdfffd96 | ||
![]() |
26c63418e0 | ||
![]() |
2799784ac8 | ||
![]() |
91897a606f | ||
![]() |
96122b7271 | ||
![]() |
39be7fdb98 | ||
![]() |
c2e3b89176 | ||
![]() |
cde31cb220 | ||
![]() |
63097607b2 |
@@ -19,5 +19,11 @@ RUN apt-get update && apt-get install -y ca-certificates
|
||||
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
|
||||
EXPOSE 11434
|
||||
ENV OLLAMA_HOST 0.0.0.0
|
||||
|
||||
# set some environment variable for better NVIDIA compatibility
|
||||
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
|
||||
|
||||
ENTRYPOINT ["/bin/ollama"]
|
||||
CMD ["serve"]
|
||||
|
@@ -233,6 +233,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
|
||||
- [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
|
||||
- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
|
||||
- [Amica](https://github.com/semperai/amica)
|
||||
- [chatd](https://github.com/BruceMacD/chatd)
|
||||
|
||||
### Terminal
|
||||
|
||||
|
61
api/types.go
61
api/types.go
@@ -6,6 +6,7 @@ import (
|
||||
"math"
|
||||
"os"
|
||||
"reflect"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
@@ -360,3 +361,63 @@ func (d *Duration) UnmarshalJSON(b []byte) (err error) {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FormatParams converts specified parameter options to their correct types
|
||||
func FormatParams(params map[string][]string) (map[string]interface{}, error) {
|
||||
opts := Options{}
|
||||
valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
|
||||
typeOpts := reflect.TypeOf(opts) // types of the fields in the options struct
|
||||
|
||||
// build map of json struct tags to their types
|
||||
jsonOpts := make(map[string]reflect.StructField)
|
||||
for _, field := range reflect.VisibleFields(typeOpts) {
|
||||
jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
|
||||
if jsonTag != "" {
|
||||
jsonOpts[jsonTag] = field
|
||||
}
|
||||
}
|
||||
|
||||
out := make(map[string]interface{})
|
||||
// iterate params and set values based on json struct tags
|
||||
for key, vals := range params {
|
||||
if opt, ok := jsonOpts[key]; !ok {
|
||||
return nil, fmt.Errorf("unknown parameter '%s'", key)
|
||||
} else {
|
||||
field := valueOpts.FieldByName(opt.Name)
|
||||
if field.IsValid() && field.CanSet() {
|
||||
switch field.Kind() {
|
||||
case reflect.Float32:
|
||||
floatVal, err := strconv.ParseFloat(vals[0], 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid float value %s", vals)
|
||||
}
|
||||
|
||||
out[key] = float32(floatVal)
|
||||
case reflect.Int:
|
||||
intVal, err := strconv.ParseInt(vals[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid int value %s", vals)
|
||||
}
|
||||
|
||||
out[key] = intVal
|
||||
case reflect.Bool:
|
||||
boolVal, err := strconv.ParseBool(vals[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid bool value %s", vals)
|
||||
}
|
||||
|
||||
out[key] = boolVal
|
||||
case reflect.String:
|
||||
out[key] = vals[0]
|
||||
case reflect.Slice:
|
||||
// TODO: only string slices are supported right now
|
||||
out[key] = vals
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
224
cmd/cmd.go
224
cmd/cmd.go
@@ -412,10 +412,19 @@ func PullHandler(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
func RunGenerate(cmd *cobra.Command, args []string) error {
|
||||
interactive := true
|
||||
|
||||
opts := generateOptions{
|
||||
Model: args[0],
|
||||
WordWrap: os.Getenv("TERM") == "xterm-256color",
|
||||
Options: map[string]interface{}{},
|
||||
}
|
||||
|
||||
format, err := cmd.Flags().GetString("format")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
opts.Format = format
|
||||
|
||||
prompts := args[1:]
|
||||
|
||||
@@ -427,34 +436,40 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
|
||||
}
|
||||
|
||||
prompts = append([]string{string(in)}, prompts...)
|
||||
opts.WordWrap = false
|
||||
interactive = false
|
||||
}
|
||||
|
||||
// output is being piped
|
||||
if !term.IsTerminal(int(os.Stdout.Fd())) {
|
||||
return generate(cmd, args[0], strings.Join(prompts, " "), false, format)
|
||||
opts.Prompt = strings.Join(prompts, " ")
|
||||
if len(prompts) > 0 {
|
||||
interactive = false
|
||||
}
|
||||
|
||||
wordWrap := os.Getenv("TERM") == "xterm-256color"
|
||||
|
||||
nowrap, err := cmd.Flags().GetBool("nowordwrap")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if nowrap {
|
||||
wordWrap = false
|
||||
opts.WordWrap = !nowrap
|
||||
|
||||
if !interactive {
|
||||
return generate(cmd, opts)
|
||||
}
|
||||
|
||||
// prompts are provided via stdin or args so don't enter interactive mode
|
||||
if len(prompts) > 0 {
|
||||
return generate(cmd, args[0], strings.Join(prompts, " "), wordWrap, format)
|
||||
}
|
||||
|
||||
return generateInteractive(cmd, args[0], wordWrap, format)
|
||||
return generateInteractive(cmd, opts)
|
||||
}
|
||||
|
||||
type generateContextKey string
|
||||
|
||||
func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format string) error {
|
||||
type generateOptions struct {
|
||||
Model string
|
||||
Prompt string
|
||||
WordWrap bool
|
||||
Format string
|
||||
System string
|
||||
Template string
|
||||
Options map[string]interface{}
|
||||
}
|
||||
|
||||
func generate(cmd *cobra.Command, opts generateOptions) error {
|
||||
client, err := api.ClientFromEnvironment()
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -475,7 +490,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
|
||||
|
||||
termWidth, _, err := term.GetSize(int(os.Stdout.Fd()))
|
||||
if err != nil {
|
||||
wordWrap = false
|
||||
opts.WordWrap = false
|
||||
}
|
||||
|
||||
cancelCtx, cancel := context.WithCancel(context.Background())
|
||||
@@ -483,24 +498,30 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
|
||||
|
||||
sigChan := make(chan os.Signal, 1)
|
||||
signal.Notify(sigChan, syscall.SIGINT)
|
||||
var abort bool
|
||||
|
||||
go func() {
|
||||
<-sigChan
|
||||
cancel()
|
||||
abort = true
|
||||
}()
|
||||
|
||||
var currentLineLength int
|
||||
var wordBuffer string
|
||||
|
||||
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
|
||||
request := api.GenerateRequest{
|
||||
Model: opts.Model,
|
||||
Prompt: opts.Prompt,
|
||||
Context: generateContext,
|
||||
Format: opts.Format,
|
||||
System: opts.System,
|
||||
Template: opts.Template,
|
||||
Options: opts.Options,
|
||||
}
|
||||
fn := func(response api.GenerateResponse) error {
|
||||
p.StopAndClear()
|
||||
|
||||
latest = response
|
||||
|
||||
if wordWrap {
|
||||
if opts.WordWrap {
|
||||
for _, ch := range response.Response {
|
||||
if currentLineLength+1 > termWidth-5 {
|
||||
// backtrack the length of the last word and clear to the end of the line
|
||||
@@ -529,21 +550,18 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
|
||||
}
|
||||
|
||||
if err := client.Generate(cancelCtx, &request, fn); err != nil {
|
||||
if strings.Contains(err.Error(), "context canceled") && abort {
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
if prompt != "" {
|
||||
if opts.Prompt != "" {
|
||||
fmt.Println()
|
||||
fmt.Println()
|
||||
}
|
||||
|
||||
if !latest.Done {
|
||||
if abort {
|
||||
return nil
|
||||
}
|
||||
return errors.New("unexpected end of response")
|
||||
return nil
|
||||
}
|
||||
|
||||
verbose, err := cmd.Flags().GetBool("verbose")
|
||||
@@ -562,9 +580,22 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
|
||||
return nil
|
||||
}
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format string) error {
|
||||
type MultilineState int
|
||||
|
||||
const (
|
||||
MultilineNone MultilineState = iota
|
||||
MultilinePrompt
|
||||
MultilineSystem
|
||||
MultilineTemplate
|
||||
)
|
||||
|
||||
func generateInteractive(cmd *cobra.Command, opts generateOptions) error {
|
||||
// load the model
|
||||
if err := generate(cmd, model, "", false, ""); err != nil {
|
||||
loadOpts := generateOptions{
|
||||
Model: opts.Model,
|
||||
Prompt: "",
|
||||
}
|
||||
if err := generate(cmd, loadOpts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -581,14 +612,17 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
|
||||
usageSet := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Commands:")
|
||||
fmt.Fprintln(os.Stderr, " /set history Enable history")
|
||||
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
|
||||
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
|
||||
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
|
||||
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
|
||||
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter ... Set a parameter")
|
||||
fmt.Fprintln(os.Stderr, " /set system <string> Set system prompt")
|
||||
fmt.Fprintln(os.Stderr, " /set template <string> Set prompt template")
|
||||
fmt.Fprintln(os.Stderr, " /set history Enable history")
|
||||
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
|
||||
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
|
||||
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
|
||||
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
|
||||
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
|
||||
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
@@ -602,6 +636,22 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
// only list out the most common parameters
|
||||
usageParameters := func() {
|
||||
fmt.Fprintln(os.Stderr, "Available Parameters:")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter seed <int> Random number seed")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_predict <int> Max number of tokens to predict")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_k <int> Pick from top k num of tokens")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter top_p <float> Pick token based on sum of probabilities")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_ctx <int> Set the context size")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter temperature <float> Set creativity level")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_penalty <float> How strongly to penalize repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter repeat_last_n <int> Set how far back to look for repetitions")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter num_gpu <int> The number of layers to send to the GPU")
|
||||
fmt.Fprintln(os.Stderr, " /set parameter stop \"<string>\", ... Set the stop parameters")
|
||||
fmt.Fprintln(os.Stderr, "")
|
||||
}
|
||||
|
||||
scanner, err := readline.New(readline.Prompt{
|
||||
Prompt: ">>> ",
|
||||
AltPrompt: "... ",
|
||||
@@ -615,6 +665,7 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
fmt.Print(readline.StartBracketedPaste)
|
||||
defer fmt.Printf(readline.EndBracketedPaste)
|
||||
|
||||
var multiline MultilineState
|
||||
var prompt string
|
||||
|
||||
for {
|
||||
@@ -649,8 +700,21 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
|
||||
prompt = strings.TrimPrefix(prompt, `"""`)
|
||||
scanner.Prompt.UseAlt = false
|
||||
|
||||
switch multiline {
|
||||
case MultilineSystem:
|
||||
opts.System = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set system template.\n")
|
||||
case MultilineTemplate:
|
||||
opts.Template = prompt
|
||||
prompt = ""
|
||||
fmt.Println("Set model template.\n")
|
||||
}
|
||||
multiline = MultilineNone
|
||||
case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
|
||||
scanner.Prompt.UseAlt = true
|
||||
multiline = MultilinePrompt
|
||||
prompt += line + "\n"
|
||||
continue
|
||||
case scanner.Pasting:
|
||||
@@ -670,10 +734,10 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
case "nohistory":
|
||||
scanner.HistoryDisable()
|
||||
case "wordwrap":
|
||||
wordWrap = true
|
||||
opts.WordWrap = true
|
||||
fmt.Println("Set 'wordwrap' mode.")
|
||||
case "nowordwrap":
|
||||
wordWrap = false
|
||||
opts.WordWrap = false
|
||||
fmt.Println("Set 'nowordwrap' mode.")
|
||||
case "verbose":
|
||||
cmd.Flags().Set("verbose", "true")
|
||||
@@ -685,12 +749,59 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
if len(args) < 3 || args[2] != "json" {
|
||||
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
|
||||
} else {
|
||||
format = args[2]
|
||||
opts.Format = args[2]
|
||||
fmt.Printf("Set format to '%s' mode.\n", args[2])
|
||||
}
|
||||
case "noformat":
|
||||
format = ""
|
||||
opts.Format = ""
|
||||
fmt.Println("Disabled format.")
|
||||
case "parameter":
|
||||
if len(args) < 4 {
|
||||
usageParameters()
|
||||
continue
|
||||
}
|
||||
var params []string
|
||||
for _, p := range args[3:] {
|
||||
params = append(params, p)
|
||||
}
|
||||
fp, err := api.FormatParams(map[string][]string{args[2]: params})
|
||||
if err != nil {
|
||||
fmt.Printf("Couldn't set parameter: %q\n\n", err)
|
||||
continue
|
||||
}
|
||||
fmt.Printf("Set parameter '%s' to '%s'\n\n", args[2], strings.Join(params, ", "))
|
||||
opts.Options[args[2]] = fp[args[2]]
|
||||
case "system", "template":
|
||||
if len(args) < 3 {
|
||||
usageSet()
|
||||
continue
|
||||
}
|
||||
line := strings.Join(args[2:], " ")
|
||||
line = strings.TrimPrefix(line, `"""`)
|
||||
if strings.HasPrefix(args[2], `"""`) {
|
||||
cut, found := strings.CutSuffix(line, `"""`)
|
||||
prompt += cut + "\n"
|
||||
if found {
|
||||
opts.System = prompt
|
||||
if args[1] == "system" {
|
||||
fmt.Println("Set system template.\n")
|
||||
} else {
|
||||
fmt.Println("Set prompt template.\n")
|
||||
}
|
||||
prompt = ""
|
||||
} else {
|
||||
prompt = `"""` + prompt
|
||||
if args[1] == "system" {
|
||||
multiline = MultilineSystem
|
||||
} else {
|
||||
multiline = MultilineTemplate
|
||||
}
|
||||
scanner.Prompt.UseAlt = true
|
||||
}
|
||||
} else {
|
||||
opts.System = line
|
||||
fmt.Println("Set system template.\n")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
|
||||
}
|
||||
@@ -705,7 +816,7 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
fmt.Println("error: couldn't connect to ollama server")
|
||||
return err
|
||||
}
|
||||
resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: model})
|
||||
resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: opts.Model})
|
||||
if err != nil {
|
||||
fmt.Println("error: couldn't get model")
|
||||
return err
|
||||
@@ -724,19 +835,33 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
if resp.Parameters == "" {
|
||||
fmt.Print("No parameters were specified for this model.\n\n")
|
||||
} else {
|
||||
if len(opts.Options) > 0 {
|
||||
fmt.Println("User defined parameters:")
|
||||
for k, v := range opts.Options {
|
||||
fmt.Printf("%-*s %v\n", 30, k, v)
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
fmt.Println("Model defined parameters:")
|
||||
fmt.Println(resp.Parameters)
|
||||
}
|
||||
case "system":
|
||||
if resp.System == "" {
|
||||
switch {
|
||||
case opts.System != "":
|
||||
fmt.Println(opts.System + "\n")
|
||||
case resp.System != "":
|
||||
fmt.Println(resp.System + "\n")
|
||||
default:
|
||||
fmt.Print("No system prompt was specified for this model.\n\n")
|
||||
} else {
|
||||
fmt.Println(resp.System)
|
||||
}
|
||||
case "template":
|
||||
if resp.Template == "" {
|
||||
fmt.Print("No prompt template was specified for this model.\n\n")
|
||||
} else {
|
||||
switch {
|
||||
case opts.Template != "":
|
||||
fmt.Println(opts.Template + "\n")
|
||||
case resp.Template != "":
|
||||
fmt.Println(resp.Template)
|
||||
default:
|
||||
fmt.Print("No prompt template was specified for this model.\n\n")
|
||||
}
|
||||
default:
|
||||
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
|
||||
@@ -766,8 +891,9 @@ func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format
|
||||
prompt += line
|
||||
}
|
||||
|
||||
if len(prompt) > 0 && prompt[0] != '/' {
|
||||
if err := generate(cmd, model, prompt, wordWrap, format); err != nil {
|
||||
if len(prompt) > 0 && multiline == MultilineNone {
|
||||
opts.Prompt = prompt
|
||||
if err := generate(cmd, opts); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@@ -23,7 +23,7 @@ Ollama binds to 127.0.0.1 port 11434 by default. Change the bind address with th
|
||||
On macOS:
|
||||
|
||||
```bash
|
||||
OLLAMA_HOST=0.0.0.0:11435 ollama serve
|
||||
OLLAMA_HOST=0.0.0.0:11434 ollama serve
|
||||
```
|
||||
|
||||
On Linux:
|
||||
@@ -59,7 +59,7 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
|
||||
On Linux:
|
||||
|
||||
```bash
|
||||
echo 'Environment="OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
|
||||
echo 'Environment="OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
|
||||
```
|
||||
|
||||
Reload `systemd` and restart Ollama:
|
||||
|
@@ -42,12 +42,13 @@ text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
|
||||
all_splits = text_splitter.split_documents(data)
|
||||
```
|
||||
|
||||
It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. For now, we don't have embeddings built in to Ollama, though we will be adding that soon, so for now, we can use the GPT4All library for that. We will use ChromaDB in this example for a vector database. `pip install GPT4All chromadb`
|
||||
It's split up, but we have to find the relevant splits and then submit those to the model. We can do this by creating embeddings and storing them in a vector database. We can use Ollama directly to instantiate an embedding model. We will use ChromaDB in this example for a vector database. `pip install GPT4All chromadb`
|
||||
|
||||
```python
|
||||
from langchain.embeddings import GPT4AllEmbeddings
|
||||
from langchain.embeddings import OllamaEmbeddings
|
||||
from langchain.vectorstores import Chroma
|
||||
vectorstore = Chroma.from_documents(documents=all_splits, embedding=GPT4AllEmbeddings())
|
||||
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="llama2")
|
||||
vectorstore = Chroma.from_documents(documents=all_splits, embedding=oembed)
|
||||
```
|
||||
|
||||
Now let's ask a question from the document. **Who was Neleus, and who is in his family?** Neleus is a character in the Odyssey, and the answer can be found in our text.
|
||||
|
@@ -25,9 +25,11 @@ spec:
|
||||
image: ollama/ollama:latest
|
||||
env:
|
||||
- name: PATH
|
||||
value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin
|
||||
value: /usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
|
||||
- name: LD_LIBRARY_PATH
|
||||
value: /usr/local/nvidia/lib64
|
||||
value: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
|
||||
- name: NVIDIA_DRIVER_CAPABILITIES
|
||||
value: compute,utility
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 11434
|
||||
|
@@ -217,7 +217,7 @@ fi
|
||||
|
||||
if ! check_gpu nvidia-smi || [ -z "$(nvidia-smi | grep -o "CUDA Version: [0-9]*\.[0-9]*")" ]; then
|
||||
case $OS_NAME in
|
||||
centos|rhel) install_cuda_driver_yum 'rhel' $OS_VERSION ;;
|
||||
centos|rhel) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -d '.' -f 1) ;;
|
||||
rocky) install_cuda_driver_yum 'rhel' $(echo $OS_VERSION | cut -c1) ;;
|
||||
fedora) install_cuda_driver_yum $OS_NAME $OS_VERSION ;;
|
||||
amzn) install_cuda_driver_yum 'fedora' '35' ;;
|
||||
|
@@ -14,7 +14,6 @@ import (
|
||||
"net/url"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -376,6 +375,15 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
|
||||
layer.MediaType = mediatype
|
||||
layers = append(layers, layer)
|
||||
case "adapter":
|
||||
if strings.HasPrefix(c.Args, "@") {
|
||||
blobPath, err := GetBlobsPath(strings.TrimPrefix(c.Args, "@"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
c.Args = blobPath
|
||||
}
|
||||
|
||||
fn(api.ProgressResponse{Status: "creating adapter layer"})
|
||||
bin, err := os.Open(realpath(modelFileDir, c.Args))
|
||||
if err != nil {
|
||||
@@ -426,7 +434,7 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
|
||||
if len(params) > 0 {
|
||||
fn(api.ProgressResponse{Status: "creating parameters layer"})
|
||||
|
||||
formattedParams, err := formatParams(params)
|
||||
formattedParams, err := api.FormatParams(params)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -581,64 +589,6 @@ func GetLayerWithBufferFromLayer(layer *Layer) (*LayerReader, error) {
|
||||
return newLayer, nil
|
||||
}
|
||||
|
||||
// formatParams converts specified parameter options to their correct types
|
||||
func formatParams(params map[string][]string) (map[string]interface{}, error) {
|
||||
opts := api.Options{}
|
||||
valueOpts := reflect.ValueOf(&opts).Elem() // names of the fields in the options struct
|
||||
typeOpts := reflect.TypeOf(opts) // types of the fields in the options struct
|
||||
|
||||
// build map of json struct tags to their types
|
||||
jsonOpts := make(map[string]reflect.StructField)
|
||||
for _, field := range reflect.VisibleFields(typeOpts) {
|
||||
jsonTag := strings.Split(field.Tag.Get("json"), ",")[0]
|
||||
if jsonTag != "" {
|
||||
jsonOpts[jsonTag] = field
|
||||
}
|
||||
}
|
||||
|
||||
out := make(map[string]interface{})
|
||||
// iterate params and set values based on json struct tags
|
||||
for key, vals := range params {
|
||||
if opt, ok := jsonOpts[key]; ok {
|
||||
field := valueOpts.FieldByName(opt.Name)
|
||||
if field.IsValid() && field.CanSet() {
|
||||
switch field.Kind() {
|
||||
case reflect.Float32:
|
||||
floatVal, err := strconv.ParseFloat(vals[0], 32)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid float value %s", vals)
|
||||
}
|
||||
|
||||
out[key] = float32(floatVal)
|
||||
case reflect.Int:
|
||||
intVal, err := strconv.ParseInt(vals[0], 10, 64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid int value %s", vals)
|
||||
}
|
||||
|
||||
out[key] = intVal
|
||||
case reflect.Bool:
|
||||
boolVal, err := strconv.ParseBool(vals[0])
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid bool value %s", vals)
|
||||
}
|
||||
|
||||
out[key] = boolVal
|
||||
case reflect.String:
|
||||
out[key] = vals[0]
|
||||
case reflect.Slice:
|
||||
// TODO: only string slices are supported right now
|
||||
out[key] = vals
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown type %s for %s", field.Kind(), key)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func getLayerDigests(layers []*LayerReader) ([]string, error) {
|
||||
var digests []string
|
||||
for _, l := range layers {
|
||||
|
@@ -67,6 +67,20 @@ func ParseModelPath(name string) ModelPath {
|
||||
return mp
|
||||
}
|
||||
|
||||
var errModelPathInvalid = errors.New("invalid model path")
|
||||
|
||||
func (mp ModelPath) Validate() error {
|
||||
if mp.Repository == "" {
|
||||
return fmt.Errorf("%w: model repository name is required", errModelPathInvalid)
|
||||
}
|
||||
|
||||
if strings.Contains(mp.Tag, ":") {
|
||||
return fmt.Errorf("%w: ':' (colon) is not allowed in tag names", errModelPathInvalid)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (mp ModelPath) GetNamespaceRepository() string {
|
||||
return fmt.Sprintf("%s/%s", mp.Namespace, mp.Repository)
|
||||
}
|
||||
|
@@ -416,6 +416,11 @@ func CreateModelHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if err := ParseModelPath(req.Name).Validate(); err != nil {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if req.Path == "" && req.Modelfile == "" {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "path or modelfile are required"})
|
||||
return
|
||||
@@ -640,6 +645,11 @@ func CopyModelHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
if err := ParseModelPath(req.Destination).Validate(); err != nil {
|
||||
c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": err.Error()})
|
||||
return
|
||||
}
|
||||
|
||||
if err := CopyModel(req.Source, req.Destination); err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
c.JSON(http.StatusNotFound, gin.H{"error": fmt.Sprintf("model '%s' not found", req.Source)})
|
||||
|
@@ -5,6 +5,7 @@ import (
|
||||
"crypto/md5"
|
||||
"errors"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"log"
|
||||
"math"
|
||||
@@ -102,7 +103,7 @@ func (b *blobUpload) Prepare(ctx context.Context, requestURL *url.URL, opts *Reg
|
||||
}
|
||||
|
||||
// set part.N to the current number of parts
|
||||
b.Parts = append(b.Parts, blobUploadPart{blobUpload: b, N: len(b.Parts), Offset: offset, Size: size})
|
||||
b.Parts = append(b.Parts, blobUploadPart{N: len(b.Parts), Offset: offset, Size: size})
|
||||
offset += size
|
||||
}
|
||||
|
||||
@@ -147,14 +148,13 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
|
||||
g.Go(func() error {
|
||||
var err error
|
||||
for try := 0; try < maxRetries; try++ {
|
||||
err = b.uploadChunk(inner, http.MethodPatch, requestURL, part, opts)
|
||||
err = b.uploadPart(inner, http.MethodPatch, requestURL, part, opts)
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled):
|
||||
return err
|
||||
case errors.Is(err, errMaxRetriesExceeded):
|
||||
return err
|
||||
case err != nil:
|
||||
part.Reset()
|
||||
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
||||
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
|
||||
time.Sleep(sleep)
|
||||
@@ -176,17 +176,10 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
|
||||
|
||||
requestURL := <-b.nextURL
|
||||
|
||||
var sb strings.Builder
|
||||
|
||||
// calculate md5 checksum and add it to the commit request
|
||||
var sb strings.Builder
|
||||
for _, part := range b.Parts {
|
||||
hash := md5.New()
|
||||
if _, err := io.Copy(hash, io.NewSectionReader(b.file, part.Offset, part.Size)); err != nil {
|
||||
b.err = err
|
||||
return
|
||||
}
|
||||
|
||||
sb.Write(hash.Sum(nil))
|
||||
sb.Write(part.Sum(nil))
|
||||
}
|
||||
|
||||
md5sum := md5.Sum([]byte(sb.String()))
|
||||
@@ -201,27 +194,25 @@ func (b *blobUpload) Run(ctx context.Context, opts *RegistryOptions) {
|
||||
headers.Set("Content-Length", "0")
|
||||
|
||||
for try := 0; try < maxRetries; try++ {
|
||||
resp, err := makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
|
||||
if err != nil {
|
||||
b.err = err
|
||||
if errors.Is(err, context.Canceled) {
|
||||
return
|
||||
}
|
||||
|
||||
var resp *http.Response
|
||||
resp, err = makeRequestWithRetry(ctx, http.MethodPut, requestURL, headers, nil, opts)
|
||||
if errors.Is(err, context.Canceled) {
|
||||
break
|
||||
} else if err != nil {
|
||||
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
||||
log.Printf("%s complete upload attempt %d failed: %v, retrying in %s", b.Digest[7:19], try, err, sleep)
|
||||
time.Sleep(sleep)
|
||||
continue
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
b.err = nil
|
||||
b.done = true
|
||||
return
|
||||
break
|
||||
}
|
||||
|
||||
b.err = err
|
||||
b.done = true
|
||||
}
|
||||
|
||||
func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
|
||||
func (b *blobUpload) uploadPart(ctx context.Context, method string, requestURL *url.URL, part *blobUploadPart, opts *RegistryOptions) error {
|
||||
headers := make(http.Header)
|
||||
headers.Set("Content-Type", "application/octet-stream")
|
||||
headers.Set("Content-Length", fmt.Sprintf("%d", part.Size))
|
||||
@@ -232,8 +223,13 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
|
||||
}
|
||||
|
||||
sr := io.NewSectionReader(b.file, part.Offset, part.Size)
|
||||
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, part), opts)
|
||||
|
||||
md5sum := md5.New()
|
||||
w := &progressWriter{blobUpload: b}
|
||||
|
||||
resp, err := makeRequest(ctx, method, requestURL, headers, io.TeeReader(sr, io.MultiWriter(w, md5sum)), opts)
|
||||
if err != nil {
|
||||
w.Rollback()
|
||||
return err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
@@ -245,11 +241,13 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
|
||||
|
||||
nextURL, err := url.Parse(location)
|
||||
if err != nil {
|
||||
w.Rollback()
|
||||
return err
|
||||
}
|
||||
|
||||
switch {
|
||||
case resp.StatusCode == http.StatusTemporaryRedirect:
|
||||
w.Rollback()
|
||||
b.nextURL <- nextURL
|
||||
|
||||
redirectURL, err := resp.Location()
|
||||
@@ -259,14 +257,13 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
|
||||
|
||||
// retry uploading to the redirect URL
|
||||
for try := 0; try < maxRetries; try++ {
|
||||
err = b.uploadChunk(ctx, http.MethodPut, redirectURL, part, nil)
|
||||
err = b.uploadPart(ctx, http.MethodPut, redirectURL, part, nil)
|
||||
switch {
|
||||
case errors.Is(err, context.Canceled):
|
||||
return err
|
||||
case errors.Is(err, errMaxRetriesExceeded):
|
||||
return err
|
||||
case err != nil:
|
||||
part.Reset()
|
||||
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
|
||||
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
|
||||
time.Sleep(sleep)
|
||||
@@ -279,6 +276,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
|
||||
return fmt.Errorf("%w: %w", errMaxRetriesExceeded, err)
|
||||
|
||||
case resp.StatusCode == http.StatusUnauthorized:
|
||||
w.Rollback()
|
||||
auth := resp.Header.Get("www-authenticate")
|
||||
authRedir := ParseAuthRedirectString(auth)
|
||||
token, err := getAuthToken(ctx, authRedir)
|
||||
@@ -289,6 +287,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
|
||||
opts.Token = token
|
||||
fallthrough
|
||||
case resp.StatusCode >= http.StatusBadRequest:
|
||||
w.Rollback()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -301,6 +300,7 @@ func (b *blobUpload) uploadChunk(ctx context.Context, method string, requestURL
|
||||
b.nextURL <- nextURL
|
||||
}
|
||||
|
||||
part.Hash = md5sum
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -341,22 +341,26 @@ func (b *blobUpload) Wait(ctx context.Context, fn func(api.ProgressResponse)) er
|
||||
|
||||
type blobUploadPart struct {
|
||||
// N is the part number
|
||||
N int
|
||||
Offset int64
|
||||
Size int64
|
||||
N int
|
||||
Offset int64
|
||||
Size int64
|
||||
hash.Hash
|
||||
}
|
||||
|
||||
type progressWriter struct {
|
||||
written int64
|
||||
*blobUpload
|
||||
}
|
||||
|
||||
func (p *blobUploadPart) Write(b []byte) (n int, err error) {
|
||||
func (p *progressWriter) Write(b []byte) (n int, err error) {
|
||||
n = len(b)
|
||||
p.written += int64(n)
|
||||
p.Completed.Add(int64(n))
|
||||
return n, nil
|
||||
}
|
||||
|
||||
func (p *blobUploadPart) Reset() {
|
||||
p.Completed.Add(-int64(p.written))
|
||||
func (p *progressWriter) Rollback() {
|
||||
p.Completed.Add(-p.written)
|
||||
p.written = 0
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user