Compare commits

...

299 Commits

Author SHA1 Message Date
Jeffrey Morgan
131413ddff wip 2023-10-22 09:54:59 -04:00
Jeffrey Morgan
ccff9ca09c Update README.md 2023-10-21 11:58:10 -04:00
Jeffrey Morgan
436a5be49c Update README.md 2023-10-21 11:57:32 -04:00
Matt Williams
cc0bf96398 Merge pull request #829 from jmorganca/mattw/example-summarize-news
added python rag news summary
2023-10-20 21:03:16 -07:00
Michael Yang
386169205c update runtime options (#864) 2023-10-20 21:17:14 -04:00
Michael Yang
0d6342a882 Merge pull request #863 from jmorganca/mxyng/nil-pointer
fix: nil pointer dereference
2023-10-20 17:23:37 -07:00
Michael Yang
75bee074b6 fix: nil pointer dereference 2023-10-20 16:55:24 -07:00
Michael Yang
533d76368c Merge pull request #859 from jmorganca/mxyng/fix-hostname
fix: ollama host for hostname
2023-10-20 11:40:56 -07:00
Michael Yang
459f4a7889 fix: ollama host for hostname 2023-10-20 11:32:41 -07:00
Matt Williams
25c63c91d8 Update README.md
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-10-19 13:52:40 -07:00
Jeffrey Morgan
cbfff4f868 update dependencies in app/ 2023-10-19 15:52:41 -04:00
Jeffrey Morgan
7ed5a39bc7 simpler check for model loading compatibility errors 2023-10-19 14:50:49 -04:00
Michael Yang
cc1d03f4ec Merge pull request #841 from jmorganca/mxyng/cleanup-cmd-args 2023-10-19 11:22:40 -07:00
Michael Yang
846f593dbf Merge pull request #828 from jmorganca/mxyng/template-parameters
image: show parameters
2023-10-19 09:31:31 -07:00
Michael Yang
0a53da03fd Merge pull request #843 from jmorganca/mxyng/request-validation
basic request validation
2023-10-19 09:30:45 -07:00
Michael Yang
2ce1793a1d go fmt 2023-10-19 09:21:51 -07:00
Michael Yang
e1c5be24e7 check json eof 2023-10-19 09:21:51 -07:00
Michael Yang
2ad8a074ac generate: set created_at
move the empty response so it's more visible
2023-10-19 09:21:51 -07:00
Michael Yang
7e547c6833 s/message/error/ 2023-10-19 09:21:04 -07:00
Michael Yang
689842b9ff request: bad request when model missing fields 2023-10-19 09:21:04 -07:00
Michael Yang
a19d47642e models: rm workDir from CreateModel
unused after removing EMBED
2023-10-19 09:21:04 -07:00
Jeffrey Morgan
a7dad24d92 add error for falcon and starcoder vocab compatibility (#844)
add error for falcon and starcoder vocab compatibility
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-19 12:18:31 -04:00
Jeffrey Morgan
6b213216d5 Update import.md 2023-10-19 12:17:36 -04:00
Bruce MacDonald
fe6f3b48f7 do not reload the running llm when runtime params change (#840)
- only reload the running llm if the model has changed, or the options for loading the running model have changed
- rename loaded llm to runner to differentiate from loaded model image
- remove logic which keeps the first system prompt in the generation context
2023-10-19 10:39:58 -04:00
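A minimal sketch of the reload check described in that commit, assuming a hypothetical `loadedRunner` record holding the model name and the load-time options it was started with (the `api.Runner` type appears later in this compare, in the api/types.go diff; the struct and method names here are illustrative, not the project's actual code):

```go
package server

import (
	"reflect"

	"github.com/jmorganca/ollama/api"
)

// loadedRunner is a hypothetical record of the llm currently running.
type loadedRunner struct {
	modelName string
	opts      api.Runner // options the runner was loaded with
}

// needsReload reports whether a request requires restarting the runner:
// only when the requested model or its load-time options differ from
// what is already running.
func (r *loadedRunner) needsReload(model string, opts api.Runner) bool {
	if r == nil {
		return true // nothing loaded yet
	}
	return r.modelName != model || !reflect.DeepEqual(r.opts, opts)
}
```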
Michael Yang
36c88cb9db cmd: set ExactArgs 2023-10-18 14:40:48 -07:00
Michael Yang
235e43d7f6 Merge pull request #833 from discovertomorrow/leadingspace
Fix Issue with Leading Whitespaces in Decoded Context
2023-10-18 13:52:48 -07:00
Arne Müller
730996e530 use TrimPrefix instead of TrimLeft 2023-10-18 22:51:30 +02:00
Arne Müller
ce6197a8e0 removed redundant strings.CutPrefix from Decode 2023-10-18 22:47:20 +02:00
Arne Müller
46b9953f32 use strings.TrimLeft to remove spaces 2023-10-18 22:41:19 +02:00
Michael Yang
4dcceeffb7 let the template do the work 2023-10-18 13:12:00 -07:00
Michael Yang
019e4a4558 image: show parameters 2023-10-18 13:12:00 -07:00
Michael Yang
627d04d927 Merge pull request #827 from jmorganca/mxyng/template-adapters
model: native gotemplate adapter template
2023-10-18 13:11:25 -07:00
Michael Yang
940e8ebec3 Merge pull request #826 from jmorganca/mxyng/template-system
show: no template system if empty
2023-10-18 13:11:09 -07:00
Bruce MacDonald
565648f3f7 relay CUDA errors to the client (#825) 2023-10-18 15:36:56 -04:00
Arne Müller
90c49bed57 moved removal of leading space into Predict 2023-10-18 20:08:26 +02:00
Michael Yang
3a2477174f Merge pull request #822 from ggozad/fix-tags-api
Fix /api/tags for no models.
2023-10-18 09:34:00 -07:00
Yiorgis Gozadinos
8c6c2cbc8c When the .ollama folder is broken or there are no models return an empty list on /api/tags 2023-10-18 08:23:20 +02:00
Arne Müller
5dc0cff459 fix whitespace removal 2023-10-18 08:15:27 +02:00
Matt Williams
c5c8b4b16a added python rag news summary
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-17 16:41:28 -07:00
Michael Yang
8299bf76ed model: native gotemplate adapter template 2023-10-17 15:28:38 -07:00
Michael Yang
ee4979e510 show: no template system if empty 2023-10-17 15:25:43 -07:00
Michael Yang
08b0e04f40 Merge pull request #813 from jmorganca/mxyng/llama
refactor llm/llama.go
2023-10-17 14:05:58 -07:00
Michael Yang
b36b0b71f8 use cut prefix 2023-10-17 14:01:39 -07:00
Michael Yang
094df37563 remove unused struct 2023-10-17 14:01:38 -07:00
Bruce MacDonald
f3648fd206 Update llama.cpp gguf to latest (#710) 2023-10-17 16:55:16 -04:00
Bruce MacDonald
bd93a94abd fix MB VRAM log output (#824) 2023-10-17 15:35:16 -04:00
Michael Yang
f55bdb6f10 Merge pull request #799 from deichbewohner/jsonmarshaling
Fix JSON Marshal Escaping for Special Characters
2023-10-17 08:46:02 -07:00
Michael Yang
2870a9bfc8 Merge pull request #812 from jmorganca/mxyng/fix-format-string
fix: wrong format string type
2023-10-17 08:40:49 -07:00
Michael Yang
c031c211d1 Merge pull request #809 from jmorganca/mxyng/fix-gpu
fix: regression unsupported metal types
2023-10-17 08:40:40 -07:00
Andreas Wäscher
68391b0055 Add OllamaSharp for .NET (#811) 2023-10-17 11:31:48 -04:00
Alexander F. Rødseth
b7e137323a Fix a typo (#818) 2023-10-17 09:00:15 -04:00
Arne Müller
8fa3f366ad Removed newline trimming and used buffer directly in POST request. 2023-10-17 08:17:35 +02:00
Michael Yang
fddb303f23 fix: format string wrong type 2023-10-16 16:14:28 -07:00
Michael Yang
ad5ee20c7b Merge pull request #794 from ggozad/add_oterm
Add oterm to community integrations
2023-10-16 15:51:55 -07:00
Michael Yang
785b4eb5bf Merge branch 'main' into add_oterm 2023-10-16 15:51:44 -07:00
Michael Yang
16ede1b30b Merge pull request #801 from s-kostyaev/add-ellama-community-integration
Add ellama community integration
2023-10-16 15:51:25 -07:00
Michael Yang
17d6bbbb2a Merge pull request #810 from vieux/patch-1
Update install.sh
2023-10-16 15:50:57 -07:00
Victor Vieux
6481b7f34c Update install.sh, avoid ARCH: unbound variable 2023-10-16 14:40:24 -07:00
Michael Yang
cb4a80b693 fix: regression unsupported metal types
omitting `--n-gpu-layers` means use metal on macos which isn't correct
since ollama uses `num_gpu=0` to explicitly disable gpu for file types
that are not implemented in metal
2023-10-16 14:37:20 -07:00
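The gist of that fix: always pass `--n-gpu-layers` to the llama.cpp runner so that `num_gpu=0` genuinely disables Metal instead of falling back to the default. A hedged sketch of the argument construction (illustrative only, not the project's actual code):

```go
package llm

import "strconv"

// runnerArgs builds llama.cpp server arguments. Per the commit above,
// --n-gpu-layers is emitted unconditionally: omitting it lets llama.cpp
// default to Metal on macOS even when GPU offload should be disabled.
func runnerArgs(modelPath string, numGPU int) []string {
	return []string{
		"--model", modelPath,
		// always explicit, so 0 really means "no GPU layers"
		"--n-gpu-layers", strconv.Itoa(numGPU),
	}
}
```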
Bruce MacDonald
68d7255bd3 show request to server rather than local check (#778) 2023-10-16 17:27:25 -04:00
Michael Yang
9ef2fce33a Merge pull request #768 from jmorganca/mxyng/bytes
fix memory check
2023-10-16 12:42:41 -07:00
Michael Yang
43eaba3d60 Merge pull request #787 from jmorganca/mxyng/server-version2
server: print version on start
2023-10-16 09:59:30 -07:00
Michael Yang
1af493c5a0 server: print version on start 2023-10-16 09:59:14 -07:00
Bruce MacDonald
a0c3e989de deprecate modelfile embed command (#759) 2023-10-16 11:07:37 -04:00
Sergey Kostyaev
7af0fdce48 add ellama community integration 2023-10-16 16:39:10 +07:00
Arne Müller
ee94693b1a handling unescaped json marshaling 2023-10-16 11:15:55 +02:00
Yiorgis Gozadinos
731dbdc1a5 Add oterm to community integrations 2023-10-15 23:21:17 +02:00
Jeffrey Morgan
06bcfbd629 cleanup docker section in readme 2023-10-15 02:33:25 -04:00
Jeffrey Morgan
7d7c2510f8 add docker exec command to readme 2023-10-15 02:31:15 -04:00
Jeffrey Morgan
f9b2f999ac update readme with docker setup and link to import.md 2023-10-15 02:23:03 -04:00
Jeffrey Morgan
c416087339 import.md: formatting and spelling 2023-10-15 01:39:46 -04:00
Jeffrey Morgan
6002cebd2c import.md: convert and quantize docs 2023-10-15 00:11:51 -04:00
Jeffrey Morgan
212bdc541c import.md: model architectures spelling 2023-10-15 00:07:58 -04:00
Jeffrey Morgan
dca6686273 add steps for creating a Modelfile and more example commands to import.md 2023-10-15 00:05:50 -04:00
Jeffrey Morgan
598621afab add push script for docker images 2023-10-14 14:24:39 -04:00
Matt Williams
6479f49c09 Merge pull request #773 from jmorganca/mattw/howtoquant
add how to quantize doc
2023-10-14 08:29:39 -07:00
Matt Williams
b2974a7095 applied mikes comments
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-14 08:29:24 -07:00
Jeffrey Morgan
832b4db9d4 Use correct url for auto updates 2023-10-13 19:04:42 -04:00
Bruce MacDonald
c43873f33b check update response (#785) 2023-10-13 18:05:46 -04:00
Michael Yang
11d82d7b9b update checkvram 2023-10-13 14:47:29 -07:00
Michael Yang
36fe2deebf only check system memory on macos 2023-10-13 14:47:29 -07:00
Michael Yang
4a8931f634 check total (system + video) memory 2023-10-13 14:47:29 -07:00
Michael Yang
bd6e38fb1a refactor memory check 2023-10-13 14:47:29 -07:00
Michael Yang
92189a5855 fix memory check 2023-10-13 14:47:29 -07:00
Michael Yang
d790bf9916 Merge pull request #783 from jmorganca/mxyng/fix-gpu-offloading
fix: offloading on low end GPUs
2023-10-13 14:36:44 -07:00
Michael Yang
35afac099a do not use gpu binary when num_gpu == 0 2023-10-13 14:32:12 -07:00
Michael Yang
811c3d1900 no gpu if vram < 2GB 2023-10-13 14:32:12 -07:00
Bruce MacDonald
3553d10769 check for newer updates (#784)
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-10-13 17:29:46 -04:00
Bruce MacDonald
6fe178134d improve api error handling (#781)
- remove new lines from llama.cpp error messages relayed to client
- check api option types and return error on wrong type
- change num layers from 95% VRAM to 92% VRAM
2023-10-13 16:57:10 -04:00
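For the first point above, a tiny illustration of flattening a multi-line llama.cpp error before relaying it to the client (a sketch under the assumption that the raw message arrives as a string; the function name is made up):

```go
package llm

import "strings"

// relayableError collapses a multi-line llama.cpp error message into a
// single line so it reads cleanly in an API error response.
func relayableError(raw string) string {
	return strings.Join(strings.Fields(raw), " ")
}
```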
Jeffrey Morgan
d890890f66 use lower glibc versions in Dockerfile.build 2023-10-13 01:06:19 -04:00
Jeffrey Morgan
89ba19feca use Go 1.21.3 in Dockerfile 2023-10-12 23:23:12 -04:00
Jeffrey Morgan
6f58c77671 update Dockerfile.build for linux binary builds 2023-10-12 22:14:20 -04:00
Matt Williams
3c975f898f update doc to refer to docker image
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-12 15:57:50 -07:00
Matt Williams
9245c8a1df add how to quantize doc
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-12 15:34:57 -07:00
Michael Yang
7a537cdca9 Merge pull request #770 from jmorganca/mxyng/fix-download
fix download
2023-10-12 12:56:43 -07:00
Michael Yang
257ffeb997 fix download 2023-10-12 12:52:43 -07:00
Matt Williams
9b513bb6b1 Merge pull request #753 from jmorganca/mattw/examplereorg
rename the examples to be more descriptive
2023-10-12 11:24:12 -07:00
Matt Williams
042100f797 final rename
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-12 11:23:41 -07:00
Bruce MacDonald
7804b8fab9 validate api options fields from map (#711) 2023-10-12 11:18:11 -04:00
Bruce MacDonald
56497663c8 relay model runner error message to client (#720)
* give direction to user when runner fails
* also relay errors from timeout
* increase timeout to 3 minutes
2023-10-12 11:16:37 -04:00
Matt Williams
e1afcb8af2 simple gen to simple
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 21:29:07 -07:00
Matt Williams
385eeea357 remove with
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 21:26:11 -07:00
Matt Williams
8a41b244e8 add golang gen
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 21:20:50 -07:00
Jeffrey Morgan
92578798bb fix relative links in README.md 2023-10-11 19:24:06 -04:00
Michael Yang
788637918a Merge pull request #760 from jmorganca/mxyng/more-downloads
Mxyng/more downloads
2023-10-11 14:33:10 -07:00
Michael Yang
c413a55093 download: handle inner errors 2023-10-11 14:15:30 -07:00
Michael Yang
630bb75d2a dynamically size download parts based on file size 2023-10-11 14:10:25 -07:00
Michael Yang
a2055a1e93 update download 2023-10-11 14:10:25 -07:00
Michael Yang
b599946b74 add format bytes 2023-10-11 14:08:23 -07:00
Michael Yang
aca2d65b82 Merge pull request #757 from jmorganca/mxyng/format-time
cleanup format time
2023-10-11 11:12:29 -07:00
Michael Yang
b5e08e3373 cleanup format time 2023-10-11 11:09:27 -07:00
Bruce MacDonald
274d5a5fdf optional parameter to not stream response (#639)
* update streaming request accept header
* add optional stream param to request bodies
2023-10-11 12:54:27 -04:00
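The new `stream` field surfaces later in this compare as `Stream *bool` on the request structs in api/types.go. A small client-side illustration of sending a non-streaming generate request, assuming the `api.GenerateRequest` type from that diff (the model and prompt are just examples):

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"

	"github.com/jmorganca/ollama/api"
)

func main() {
	stream := false // ask for a single aggregated response instead of a stream
	req := api.GenerateRequest{
		Model:  "llama2",
		Prompt: "why is the sky blue?",
		Stream: &stream, // a pointer keeps "unset" distinct from "false"
	}
	body, err := json.Marshal(req)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(string(body)) // request body includes "stream":false
}
```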
Matt Williams
fc6b49be32 add ts alternate to python langchain simplegen
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 09:50:15 -07:00
Bruce MacDonald
77295f716e prevent waiting on exited command (#752)
* prevent waiting on exited command
* close llama runner once
2023-10-11 12:32:13 -04:00
Matt Williams
615f7d1dea cleanup readme.
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 06:13:29 -07:00
Matt Williams
cdf5e106ae rename dirs
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-11 06:10:24 -07:00
Matt Williams
a85329f59a rename the models to be more descriptive
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-10 17:40:02 -07:00
Bruce MacDonald
f2ba1311aa improve vram safety with 5% vram memory buffer (#724)
* check free memory not total
* wait for subprocess to exit
2023-10-10 16:16:09 -04:00
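As a rough illustration of the buffer described in that commit (a sketch, not the repository's code): plan offloading against free VRAM, minus a 5% reserve.

```go
package llm

// usableVRAM applies the safety margin from the commit above: budget against
// free (not total) VRAM and keep 5% of it in reserve.
func usableVRAM(freeBytes int64) int64 {
	const reserve = 0.05
	return int64(float64(freeBytes) * (1 - reserve))
}
```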
Jeffrey Morgan
65dcd0ce35 always cleanup blob download (#747) 2023-10-10 13:12:29 -04:00
Michael Yang
0040f543a2 Merge pull request #743 from jmorganca/mxyng/http-proxy
handle upstream proxies
2023-10-10 09:59:06 -07:00
Matt Williams
767f9bdbbb Merge pull request #585 from jmorganca/matt/examplementors
add the example for ask the mentors
2023-10-09 13:58:14 -07:00
Costa Alexoglou
f7f5169c94 Update api.md (#741)
Avoid triple ticks in visual editor and also copied in clipboard.
2023-10-09 16:01:46 -04:00
Michael Yang
2cfffea02e handle client proxy 2023-10-09 12:33:47 -07:00
Michael Yang
f6e98334e4 handle upstream proxies 2023-10-09 11:42:36 -07:00
Jeffrey Morgan
ab0668293c llm: fix build on amd64 2023-10-06 14:39:54 -07:00
Bruce MacDonald
af4cf55884 not found error before pulling model (#718) 2023-10-06 16:06:20 -04:00
Bruce MacDonald
d6786f2945 add feedback for reading model metadata (#722) 2023-10-06 16:05:32 -04:00
Michael Yang
38dc2f79bc Merge pull request #626 from jmorganca/mxyng/concurrent-downloads
parallel chunked downloads
2023-10-06 13:01:29 -07:00
Michael Yang
cb961c87ca Merge pull request #679 from jamesbraza/modelfile-docs
`Modelfile` syntax highlighting
2023-10-06 12:59:45 -07:00
Michael Yang
0560b28a8d names 2023-10-06 12:56:56 -07:00
Michael Yang
10199c5987 replace done channel with file check 2023-10-06 12:56:56 -07:00
Michael Yang
288814d3e4 fix ref counts 2023-10-06 12:56:43 -07:00
Michael Yang
04733438da check head request response 2023-10-06 12:56:43 -07:00
Michael Yang
711e891f0f fix resumable downloads
glob returns files in lexical order which is not appropriate when
rebuilding the parts list
2023-10-06 12:56:43 -07:00
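The pitfall behind that fix: `filepath.Glob` returns matches in lexical order, so numbered part files come back as part-1, part-10, part-2, and so on. A generic illustration of re-sorting glob results numerically (the file naming here is hypothetical, not the repository's actual scheme):

```go
package main

import (
	"fmt"
	"log"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
)

func main() {
	// filepath.Glob returns matches in lexical order: part-1, part-10, part-2, ...
	parts, err := filepath.Glob("/tmp/download/blob-part-*")
	if err != nil {
		log.Fatal(err)
	}
	// Re-sort by the numeric suffix so part-2 sorts before part-10.
	sort.Slice(parts, func(i, j int) bool {
		ni, _ := strconv.Atoi(parts[i][strings.LastIndex(parts[i], "-")+1:])
		nj, _ := strconv.Atoi(parts[j][strings.LastIndex(parts[j], "-")+1:])
		return ni < nj
	})
	fmt.Println(parts)
}
```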
Michael Yang
090d08422b handle unexpected eofs 2023-10-06 12:56:43 -07:00
Michael Yang
5b84404c64 handle concurrent requests for the same blobs 2023-10-06 12:56:43 -07:00
Michael Yang
8544edca21 parallel chunked downloads 2023-10-06 12:56:43 -07:00
Bruce MacDonald
5d22319a2c rename server subprocess (#700)
- this makes it easier to see that the subprocess is associated with ollama
2023-10-06 10:15:42 -04:00
Bruce MacDonald
2130c0708b output type parsed from modelfile (#678) 2023-10-05 14:58:04 -04:00
Patrick Devine
61ff1946e6 revise help text (#706) 2023-10-05 11:36:07 -07:00
Bruce MacDonald
d06bc0cb6e enable q8, q5, 5_1, and f32 for linux gpu (#699) 2023-10-05 12:53:47 -04:00
Alexander F. Rødseth
d104b7e997 Fix go test./... issue: fmt.Println arg list ends with redundant newline (#705) 2023-10-05 11:11:04 -04:00
Bruce MacDonald
9e2de1bd2c increase streaming buffer size (#692) 2023-10-04 14:09:00 -04:00
Jeffrey Morgan
dc87e9c9ae update Dockerfile to pass GOFLAGS 2023-10-03 07:05:15 -07:00
Michael Yang
367cb68dc1 Merge pull request #686 from jmorganca/mxyng/starcoder
decode starcoder
2023-10-02 22:47:19 -07:00
Michael Yang
c02c0cd483 starcoder 2023-10-02 19:56:51 -07:00
Patrick Devine
1852755154 show a default message when license/parameters/system prompt/template aren't specified (#681) 2023-10-02 14:34:52 -07:00
James Braza
6f2ce74231 Got rif of all caps to show it can be lower case 2023-10-02 13:54:27 -07:00
James Braza
6edcc5c79f Using code highlighting syntax around Modelfile 2023-10-02 13:46:05 -07:00
Bruce MacDonald
b1f7123301 clean up num_gpu calculation code (#673) 2023-10-02 14:53:42 -04:00
Bruce MacDonald
1fbf3585d6 Relay default values to llama runner (#672)
* include seed in params for llama.cpp server and remove empty filter for temp

* relay default predict options to llama.cpp

- reorganize options to match predict request for readability

* omit empty stop

---------

Co-authored-by: hallh <hallh@users.noreply.github.com>
2023-10-02 14:53:16 -04:00
Patrick Devine
99d5161e8a don't wordwrap when stdout is redirected or piped (#662) 2023-10-02 11:50:55 -07:00
Michael
ea8380be45 add community project: Chatbot Ollama
add community project: Chatbot Ollama by @ivanfioravanti
2023-10-02 09:04:31 -07:00
Jeffrey Morgan
4f25092dc1 fix build_docker.sh permissions 2023-10-01 16:42:32 -07:00
Jiayu Liu
4fc10acce9 add some missing code directives in docs (#664) 2023-10-01 11:51:01 -07:00
Michael Yang
0a4f21c0a7 fix docker build (#659) 2023-09-30 13:34:01 -07:00
Jeffrey Morgan
9abb66254a docker: fix volume permission errors 2023-09-30 12:32:15 -07:00
Jay Nakrani
1d0ebe67e8 Document response stream chunk delimiter. (#632)
Document response stream chunk delimiter.
2023-09-29 21:45:52 -07:00
Bruce MacDonald
a1b2d95f96 remove unused push/pull params (#650) 2023-09-29 17:27:19 -04:00
Michael Yang
c0b1bf7537 Merge pull request #606 from jmorganca/mxyng/install.sh-2
ordered list of install locations
2023-09-29 11:30:46 -07:00
Michael Yang
cdfeb165ca Merge pull request #608 from jmorganca/mxyng/build
update build scripts
2023-09-29 11:30:25 -07:00
Michael Yang
92d454ec5f update build_darwin.sh 2023-09-29 11:29:23 -07:00
Michael Yang
9333b0cc82 Merge pull request #612 from jmorganca/mxyng/prune-empty-directories
prune empty directories
2023-09-29 11:23:39 -07:00
Bruce MacDonald
9771b1ec51 windows runner fixes (#637) 2023-09-29 11:47:55 -04:00
Patrick Devine
76db4a49cf allow the user to cancel generating with ctrl-C (#641) 2023-09-28 17:13:01 -07:00
Luc Stepniewski
4aa0976a2e Added missing return preventing SIGSEGV because of missing resp (#621)
Co-authored-by: Luc Stepniewski <luc@eclipse-fr.com>
2023-09-28 14:25:22 -07:00
Patrick Devine
92c20fdae6 fix error messages for unknown commands in the repl (#611) 2023-09-28 14:19:45 -07:00
Michael Yang
c951da7096 Merge pull request #634 from jmorganca/mxyng/int64
use int64 consistently
2023-09-28 14:17:47 -07:00
Bruce MacDonald
24d82a23a2 do not download updates multiple times (#633) 2023-09-28 15:29:17 -04:00
Michael Yang
f40b3de758 use int64 consistently 2023-09-28 11:07:24 -07:00
Michael
5f4008c296 Update README.md
adding in instruction to run mistral
2023-09-28 09:06:03 -07:00
Aaron Coffey
6ae33d8141 Update modelfile.md to reflect the usage of num_gpu. (#629) 2023-09-28 10:21:21 -04:00
Jeffrey Morgan
c5664c1fef Update faq.md 2023-09-27 13:49:43 -07:00
Bruce MacDonald
958a5a8184 revert fedora cuda version check 2023-09-27 15:12:29 -04:00
Michael Yang
8608eb4760 prune empty directories 2023-09-27 10:58:09 -07:00
Bruce MacDonald
a2b210130f fedora install fixes (#609) 2023-09-27 11:43:47 -04:00
Bruce MacDonald
ed20837f9a Update modelfile.md 2023-09-27 10:38:10 -04:00
James Braza
1db2a61dd0 Added num_predict to the options table (#614) 2023-09-27 10:26:08 -04:00
Jeffrey Morgan
2ded8ab206 use 11.8.0 nvidia dockerfile base image for now 2023-09-26 21:48:41 -07:00
Michael Yang
e6b3648bbf Merge pull request #616 from jmorganca/mxyng/fix-model-name 2023-09-26 20:54:18 -07:00
Michael Yang
0625e805f0 fix model name not matching 2023-09-26 19:50:04 -07:00
Michael Yang
c38ec5befb Merge pull request #598 from jmorganca/mxyng/help-exit
add painter message for exit
2023-09-26 15:17:40 -07:00
Michael Yang
c577721a43 Merge pull request #605 from jmorganca/mxyng/install.sh
do not unload nouveau driver
2023-09-26 09:53:05 -07:00
Michael Yang
29c056ea39 ordered list of install locations 2023-09-26 09:38:11 -07:00
Michael Yang
9fc3bba9cf do no unload nouveau driver 2023-09-26 09:36:54 -07:00
Michael Chiang
7774ed4ae6 Update README.md for linux + cleanup (#601)
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-09-25 23:44:53 -07:00
Michael Yang
11f920f209 Merge pull request #599 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 18:24:13 -07:00
Michael Yang
6e6b655956 update install.sh 2023-09-25 18:09:44 -07:00
Michael Yang
110ae89a6c Merge pull request #596 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 17:59:13 -07:00
Michael Yang
5e388f931e check cuda installed before installing 2023-09-25 17:56:43 -07:00
Michael Yang
d5ad41dd7b fix path for wsl user 2023-09-25 17:56:25 -07:00
Michael Yang
d294a11bc9 start service on exit instead of immediately 2023-09-25 17:54:02 -07:00
Michael Yang
93d887e4bc add painter message for exit 2023-09-25 16:30:22 -07:00
Jeffrey Morgan
5306b0269d Update linux.md 2023-09-25 16:10:32 -07:00
Michael Yang
7de0c8345d Merge pull request #595 from jmorganca/mxyng/install.sh
ignore systemctl is-system-running exit code
2023-09-25 15:49:47 -07:00
Michael Yang
1b9dcab3ab ignore systemctl is-system-running exit code 2023-09-25 15:47:45 -07:00
Bruce MacDonald
86279f4ae3 unbound max num gpu layers (#591)
---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-25 18:36:46 -04:00
Michael Yang
b934bf23e6 exit on unknown distro (#594) 2023-09-25 15:30:58 -07:00
Michael Yang
2b8ef455ad Merge pull request #593 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 14:09:40 -07:00
Michael Yang
0c5f47177c update install.sh 2023-09-25 14:01:44 -07:00
Michael Yang
1210db2924 Merge pull request #592 from jmorganca/mxyng/install.sh
fix dkms on debian
2023-09-25 12:59:01 -07:00
Michael Yang
d0854bf1e6 fix dkms on debian 2023-09-25 12:57:25 -07:00
Michael Yang
8396463255 Merge pull request #590 from jmorganca/mxyng/install.sh
fix dkms install
2023-09-25 12:17:31 -07:00
Michael Yang
a027bbf4d7 fix dkms install 2023-09-25 12:16:41 -07:00
Michael Yang
ed94a3dd02 Merge pull request #589 from jmorganca/mxyng/install.sh
update install.sh
2023-09-25 11:08:25 -07:00
Michael Yang
f14f62ab3b update install.sh 2023-09-25 11:05:38 -07:00
Jeffrey Morgan
0fb5268496 Update linux.md 2023-09-25 10:06:23 -07:00
Bruce MacDonald
c65edb1506 fix linux installer warning logs (#588) 2023-09-25 11:22:56 -04:00
Twan L
1605af32ec Added a new community project (#574) 2023-09-25 10:40:59 -04:00
Jeffrey Morgan
ee3032ad89 improvements to docs/linux.md 2023-09-24 21:50:07 -07:00
Jeffrey Morgan
5b7a27281d improvements to docs/linux.md 2023-09-24 21:38:23 -07:00
Jeffrey Morgan
d2a784e33e add docs/linux.md 2023-09-24 21:34:44 -07:00
Jeffrey Morgan
413a2e4f91 set DEBIAN_FRONTEND=noninteractive correctly 2023-09-24 20:35:42 -07:00
Matt Williams
a92fdff620 add the example for ask the mentors
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-09-24 15:58:32 -07:00
Patrick Devine
b5614f3ebc fix end-of-line issue with the new prompt (#582) 2023-09-23 17:20:30 -07:00
Jeffrey Morgan
8b2ba9cab8 minor improvements to install.sh 2023-09-23 11:20:39 -04:00
Jeffrey Morgan
e29662ab5c fix minor install script issues on debian 2023-09-23 10:25:47 -04:00
Bruce MacDonald
cbc40aa996 debian installer support (#579)
* debian installer support

- normalize os name to lowercase
- check needed commands are available
- dont check sudo when root user
- share common install commands
- support debian cuda install
- skip aarm cuda install
- system user shared home dir

* refactor and add other platforms (#580)

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-23 09:46:47 -04:00
Jeffrey Morgan
5cb82540c9 install.sh: update install url 2023-09-23 09:35:14 -04:00
Jeffrey Morgan
d7849a1dc9 add .env to .dockerignore 2023-09-23 00:53:48 -04:00
Jeffrey Morgan
01c44d687e add multi line strings to final prompt 2023-09-23 00:27:24 -04:00
Jeffrey Morgan
9b12a511ca check other request fields before load short circuit in /api/generate 2023-09-22 23:50:55 -04:00
Jeffrey Morgan
e20362e0d5 fix multi line input in ollama run 2023-09-22 23:49:35 -04:00
Patrick Devine
c928ceb927 add word wrapping for lines which are longer than the terminal width (#553) 2023-09-22 13:36:08 -07:00
Michael Yang
e1a0846483 Merge pull request #571 from jmorganca/mxyng/update-dockerfile
update dockerfile.cuda
2023-09-22 12:34:41 -07:00
Jeffrey Morgan
f997e29e45 Add Dockerfile.build for building linux binaries (#558)
Add `Dockerfile.build` for building linux binaries

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-22 15:20:12 -04:00
Patrick Devine
87d9efb364 switch to forked readline lib which doesn't wreck the repl prompt (#578) 2023-09-22 12:17:45 -07:00
Michael Yang
93d3a2568d replace dockerfile 2023-09-22 11:57:38 -07:00
Michael Yang
5a81390b24 update dockerfile.cuda 2023-09-22 11:57:38 -07:00
Michael Yang
a89ef99aed Merge pull request #575 from jmorganca/mxyng/fix-ipv6-only
fix ipv6 parse ip
2023-09-22 11:47:11 -07:00
Bruce MacDonald
dc0c725ceb ubuntu cuda drivers (#576) 2023-09-22 19:43:14 +01:00
Bruce MacDonald
5d71bda478 close llm on interrupt (#577) 2023-09-22 19:41:52 +01:00
Michael Yang
88897a90e4 fix ipv6 parse ip 2023-09-22 10:41:32 -07:00
Bruce MacDonald
9df31c3518 linux installer script (#534)
Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-22 17:01:03 +01:00
Michael Yang
2044f9d4da Merge pull request #570 from jmorganca/mxyng/head-request
fix HEAD request
2023-09-21 16:56:17 -07:00
Michael Yang
0d186f3b33 Merge pull request #569 from jmorganca/mxyng/update-submodules
silence warm up log
2023-09-21 16:52:42 -07:00
Michael Yang
82f5b66c01 register HEAD /api/tags 2023-09-21 16:38:03 -07:00
Michael Yang
c986694367 fix HEAD / request
HEAD request should respond like their GET counterparts except without a
response body.
2023-09-21 16:35:58 -07:00
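A minimal net/http illustration of that rule (only to show the intended semantics; the project's actual routing differs): respond to HEAD with the same status and headers as GET, but skip the body.

```go
package main

import (
	"fmt"
	"log"
	"net/http"
)

func main() {
	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		// Same status and headers for GET and HEAD...
		w.Header().Set("Content-Type", "text/plain")
		w.WriteHeader(http.StatusOK)
		if r.Method == http.MethodHead {
			return // ...but no response body for HEAD
		}
		fmt.Fprintln(w, "Ollama is running")
	})
	log.Fatal(http.ListenAndServe("127.0.0.1:11434", nil))
}
```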
Michael Yang
058d0cd04b silence warm up log 2023-09-21 14:53:33 -07:00
Michael Yang
ee1c994d15 update submodule (#567) 2023-09-21 16:22:23 -04:00
Bruce MacDonald
4cba75efc5 remove tmp directories created by previous servers (#559)
* remove tmp directories created by previous servers

* clean up on server stop

* Update routes.go

* Update server/routes.go

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>

* create top-level temp ollama dir

* check file exists before creating

---------

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-21 20:38:49 +01:00
Michael Yang
8c83701e9f Merge pull request #566 from jmorganca/mxyng/api-check-model-exists
Use API to check if model exists and pull if necessary
2023-09-21 10:35:14 -07:00
Michael Yang
6137b12799 validate existence and pull model using api 2023-09-21 09:55:34 -07:00
Michael Yang
1fabba474b refactor default allow origins
this should be less error prone
2023-09-21 09:42:25 -07:00
Michael Yang
765770efdb Merge pull request #562 from jmorganca/mxyng/fix-ollama-host
fix OLLAMA_HOST parsing for ip6
2023-09-20 19:54:47 -07:00
Michael Yang
9297ff8330 fix OLLAMA_HOST parsing for ip6 2023-09-20 18:52:57 -07:00
Michael Yang
ee4fd16f2c Merge pull request #556 from jmorganca/pack-cuda
pack in cuda libs
2023-09-20 15:02:36 -07:00
Michael Yang
a9ed7cc6aa rename generate.go 2023-09-20 14:42:17 -07:00
Michael Yang
6c6a31a1e8 embed libraries using cmake 2023-09-20 14:41:57 -07:00
Bruce MacDonald
fc6ec356fc remove libcuda.so 2023-09-20 20:36:14 +01:00
Bruce MacDonald
1255bc9b45 only package 11.8 runner 2023-09-20 20:00:41 +01:00
Michael Yang
084e4c782a Merge pull request #557 from jmorganca/mxyng/cleanup
fix impossible condition
2023-09-20 11:51:01 -07:00
Michael Yang
58ffa03d8b fix impossible condition 2023-09-20 11:27:44 -07:00
Michael Yang
637f8bc6a5 Merge pull request #536 from jmorganca/mxyng/redirect-uploads
explicitly follow upload redirects
2023-09-20 11:27:03 -07:00
Michael Yang
499e9007a5 pick chunksize based on location 2023-09-20 11:10:24 -07:00
Bruce MacDonald
b9bb5ca288 use cuda_version 2023-09-20 17:58:16 +01:00
Bruce MacDonald
4e8be787c7 pack in cuda libs 2023-09-20 17:40:42 +01:00
Michael Yang
aa45d7c1df draft: explicitly follow upload redirects 2023-09-19 13:36:58 -07:00
Michael Yang
e35565c567 Merge pull request #555 from jmorganca/mxyng/fix-windows-startup
fix build
2023-09-19 10:51:58 -07:00
Michael Yang
a5520bfb42 fix build 2023-09-19 10:42:24 -07:00
Michael Yang
2627c464ba Merge pull request #554 from jmorganca/mxyng/fix-windows-startup
fix mkdir on windows
2023-09-19 09:42:12 -07:00
Michael Yang
b58d5d16b0 fix mkdir on windows 2023-09-19 09:41:13 -07:00
Patrick Devine
24580df958 only add a layer if there is actual data (#535) 2023-09-18 13:47:45 -07:00
Patrick Devine
80dd44e80a Cmd changes (#541) 2023-09-18 12:26:56 -07:00
James Braza
94e1d96b29 Updated README section on community projects for table (#550) 2023-09-18 15:22:50 -04:00
Bruce MacDonald
66003e1d05 subprocess improvements (#524)
* subprocess improvements

- increase start-up timeout
- when runner fails to start fail rather than timing out
- try runners in order rather than choosing 1 runner
- embed metal runner in metal dir rather than gpu
- refactor logging and error messages

* Update llama.go

* Update llama.go

* simplify by using glob
2023-09-18 15:16:32 -04:00
Michael Yang
c345053a8b Merge pull request #537 from jmorganca/mxyng/upload
fix error on upload chunk
2023-09-15 17:48:39 -07:00
Michael Yang
08d7c2a944 fix error on upload chunk 2023-09-15 15:59:30 -07:00
Michael Yang
bc9573dcb1 Merge pull request #530 from jmorganca/mxyng/progresswriter
implement ProgressWriter
2023-09-15 12:43:46 -07:00
Michael Yang
e53bc57d4d split uploadBlobChunked 2023-09-14 17:22:05 -07:00
Michael Yang
f0b398d17f implement ProgressWriter 2023-09-14 17:22:04 -07:00
Patrick Devine
8efbc5df55 DRAFT: add a simple python client to access ollama (#522) 2023-09-14 16:37:38 -07:00
Michael Yang
ccc3e9ac6d Merge pull request #531 from jmorganca/mxyng/content-length
set request.ContentLength
2023-09-14 13:33:11 -07:00
Michael Yang
daa4f096f9 set request.ContentLength
This informs the HTTP client the content length is known and disables
chunked Transfer-Encoding
2023-09-14 13:32:44 -07:00
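Background on that note, as Go's net/http behaves: a request body of unknown length is sent with chunked Transfer-Encoding, while a known `ContentLength` produces a plain `Content-Length` header. `http.NewRequest` fills `ContentLength` in automatically for `*bytes.Reader`, `*bytes.Buffer`, and `*strings.Reader` bodies; for other readers it can be set explicitly. A small sketch (the URL is illustrative):

```go
package main

import (
	"bytes"
	"fmt"
	"log"
	"net/http"
)

func main() {
	payload := []byte(`{"status":"upload"}`)

	// With a *bytes.Reader body, http.NewRequest sets ContentLength itself.
	req, err := http.NewRequest(http.MethodPut, "http://registry.example/v2/blob/upload", bytes.NewReader(payload))
	if err != nil {
		log.Fatal(err)
	}

	// Had the body been a plain io.Reader, ContentLength would stay 0
	// ("unknown") and the transport would fall back to chunked
	// Transfer-Encoding; setting it keeps the upload fixed-length.
	req.ContentLength = int64(len(payload))

	fmt.Println(req.ContentLength)
}
```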
Michael Yang
3ee85f1c6c Merge pull request #526 from jmorganca/mxyng/cleanup
remove unused
2023-09-14 13:10:59 -07:00
Bruce MacDonald
2540c9181c support for packaging in multiple cuda runners (#509)
* enable packaging multiple cuda versions
* use nvcc cuda version if available

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-14 15:08:13 -04:00
Michael Yang
83ffb154bc Merge pull request #507 from jmorganca/mxyng/build
update docker image
2023-09-14 11:25:59 -07:00
Michael Yang
9aa192c812 update cuda docker image 2023-09-14 11:25:20 -07:00
Matt Williams
fc8707686f Update API docs (#527)
* Update API docs

Signed-off-by: Matt Williams <m@technovangelist.com>

* strange TOC was getting auto generated

Signed-off-by: Matt Williams <m@technovangelist.com>

* Update docs/api.md

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update docs/api.md

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update docs/api.md

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update api.md

---------

Signed-off-by: Matt Williams <m@technovangelist.com>
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
Co-authored-by: Michael Chiang <mchiang0610@users.noreply.github.com>
2023-09-14 08:51:26 -07:00
Michael Yang
f89c23764b Merge pull request #525 from jmorganca/mxyng/falcon-decode
fix: add falcon.go
2023-09-13 15:08:47 -07:00
Michael Yang
e6881cabd0 remove unused 2023-09-13 14:48:33 -07:00
Michael Yang
d028853879 fix: add falcon.go 2023-09-13 14:47:37 -07:00
Michael Yang
949553db23 Merge pull request #519 from jmorganca/mxyng/decode
Mxyng/decode
2023-09-13 12:43:57 -07:00
Michael Yang
0c5a454361 fix model type for 70b 2023-09-12 15:12:59 -07:00
Bruce MacDonald
f59c4d03f7 fix ggml arm64 cuda build (#520) 2023-09-12 17:06:48 -04:00
Michael Yang
7dee25a07f fix falcon decode
get model and file type from bin file
2023-09-12 12:34:53 -07:00
Bruce MacDonald
f221637053 first pass at linux gpu support (#454)
* linux gpu support
* handle multiple gpus
* add cuda docker image (#488)
---------

Co-authored-by: Michael Yang <mxyng@pm.me>
2023-09-12 11:04:35 -04:00
Patrick Devine
45ac07cd02 create the blobs directory correctly (#508) 2023-09-11 14:54:52 -07:00
Jeffrey Morgan
7d749cc787 fix darwin build script 2023-09-11 16:31:46 -04:00
Patrick Devine
e7e91cd71c add autoprune to remove unused layers (#491) 2023-09-11 11:46:35 -07:00
Jeffrey Morgan
3920e15386 add model format to config layer (#497) 2023-09-09 17:53:44 -04:00
Michael Yang
41e976edde Merge pull request #492 from jmorganca/mxyng/nil-pointer
fix nil pointer dereference
2023-09-07 17:25:23 -07:00
Michael Yang
de227b620f fix nil pointer dereference 2023-09-07 17:24:31 -07:00
Michael Yang
63def6ca49 Merge pull request #487 from jmorganca/mxyng/dockerignore
update dockerignore
2023-09-07 14:16:17 -07:00
Michael Yang
738fe9c4aa Merge pull request #486 from jmorganca/mxyng/fix-push
fix: retry push on expired token
2023-09-07 13:58:34 -07:00
Michael Yang
a8da0bacbe update dockerignore 2023-09-07 13:36:25 -07:00
Michael Yang
bf146fb072 fix retry on unauthorized chunk 2023-09-07 12:02:04 -07:00
Michael Yang
f0f4943577 fix get auth token 2023-09-07 12:01:56 -07:00
Bruce MacDonald
09dd2aeff9 GGUF support (#441) 2023-09-07 13:55:37 -04:00
142 changed files with 60836 additions and 2701 deletions

.dockerignore

@@ -1,4 +1,8 @@
.vscode
ollama
app
dist
scripts
llm/llama.cpp/ggml
llm/llama.cpp/gguf
.env

.gitmodules

@@ -1,4 +1,10 @@
[submodule "llm/llama.cpp/ggml"]
path = llm/llama.cpp/ggml
url = https://github.com/ggerganov/llama.cpp.git
ignore = dirty
path = llm/llama.cpp/ggml
url = https://github.com/ggerganov/llama.cpp.git
ignore = dirty
shallow = true
[submodule "llm/llama.cpp/gguf"]
path = llm/llama.cpp/gguf
url = https://github.com/ggerganov/llama.cpp.git
ignore = dirty
shallow = true

Dockerfile

@@ -1,21 +1,23 @@
FROM golang:alpine
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04
ARG TARGETARCH
ARG GOFLAGS="'-ldflags=-w -s'"
WORKDIR /go/src/github.com/jmorganca/ollama
RUN apk add --no-cache git build-base cmake
RUN apt-get update && apt-get install -y git build-essential cmake
ADD https://dl.google.com/go/go1.21.3.linux-$TARGETARCH.tar.gz /tmp/go1.21.3.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.3.tar.gz
COPY . .
RUN go generate ./... && go build -ldflags '-linkmode external -extldflags "-static"' .
FROM alpine
ENV OLLAMA_HOST 0.0.0.0
RUN apk add --no-cache libstdc++
ARG USER=ollama
ARG GROUP=ollama
RUN addgroup $GROUP && adduser -D -G $GROUP $USER
ENV GOARCH=$TARGETARCH
ENV GOFLAGS=$GOFLAGS
RUN /usr/local/go/bin/go generate ./... \
&& /usr/local/go/bin/go build .
FROM ubuntu:22.04
RUN apt-get update && apt-get install -y ca-certificates
COPY --from=0 /go/src/github.com/jmorganca/ollama/ollama /bin/ollama
USER $USER:$GROUP
EXPOSE 11434
ENV OLLAMA_HOST 0.0.0.0
ENTRYPOINT ["/bin/ollama"]
CMD ["serve"]

Dockerfile.build (new file)

@@ -0,0 +1,31 @@
# centos7 amd64 dependencies
FROM --platform=linux/amd64 nvidia/cuda:11.3.1-devel-centos7 AS base-amd64
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl && \
yum update -y && \
yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236 wget
RUN wget "https://github.com/Kitware/CMake/releases/download/v3.27.6/cmake-3.27.6-linux-x86_64.sh" -O cmake-installer.sh && chmod +x cmake-installer.sh && ./cmake-installer.sh --skip-license --prefix=/usr/local
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH
# centos8 arm64 dependencies
FROM --platform=linux/arm64 nvidia/cuda-arm64:11.3.1-devel-centos8 AS base-arm64
RUN sed -i -e 's/mirrorlist/#mirrorlist/g' -e 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-*
RUN yum install -y git cmake
FROM base-${TARGETARCH}
ARG TARGETARCH
ARG GOFLAGS="'-ldflags -w -s'"
# install go
ADD https://dl.google.com/go/go1.21.3.linux-$TARGETARCH.tar.gz /tmp/go1.21.3.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.3.tar.gz
# build the final binary
WORKDIR /go/src/github.com/jmorganca/ollama
COPY . .
ENV GOOS=linux
ENV GOARCH=$TARGETARCH
ENV GOFLAGS=$GOFLAGS
RUN /usr/local/go/bin/go generate ./... && \
/usr/local/go/bin/go build .

README.md

@@ -9,19 +9,31 @@
[![Discord](https://dcbadge.vercel.app/api/server/ollama?style=flat&compact=true)](https://discord.gg/ollama)
Run, create, and share large language models (LLMs).
Get up and running with large language models locally.
> Note: Ollama is in early preview. Please report any issues you find.
### macOS
## Download
[Download](https://ollama.ai/download/Ollama-darwin.zip)
- [Download](https://ollama.ai/download) for macOS
- Download for Windows and Linux (coming soon)
- Build [from source](#building)
### Windows
Coming soon!
### Linux & WSL2
```
curl https://ollama.ai/install.sh | sh
```
[Manual install instructions](https://github.com/jmorganca/ollama/blob/main/docs/linux.md)
### Docker
See the official [Docker image](https://hub.docker.com/r/ollama/ollama).
## Quickstart
To run and chat with [Llama 2](https://ai.meta.com/llama), the new model by Meta:
To run and chat with [Llama 2](https://ollama.ai/library/llama2):
```
ollama run llama2
@@ -33,83 +45,50 @@ Ollama supports a list of open-source models available on [ollama.ai/library](ht
Here are some example open-source models that can be downloaded:
| Model | Parameters | Size | Download |
| ------------------------ | ---------- | ----- | ------------------------------- |
| Llama2 | 7B | 3.8GB | `ollama pull llama2` |
| Llama2 13B | 13B | 7.3GB | `ollama pull llama2:13b` |
| Llama2 70B | 70B | 39GB | `ollama pull llama2:70b` |
| Llama2 Uncensored | 7B | 3.8GB | `ollama pull llama2-uncensored` |
| Code Llama | 7B | 3.8GB | `ollama pull codellama` |
| Orca Mini | 3B | 1.9GB | `ollama pull orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama pull vicuna` |
| Nous-Hermes | 7B | 3.8GB | `ollama pull nous-hermes` |
| Nous-Hermes 13B | 13B | 7.3GB | `ollama pull nous-hermes:13b` |
| Wizard Vicuna Uncensored | 13B | 7.3GB | `ollama pull wizard-vicuna` |
| Model | Parameters | Size | Download |
| ------------------ | ---------- | ----- | ------------------------------ |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
| Llama 2 | 7B | 3.8GB | `ollama run llama2` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
| Llama 2 Uncensored | 7B | 3.8GB | `ollama run llama2-uncensored` |
| Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
| Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
| Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
> Note: You should have at least 8 GB of RAM to run the 3B models, 16 GB to run the 7B models, and 32 GB to run the 13B models.
## Examples
## Customize your own model
### Pull a public model
### Import from GGUF
```
ollama pull llama2
```
Ollama supports importing GGUF models in the Modelfile:
> This command can also be used to update a local model. Only updated changes will be pulled.
1. Create a file named `Modelfile`, with a `FROM` instruction with the local filepath to the model you want to import.
### Run a model interactively
```
FROM ./vicuna-33b.Q4_0.gguf
```
```
ollama run llama2
>>> hi
Hello! How can I help you today?
```
2. Create the model in Ollama
For multiline input, you can wrap text with `"""`:
```
ollama create example -f Modelfile
```
```
>>> """Hello,
... world!
... """
I'm a basic program that prints the famous "Hello, world!" message to the console.
```
3. Run the model
### Run a model non-interactively
```
ollama run example
```
```
$ ollama run llama2 'tell me a joke'
Sure! Here's a quick one:
Why did the scarecrow win an award? Because he was outstanding in his field!
```
### Import from PyTorch or Safetensors
```
$ cat <<EOF >prompts.txt
tell me a joke about llamas
tell me another one
EOF
$ ollama run llama2 <prompts.txt
>>> tell me a joke about llamas
Why did the llama refuse to play hide-and-seek?
nobody likes to be hided!
See the [guide](docs/import.md) on importing models for more information.
>>> tell me another one
Sure, here's another one:
### Customize a prompt
Why did the llama go to the bar?
To have a hay-often good time!
```
### Run a model on contents of a text file
```
$ ollama run llama2 "summarize this file:" "$(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
### Customize a model
Pull a base model:
Models from the Ollama library can be customized with a prompt. The example
```
ollama pull llama2
@@ -138,30 +117,61 @@ ollama run mario
Hello! It's your friend Mario.
```
For more examples, see the [examples](./examples) directory. For more information on creating a Modelfile, see the [Modelfile](./docs/modelfile.md) documentation.
For more examples, see the [examples](examples) directory. For more information on working with a Modelfile, see the [Modelfile](docs/modelfile.md) documentation.
### Listing local models
## CLI Reference
### Create a model
`ollama create` is used to create a model from a Modelfile.
### Pull a model
```
ollama list
ollama pull llama2
```
### Removing local models
> This command can also be used to update a local model. Only the diff will be pulled.
### Remove a model
```
ollama rm llama2
```
## Model packages
### Copy a model
### Overview
```
ollama cp llama2 my-llama2
```
Ollama bundles model weights, configurations, and data into a single package, defined by a [Modelfile](./docs/modelfile.md).
### Multiline input
<picture>
<source media="(prefers-color-scheme: dark)" height="480" srcset="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70">
<img alt="logo" height="480" src="https://github.com/jmorganca/ollama/assets/251292/2fd96b5f-191b-45c1-9668-941cfad4eb70">
</picture>
For multiline input, you can wrap text with `"""`:
```
>>> """Hello,
... world!
... """
I'm a basic program that prints the famous "Hello, world!" message to the console.
```
### Pass in prompt as arguments
```
$ ollama run llama2 "summarize this file:" "$(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
### List models on your computer
```
ollama list
```
### Start Ollama
`ollama serve` is used when you want to start ollama without running the desktop application.
## Building
@@ -193,7 +203,7 @@ Finally, in a separate shell, run a model:
## REST API
> See the [API documentation](./docs/api.md) for all endpoints.
See the [API documentation](docs/api.md) for all endpoints.
Ollama has an API for running and managing models. For example to generate text from a model:
@@ -204,12 +214,22 @@ curl -X POST http://localhost:11434/api/generate -d '{
}'
```
## Community Projects using Ollama
## Community Integrations
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with a question-answering [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa).
- [Continue](https://github.com/continuedev/continue) - embeds Ollama inside Visual Studio Code. The extension lets you highlight code to add to the prompt, ask questions in the sidebar, and generate code inline.
- [LiteLLM](https://github.com/BerriAI/litellm) a lightweight python package to simplify LLM API calls
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot) - interact with Ollama as a chatbot on Discord.
- [Raycast Ollama](https://github.com/MassimilianoPasquini97/raycast_ollama) - Raycast extension to use Ollama for local llama inference on Raycast.
- [Simple HTML UI for Ollama](https://github.com/rtcfirefly/ollama-ui)
- [Emacs client](https://github.com/zweifisch/ollama) for Ollama
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
- [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
- [Continue](https://github.com/continuedev/continue)
- [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Dumbar](https://github.com/JerrySievert/Dumbar)
- [Emacs client](https://github.com/zweifisch/ollama)
- [oterm](https://github.com/ggozad/oterm)
- [Ellama Emacs client](https://github.com/s-kostyaev/ellama)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)

api/client.go

@@ -7,25 +7,24 @@ import (
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
"runtime"
"strings"
"github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/version"
)
const DefaultHost = "127.0.0.1:11434"
var (
envHost = os.Getenv("OLLAMA_HOST")
)
var envHost = os.Getenv("OLLAMA_HOST")
type Client struct {
Base url.URL
HTTP http.Client
Headers http.Header
base *url.URL
http http.Client
}
func checkError(resp *http.Response, body []byte) error {
@@ -44,34 +43,46 @@ func checkError(resp *http.Response, body []byte) error {
return apiError
}
// Host returns the default host to use for the client. It is determined in the following order:
// 1. The OLLAMA_HOST environment variable
// 2. The default host (localhost:11434)
func Host() string {
if envHost != "" {
return envHost
}
return DefaultHost
}
// FromEnv creates a new client using Host() as the host. An error is returns
// if the host is invalid.
func FromEnv() (*Client, error) {
h := Host()
if !strings.HasPrefix(h, "http://") && !strings.HasPrefix(h, "https://") {
h = "http://" + h
func ClientFromEnvironment() (*Client, error) {
scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://")
if !ok {
scheme, hostport = "http", os.Getenv("OLLAMA_HOST")
}
u, err := url.Parse(h)
host, port, err := net.SplitHostPort(hostport)
if err != nil {
return nil, fmt.Errorf("could not parse host: %w", err)
host, port = "127.0.0.1", "11434"
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
host = ip.String()
} else if hostport != "" {
host = hostport
}
}
if u.Port() == "" {
u.Host += ":11434"
client := Client{
base: &url.URL{
Scheme: scheme,
Host: net.JoinHostPort(host, port),
},
}
return &Client{Base: *u, HTTP: http.Client{}}, nil
mockRequest, err := http.NewRequest("HEAD", client.base.String(), nil)
if err != nil {
return nil, err
}
proxyURL, err := http.ProxyFromEnvironment(mockRequest)
if err != nil {
return nil, err
}
client.http = http.Client{
Transport: &http.Transport{
Proxy: http.ProxyURL(proxyURL),
},
}
return &client, nil
}
func (c *Client) do(ctx context.Context, method, path string, reqData, respData any) error {
@@ -86,7 +97,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
reqBody = bytes.NewReader(data)
}
requestURL := c.Base.JoinPath(path)
requestURL := c.base.JoinPath(path)
request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), reqBody)
if err != nil {
return err
@@ -96,11 +107,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
request.Header.Set("Accept", "application/json")
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
for k, v := range c.Headers {
request.Header[k] = v
}
respObj, err := c.HTTP.Do(request)
respObj, err := c.http.Do(request)
if err != nil {
return err
}
@@ -123,6 +130,8 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
return nil
}
const maxBufferSize = 512 * format.KiloByte
func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
var buf *bytes.Buffer
if data != nil {
@@ -134,23 +143,26 @@ func (c *Client) stream(ctx context.Context, method, path string, data any, fn f
buf = bytes.NewBuffer(bts)
}
requestURL := c.Base.JoinPath(path)
requestURL := c.base.JoinPath(path)
request, err := http.NewRequestWithContext(ctx, method, requestURL.String(), buf)
if err != nil {
return err
}
request.Header.Set("Content-Type", "application/json")
request.Header.Set("Accept", "application/json")
request.Header.Set("Accept", "application/x-ndjson")
request.Header.Set("User-Agent", fmt.Sprintf("ollama/%s (%s %s) Go/%s", version.Version, runtime.GOARCH, runtime.GOOS, runtime.Version()))
response, err := http.DefaultClient.Do(request)
response, err := c.http.Do(request)
if err != nil {
return err
}
defer response.Body.Close()
scanner := bufio.NewScanner(response.Body)
// increase the buffer size to avoid running out of space
scanBuf := make([]byte, 0, maxBufferSize)
scanner.Buffer(scanBuf, maxBufferSize)
for scanner.Scan() {
var errorResponse struct {
Error string `json:"error,omitempty"`

api/client.py (new file)

@@ -0,0 +1,225 @@
import os
import json
import requests
BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
# Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses.
# The final response object will include statistics and additional data from the request. Use the callback function to override
# the default handler.
def generate(model_name, prompt, system=None, template=None, context=None, options=None, callback=None):
try:
url = f"{BASE_URL}/api/generate"
payload = {
"model": model_name,
"prompt": prompt,
"system": system,
"template": template,
"context": context,
"options": options
}
# Remove keys with None values
payload = {k: v for k, v in payload.items() if v is not None}
with requests.post(url, json=payload, stream=True) as response:
response.raise_for_status()
# Creating a variable to hold the context history of the final chunk
final_context = None
# Variable to hold concatenated response strings if no callback is provided
full_response = ""
# Iterating over the response line by line and displaying the details
for line in response.iter_lines():
if line:
# Parsing each line (JSON chunk) and extracting the details
chunk = json.loads(line)
# If a callback function is provided, call it with the chunk
if callback:
callback(chunk)
else:
# If this is not the last chunk, add the "response" field value to full_response and print it
if not chunk.get("done"):
response_piece = chunk.get("response", "")
full_response += response_piece
print(response_piece, end="", flush=True)
# Check if it's the last chunk (done is true)
if chunk.get("done"):
final_context = chunk.get("context")
# Return the full response and the final context
return full_response, final_context
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
return None, None
# Create a model from a Modelfile. Use the callback function to override the default handler.
def create(model_name, model_path, callback=None):
try:
url = f"{BASE_URL}/api/create"
payload = {"name": model_name, "path": model_path}
# Making a POST request with the stream parameter set to True to handle streaming responses
with requests.post(url, json=payload, stream=True) as response:
response.raise_for_status()
# Iterating over the response line by line and displaying the status
for line in response.iter_lines():
if line:
# Parsing each line (JSON chunk) and extracting the status
chunk = json.loads(line)
if callback:
callback(chunk)
else:
print(f"Status: {chunk.get('status')}")
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
# Pull a model from a the model registry. Cancelled pulls are resumed from where they left off, and multiple
# calls to will share the same download progress. Use the callback function to override the default handler.
def pull(model_name, insecure=False, callback=None):
try:
url = f"{BASE_URL}/api/pull"
payload = {
"name": model_name,
"insecure": insecure
}
# Making a POST request with the stream parameter set to True to handle streaming responses
with requests.post(url, json=payload, stream=True) as response:
response.raise_for_status()
# Iterating over the response line by line and displaying the details
for line in response.iter_lines():
if line:
# Parsing each line (JSON chunk) and extracting the details
chunk = json.loads(line)
# If a callback function is provided, call it with the chunk
if callback:
callback(chunk)
else:
# Print the status message directly to the console
print(chunk.get('status', ''), end='', flush=True)
# If there's layer data, you might also want to print that (adjust as necessary)
if 'digest' in chunk:
print(f" - Digest: {chunk['digest']}", end='', flush=True)
print(f" - Total: {chunk['total']}", end='', flush=True)
print(f" - Completed: {chunk['completed']}", end='\n', flush=True)
else:
print()
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
# Push a model to the model registry. Use the callback function to override the default handler.
def push(model_name, insecure=False, callback=None):
try:
url = f"{BASE_URL}/api/push"
payload = {
"name": model_name,
"insecure": insecure
}
# Making a POST request with the stream parameter set to True to handle streaming responses
with requests.post(url, json=payload, stream=True) as response:
response.raise_for_status()
# Iterating over the response line by line and displaying the details
for line in response.iter_lines():
if line:
# Parsing each line (JSON chunk) and extracting the details
chunk = json.loads(line)
# If a callback function is provided, call it with the chunk
if callback:
callback(chunk)
else:
# Print the status message directly to the console
print(chunk.get('status', ''), end='', flush=True)
# If there's layer data, you might also want to print that (adjust as necessary)
if 'digest' in chunk:
print(f" - Digest: {chunk['digest']}", end='', flush=True)
print(f" - Total: {chunk['total']}", end='', flush=True)
print(f" - Completed: {chunk['completed']}", end='\n', flush=True)
else:
print()
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
# List models that are available locally.
def list():
try:
response = requests.get(f"{BASE_URL}/api/tags")
response.raise_for_status()
data = response.json()
models = data.get('models', [])
return models
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
return None
# Copy a model. Creates a model with another name from an existing model.
def copy(source, destination):
try:
# Create the JSON payload
payload = {
"source": source,
"destination": destination
}
response = requests.post(f"{BASE_URL}/api/copy", json=payload)
response.raise_for_status()
# If the request was successful, return a message indicating that the copy was successful
return "Copy successful"
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
return None
# Delete a model and its data.
def delete(model_name):
try:
url = f"{BASE_URL}/api/delete"
payload = {"name": model_name}
response = requests.delete(url, json=payload)
response.raise_for_status()
return "Delete successful"
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
return None

# Show info about a model.
def show(model_name):
    try:
        url = f"{BASE_URL}/api/show"
        payload = {"name": model_name}
        response = requests.post(url, json=payload)
        response.raise_for_status()

        # Parse the JSON response and return it
        data = response.json()
        return data
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return None

# Check whether the Ollama server is reachable.
def heartbeat():
    try:
        url = f"{BASE_URL}/"
        response = requests.head(url)
        response.raise_for_status()
        return "Ollama is running"
    except requests.exceptions.RequestException as e:
        print(f"An error occurred: {e}")
        return "Ollama is not running"

View File

@@ -3,7 +3,6 @@ package api
import (
"encoding/json"
"fmt"
"log"
"math"
"os"
"reflect"
@@ -37,6 +36,7 @@ type GenerateRequest struct {
System string `json:"system"`
Template string `json:"template"`
Context []int `json:"context,omitempty"`
Stream *bool `json:"stream,omitempty"`
Options map[string]interface{} `json:"options"`
}
@@ -53,8 +53,9 @@ type EmbeddingResponse struct {
}
type CreateRequest struct {
Name string `json:"name"`
Path string `json:"path"`
Name string `json:"name"`
Path string `json:"path"`
Stream *bool `json:"stream,omitempty"`
}
type DeleteRequest struct {
@@ -83,13 +84,14 @@ type PullRequest struct {
Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
Stream *bool `json:"stream,omitempty"`
}
type ProgressResponse struct {
Status string `json:"status"`
Digest string `json:"digest,omitempty"`
Total int `json:"total,omitempty"`
Completed int `json:"completed,omitempty"`
Total int64 `json:"total,omitempty"`
Completed int64 `json:"completed,omitempty"`
}
type PushRequest struct {
@@ -97,6 +99,7 @@ type PushRequest struct {
Insecure bool `json:"insecure,omitempty"`
Username string `json:"username"`
Password string `json:"password"`
Stream *bool `json:"stream,omitempty"`
}
type ListResponse struct {
@@ -106,7 +109,7 @@ type ListResponse struct {
type ModelResponse struct {
Name string `json:"name"`
ModifiedAt time.Time `json:"modified_at"`
Size int `json:"size"`
Size int64 `json:"size"`
Digest string `json:"digest"`
}
@@ -117,7 +120,7 @@ type TokenResponse struct {
type GenerateResponse struct {
Model string `json:"model"`
CreatedAt time.Time `json:"created_at"`
Response string `json:"response,omitempty"`
Response string `json:"response"`
Done bool `json:"done"`
Context []int `json:"context,omitempty"`
@@ -158,15 +161,10 @@ func (r *GenerateResponse) Summary() {
}
}
type Options struct {
Seed int `json:"seed,omitempty"`
// Backend options
UseNUMA bool `json:"numa,omitempty"`
// Model options
// Runner options which must be set when the model is loaded into memory
type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"`
NumKeep int `json:"num_keep,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
@@ -180,8 +178,15 @@ type Options struct {
EmbeddingOnly bool `json:"embedding_only,omitempty"`
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
NumThread int `json:"num_thread,omitempty"`
}
// Predict options
type Options struct {
Runner
// Predict options used at runtime
NumKeep int `json:"num_keep,omitempty"`
Seed int `json:"seed,omitempty"`
NumPredict int `json:"num_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
@@ -197,10 +202,10 @@ type Options struct {
MirostatEta float32 `json:"mirostat_eta,omitempty"`
PenalizeNewline bool `json:"penalize_newline,omitempty"`
Stop []string `json:"stop,omitempty"`
NumThread int `json:"num_thread,omitempty"`
}
var ErrInvalidOpts = fmt.Errorf("invalid options")
func (opts *Options) FromMap(m map[string]interface{}) error {
valueOpts := reflect.ValueOf(opts).Elem() // names of the fields in the options struct
typeOpts := reflect.TypeOf(opts).Elem() // types of the fields in the options struct
@@ -214,6 +219,7 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
}
}
invalidOpts := []string{}
for key, val := range m {
if opt, ok := jsonOpts[key]; ok {
field := valueOpts.FieldByName(opt.Name)
@@ -231,44 +237,39 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
// when JSON unmarshals numbers, it uses float64, not int
field.SetInt(int64(t))
default:
log.Printf("could not convert model parameter %v to int, skipped", key)
return fmt.Errorf("option %q must be of type integer", key)
}
case reflect.Bool:
val, ok := val.(bool)
if !ok {
log.Printf("could not convert model parameter %v to bool, skipped", key)
continue
return fmt.Errorf("option %q must be of type boolean", key)
}
field.SetBool(val)
case reflect.Float32:
// JSON unmarshals to float64
val, ok := val.(float64)
if !ok {
log.Printf("could not convert model parameter %v to float32, skipped", key)
continue
return fmt.Errorf("option %q must be of type float32", key)
}
field.SetFloat(val)
case reflect.String:
val, ok := val.(string)
if !ok {
log.Printf("could not convert model parameter %v to string, skipped", key)
continue
return fmt.Errorf("option %q must be of type string", key)
}
field.SetString(val)
case reflect.Slice:
// JSON unmarshals to []interface{}, not []string
val, ok := val.([]interface{})
if !ok {
log.Printf("could not convert model parameter %v to slice, skipped", key)
continue
return fmt.Errorf("option %q must be of type array", key)
}
// convert []interface{} to []string
slice := make([]string, len(val))
for i, item := range val {
str, ok := item.(string)
if !ok {
log.Printf("could not convert model parameter %v to slice of strings, skipped", key)
continue
return fmt.Errorf("option %q must be of an array of strings", key)
}
slice[i] = str
}
@@ -277,45 +278,53 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
return fmt.Errorf("unknown type loading config params: %v", field.Kind())
}
}
} else {
invalidOpts = append(invalidOpts, key)
}
}
if len(invalidOpts) > 0 {
return fmt.Errorf("%w: %v", ErrInvalidOpts, strings.Join(invalidOpts, ", "))
}
return nil
}
func DefaultOptions() Options {
return Options{
Seed: -1,
UseNUMA: false,
NumCtx: 2048,
NumKeep: -1,
NumBatch: 512,
NumGPU: 1,
NumGQA: 1,
LowVRAM: false,
F16KV: true,
UseMMap: true,
UseMLock: false,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
EmbeddingOnly: true,
RepeatLastN: 64,
RepeatPenalty: 1.1,
FrequencyPenalty: 0.0,
PresencePenalty: 0.0,
// options set on request to runner
NumPredict: -1,
NumKeep: -1,
Temperature: 0.8,
TopK: 40,
TopP: 0.9,
TFSZ: 1.0,
TypicalP: 1.0,
RepeatLastN: 64,
RepeatPenalty: 1.1,
PresencePenalty: 0.0,
FrequencyPenalty: 0.0,
Mirostat: 0,
MirostatTau: 5.0,
MirostatEta: 0.1,
PenalizeNewline: true,
Seed: -1,
NumThread: 0, // let the runtime decide
Runner: Runner{
// options set when the model is loaded
NumCtx: 2048,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
UseMLock: false,
UseMMap: true,
UseNUMA: false,
EmbeddingOnly: true,
},
}
}

View File

@@ -47,16 +47,6 @@ const config: ForgeConfig = {
},
rebuildConfig: {},
makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])],
publishers: [
new PublisherGithub({
repository: {
name: 'ollama',
owner: 'jmorganca',
},
draft: false,
prerelease: true,
}),
],
hooks: {
readPackageJson: async (_, packageJson) => {
return { ...packageJson, version: process.env.VERSION || packageJson.version }

992
app/package-lock.json generated

File diff suppressed because it is too large.

View File

@@ -46,7 +46,7 @@
"chmodr": "^1.2.0",
"copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.8.1",
"electron": "25.2.0",
"electron": "25.9.2",
"eslint": "^8.43.0",
"eslint-plugin-import": "^2.27.5",
"fork-ts-checker-webpack-plugin": "^7.3.0",

View File

@@ -5,7 +5,7 @@ import winston from 'winston'
import 'winston-daily-rotate-file'
import * as path from 'path'
import { analytics, id } from './telemetry'
import { v4 as uuidv4 } from 'uuid'
import { installed } from './install'
require('@electron/remote/main').initialize()
@@ -162,13 +162,56 @@ app.on('before-quit', () => {
}
})
const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
process.arch
}&version=${app.getVersion()}&id=${id()}`
let latest = ''
async function isNewReleaseAvailable() {
try {
const response = await fetch(updateURL)
if (!response.ok) {
return false
}
if (response.status === 204) {
return false
}
const data = await response.json()
const url = data?.url
if (!url) {
return false
}
if (latest === url) {
return false
}
latest = url
return true
} catch (error) {
logger.error(`update check failed - ${error}`)
return false
}
}
async function checkUpdate() {
const available = await isNewReleaseAvailable()
if (available) {
logger.info('checking for update')
autoUpdater.checkForUpdates()
}
}
function init() {
if (app.isPackaged) {
heartbeat()
autoUpdater.checkForUpdates()
checkUpdate()
setInterval(() => {
heartbeat()
autoUpdater.checkForUpdates()
checkUpdate()
}, 60 * 60 * 1000)
}
@@ -234,28 +277,22 @@ app.on('window-all-closed', () => {
}
})
// In this file you can include the rest of your app's specific main process
// code. You can also put them in separate files and import them here.
let aid = ''
try {
aid = id()
} catch (e) {}
function id(): string {
const id = store.get('id') as string
autoUpdater.setFeedURL({
url: `https://ollama.ai/api/update?os=${process.platform}&arch=${process.arch}&version=${app.getVersion()}&id=${aid}`,
})
if (id) {
return id
}
async function heartbeat() {
analytics.track({
anonymousId: aid,
event: 'heartbeat',
properties: {
version: app.getVersion(),
},
})
const uuid = uuidv4()
store.set('id', uuid)
return uuid
}
autoUpdater.setFeedURL({ url: updateURL })
autoUpdater.on('error', e => {
logger.error(`update check failed - ${e.message}`)
console.error(`update check failed - ${e.message}`)
})

View File

@@ -1,19 +0,0 @@
import { Analytics } from '@segment/analytics-node'
import { v4 as uuidv4 } from 'uuid'
import Store from 'electron-store'
const store = new Store()
export const analytics = new Analytics({ writeKey: process.env.TELEMETRY_WRITE_KEY || '<empty>' })
export function id(): string {
const id = store.get('id') as string
if (id) {
return id
}
const uuid = uuidv4()
store.set('id', uuid)
return uuid
}

View File

@@ -11,20 +11,21 @@ import (
"io"
"log"
"net"
"net/http"
"os"
"os/exec"
"path"
"os/signal"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"github.com/chzyer/readline"
"github.com/dustin/go-humanize"
"github.com/olekukonko/tablewriter"
"github.com/pdevine/readline"
"github.com/spf13/cobra"
"golang.org/x/crypto/ssh"
"golang.org/x/term"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format"
@@ -33,6 +34,26 @@ import (
"github.com/jmorganca/ollama/version"
)
type Painter struct {
IsMultiLine bool
}
func (p Painter) Paint(line []rune, _ int) []rune {
termType := os.Getenv("TERM")
if termType == "xterm-256color" && len(line) == 0 {
var prompt string
if p.IsMultiLine {
prompt = "Use \"\"\" to end multi-line input"
} else {
prompt = "Send a message (/? for help)"
}
return []rune(fmt.Sprintf("\033[38;5;245m%s\033[%dD\033[0m", prompt, len(prompt)))
}
// add a space and a backspace to prevent the cursor from walking up the screen
line = append(line, []rune(" \b")...)
return line
}
func CreateHandler(cmd *cobra.Command, args []string) error {
filename, _ := cmd.Flags().GetString("file")
filename, err := filepath.Abs(filename)
@@ -40,7 +61,7 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -57,20 +78,14 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
spinner.Stop()
}
currentDigest = resp.Digest
switch {
case strings.Contains(resp.Status, "embeddings"):
bar = progressbar.Default(int64(resp.Total), resp.Status)
bar.Set(resp.Completed)
default:
// pulling
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Status,
)
bar.Set(resp.Completed)
}
// pulling
bar = progressbar.DefaultBytes(
resp.Total,
resp.Status,
)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
if spinner != nil {
@@ -98,39 +113,24 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
}
func RunHandler(cmd *cobra.Command, args []string) error {
insecure, err := cmd.Flags().GetBool("insecure")
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
mp := server.ParseModelPath(args[0])
models, err := client.List(context.Background())
if err != nil {
return err
}
if mp.ProtocolScheme == "http" && !insecure {
return fmt.Errorf("insecure protocol http")
}
fp, err := mp.GetManifestPath(false)
if err != nil {
return err
}
_, err = os.Stat(fp)
switch {
case errors.Is(err, os.ErrNotExist):
if err := pull(args[0], insecure); err != nil {
var apiStatusError api.StatusError
if !errors.As(err, &apiStatusError) {
return err
}
if apiStatusError.StatusCode != http.StatusBadGateway {
return err
}
canonicalModelPath := server.ParseModelPath(args[0])
for _, model := range models.Models {
if model.Name == canonicalModelPath.GetShortTagname() {
return RunGenerate(cmd, args)
}
case err != nil:
}
if err := PullHandler(cmd, args); err != nil {
return err
}
@@ -138,7 +138,7 @@ func RunHandler(cmd *cobra.Command, args []string) error {
}
func PushHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -156,13 +156,13 @@ func PushHandler(cmd *cobra.Command, args []string) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
@@ -182,7 +182,7 @@ func PushHandler(cmd *cobra.Command, args []string) error {
}
func ListHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -215,7 +215,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {
}
func DeleteHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -231,7 +231,7 @@ func DeleteHandler(cmd *cobra.Command, args []string) error {
}
func ShowHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -309,7 +309,7 @@ func ShowHandler(cmd *cobra.Command, args []string) error {
}
func CopyHandler(cmd *cobra.Command, args []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -332,7 +332,7 @@ func PullHandler(cmd *cobra.Command, args []string) error {
}
func pull(model string, insecure bool) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -345,13 +345,13 @@ func pull(model string, insecure bool) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
int64(resp.Total),
resp.Total,
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set(resp.Completed)
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
@@ -374,7 +374,20 @@ func pull(model string, insecure bool) error {
func RunGenerate(cmd *cobra.Command, args []string) error {
if len(args) > 1 {
// join all args into a single prompt
return generate(cmd, args[0], strings.Join(args[1:], " "))
wordWrap := false
if term.IsTerminal(int(os.Stdout.Fd())) {
wordWrap = true
}
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
return generate(cmd, args[0], strings.Join(args[1:], " "), wordWrap)
}
if readline.IsTerminal(int(os.Stdin.Fd())) {
@@ -386,71 +399,110 @@ func RunGenerate(cmd *cobra.Command, args []string) error {
type generateContextKey string
func generate(cmd *cobra.Command, model, prompt string) error {
if len(strings.TrimSpace(prompt)) > 0 {
client, err := api.FromEnv()
if err != nil {
return err
func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
spinner := NewSpinner("")
go spinner.Spin(60 * time.Millisecond)
var latest api.GenerateResponse
generateContext, ok := cmd.Context().Value(generateContextKey("context")).([]int)
if !ok {
generateContext = []int{}
}
termWidth, _, err := term.GetSize(int(0))
if err != nil {
wordWrap = false
}
cancelCtx, cancel := context.WithCancel(context.Background())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT)
var abort bool
go func() {
<-sigChan
cancel()
abort = true
}()
var currentLineLength int
var wordBuffer string
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext}
fn := func(response api.GenerateResponse) error {
if !spinner.IsFinished() {
spinner.Finish()
}
spinner := NewSpinner("")
go spinner.Spin(60 * time.Millisecond)
latest = response
var latest api.GenerateResponse
if wordWrap {
for _, ch := range response.Response {
if currentLineLength+1 > termWidth-5 {
// backtrack the length of the last word and clear to the end of the line
fmt.Printf("\x1b[%dD\x1b[K\n", len(wordBuffer))
fmt.Printf("%s%c", wordBuffer, ch)
currentLineLength = len(wordBuffer) + 1
} else {
fmt.Print(string(ch))
currentLineLength += 1
generateContext, ok := cmd.Context().Value(generateContextKey("context")).([]int)
if !ok {
generateContext = []int{}
}
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext}
fn := func(response api.GenerateResponse) error {
if !spinner.IsFinished() {
spinner.Finish()
switch ch {
case ' ':
wordBuffer = ""
case '\n':
currentLineLength = 0
default:
wordBuffer += string(ch)
}
}
}
latest = response
} else {
fmt.Print(response.Response)
}
return nil
}
if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil
}
if err := client.Generate(context.Background(), &request, fn); err != nil {
if strings.Contains(err.Error(), "failed to load model") {
// tell the user to check the server log, if it exists locally
home, nestedErr := os.UserHomeDir()
if nestedErr != nil {
// return the original error
return err
}
logPath := filepath.Join(home, ".ollama", "logs", "server.log")
if _, nestedErr := os.Stat(logPath); nestedErr == nil {
err = fmt.Errorf("%w\nFor more details, check the error logs at %s", err, logPath)
}
}
return err
}
fmt.Println()
fmt.Println()
if !latest.Done {
return errors.New("unexpected end of response")
}
verbose, err := cmd.Flags().GetBool("verbose")
if err != nil {
return err
}
if verbose {
latest.Summary()
}
ctx := cmd.Context()
ctx = context.WithValue(ctx, generateContextKey("context"), latest.Context)
cmd.SetContext(ctx)
return err
}
if prompt != "" {
fmt.Println()
fmt.Println()
}
if !latest.Done {
if abort {
return nil
}
return errors.New("unexpected end of response")
}
verbose, err := cmd.Flags().GetBool("verbose")
if err != nil {
return err
}
if verbose {
latest.Summary()
}
ctx := cmd.Context()
ctx = context.WithValue(ctx, generateContextKey("context"), latest.Context)
cmd.SetContext(ctx)
return nil
}
@@ -461,19 +513,21 @@ func generateInteractive(cmd *cobra.Command, model string) error {
return err
}
// load the model
if err := generate(cmd, model, "", false); err != nil {
return err
}
completer := readline.NewPrefixCompleter(
readline.PcItem("/help"),
readline.PcItem("/list"),
readline.PcItem("/set",
readline.PcItem("history"),
readline.PcItem("nohistory"),
readline.PcItem("wordwrap"),
readline.PcItem("nowordwrap"),
readline.PcItem("verbose"),
readline.PcItem("quiet"),
readline.PcItem("mode",
readline.PcItem("vim"),
readline.PcItem("emacs"),
readline.PcItem("default"),
),
),
readline.PcItem("/show",
readline.PcItem("license"),
@@ -487,11 +541,41 @@ func generateInteractive(cmd *cobra.Command, model string) error {
)
usage := func() {
fmt.Fprintln(os.Stderr, "commands:")
fmt.Fprintln(os.Stderr, completer.Tree(" "))
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set Set session variables")
fmt.Fprintln(os.Stderr, " /show Show model information")
fmt.Fprintln(os.Stderr, " /bye Exit")
fmt.Fprintln(os.Stderr, " /?, /help Help for a command")
fmt.Fprintln(os.Stderr, "")
fmt.Fprintln(os.Stderr, "Use \"\"\" to begin a multi-line message.")
fmt.Fprintln(os.Stderr, "")
}
usageSet := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set history Enable history")
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
fmt.Fprintln(os.Stderr, "")
}
usageShow := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /show license Show model license")
fmt.Fprintln(os.Stderr, " /show modelfile Show Modelfile for this model")
fmt.Fprintln(os.Stderr, " /show parameters Show parameters for this model")
fmt.Fprintln(os.Stderr, " /show system Show system prompt")
fmt.Fprintln(os.Stderr, " /show template Show prompt template")
fmt.Fprintln(os.Stderr, "")
}
var painter Painter
config := readline.Config{
Painter: &painter,
Prompt: ">>> ",
HistoryFile: filepath.Join(home, ".ollama", "history"),
AutoComplete: completer,
@@ -503,6 +587,21 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
defer scanner.Close()
var wordWrap bool
termType := os.Getenv("TERM")
if termType == "xterm-256color" {
wordWrap = true
}
// override wrapping if the user turned it off
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
var multiLineBuffer string
var isMultiLine bool
@@ -513,7 +612,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
return nil
case errors.Is(err, readline.ErrInterrupt):
if line == "" {
return nil
fmt.Println("Use Ctrl-D or /bye to exit.")
}
continue
@@ -527,6 +626,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case isMultiLine:
if strings.HasSuffix(line, `"""`) {
isMultiLine = false
painter.IsMultiLine = isMultiLine
multiLineBuffer += strings.TrimSuffix(line, `"""`)
line = multiLineBuffer
multiLineBuffer = ""
@@ -537,6 +637,7 @@ func generateInteractive(cmd *cobra.Command, model string) error {
}
case strings.HasPrefix(line, `"""`):
isMultiLine = true
painter.IsMultiLine = isMultiLine
multiLineBuffer = strings.TrimPrefix(line, `"""`) + " "
scanner.SetPrompt("... ")
continue
@@ -545,84 +646,115 @@ func generateInteractive(cmd *cobra.Command, model string) error {
if err := ListHandler(cmd, args[1:]); err != nil {
return err
}
continue
case strings.HasPrefix(line, "/set"):
args := strings.Fields(line)
if len(args) > 1 {
switch args[1] {
case "history":
scanner.HistoryEnable()
continue
case "nohistory":
scanner.HistoryDisable()
continue
case "wordwrap":
wordWrap = true
fmt.Println("Set 'wordwrap' mode.")
case "nowordwrap":
wordWrap = false
fmt.Println("Set 'nowordwrap' mode.")
case "verbose":
cmd.Flags().Set("verbose", "true")
continue
fmt.Println("Set 'verbose' mode.")
case "quiet":
cmd.Flags().Set("verbose", "false")
continue
fmt.Println("Set 'quiet' mode.")
case "mode":
if len(args) > 2 {
switch args[2] {
case "vim":
scanner.SetVimMode(true)
continue
case "emacs", "default":
scanner.SetVimMode(false)
continue
default:
usage()
continue
}
} else {
usage()
continue
}
default:
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
}
} else {
usage()
continue
usageSet()
}
case strings.HasPrefix(line, "/show"):
args := strings.Fields(line)
if len(args) > 1 {
resp, err := server.GetModelInfo(model)
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Println("error: couldn't connect to ollama server")
return err
}
resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: model})
if err != nil {
fmt.Println("error: couldn't get model")
continue
return err
}
switch args[1] {
case "license":
fmt.Println(resp.License)
if resp.License == "" {
fmt.Print("No license was specified for this model.\n\n")
} else {
fmt.Println(resp.License)
}
case "modelfile":
fmt.Println(resp.Modelfile)
case "parameters":
fmt.Println(resp.Parameters)
if resp.Parameters == "" {
fmt.Print("No parameters were specified for this model.\n\n")
} else {
fmt.Println(resp.Parameters)
}
case "system":
fmt.Println(resp.System)
if resp.System == "" {
fmt.Print("No system prompt was specified for this model.\n\n")
} else {
fmt.Println(resp.System)
}
case "template":
fmt.Println(resp.Template)
if resp.Template == "" {
fmt.Print("No prompt template was specified for this model.\n\n")
} else {
fmt.Println(resp.Template)
}
default:
fmt.Println("error: unknown command")
fmt.Printf("Unknown command '/show %s'. Type /? for help\n", args[1])
}
} else {
usageShow()
}
case strings.HasPrefix(line, "/help"), strings.HasPrefix(line, "/?"):
args := strings.Fields(line)
if len(args) > 1 {
switch args[1] {
case "set", "/set":
usageSet()
case "show", "/show":
usageShow()
}
continue
} else {
usage()
continue
}
case line == "/help", line == "/?":
usage()
continue
case line == "/exit", line == "/bye":
return nil
case strings.HasPrefix(line, "/"):
args := strings.Fields(line)
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
}
if err := generate(cmd, model, line); err != nil {
return err
if len(line) > 0 && line[0] != '/' {
if err := generate(cmd, model, line, wordWrap); err != nil {
return err
}
}
}
}
@@ -632,7 +764,7 @@ func generateBatch(cmd *cobra.Command, model string) error {
for scanner.Scan() {
prompt := scanner.Text()
fmt.Printf(">>> %s\n", prompt)
if err := generate(cmd, model, prompt); err != nil {
if err := generate(cmd, model, prompt, false); err != nil {
return err
}
}
@@ -641,28 +773,19 @@ func generateBatch(cmd *cobra.Command, model string) error {
}
func RunServer(cmd *cobra.Command, _ []string) error {
host, port := "127.0.0.1", "11434"
parts := strings.Split(os.Getenv("OLLAMA_HOST"), ":")
if ip := net.ParseIP(parts[0]); ip != nil {
host = ip.String()
}
if len(parts) > 1 {
port = parts[1]
}
// deprecated: include port in OLLAMA_HOST
if p := os.Getenv("OLLAMA_PORT"); p != "" {
port = p
}
err := initializeKeypair()
host, port, err := net.SplitHostPort(os.Getenv("OLLAMA_HOST"))
if err != nil {
host, port = "127.0.0.1", "11434"
if ip := net.ParseIP(strings.Trim(os.Getenv("OLLAMA_HOST"), "[]")); ip != nil {
host = ip.String()
}
}
if err := initializeKeypair(); err != nil {
return err
}
ln, err := net.Listen("tcp", fmt.Sprintf("%s:%s", host, port))
ln, err := net.Listen("tcp", net.JoinHostPort(host, port))
if err != nil {
return err
}
@@ -672,6 +795,21 @@ func RunServer(cmd *cobra.Command, _ []string) error {
origins = strings.Split(o, ",")
}
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
if err := server.PruneLayers(); err != nil {
return err
}
manifestsPath, err := server.GetManifestPath()
if err != nil {
return err
}
if err := server.PruneDirectory(manifestsPath); err != nil {
return err
}
}
return server.Serve(ln, origins)
}
@@ -697,7 +835,7 @@ func initializeKeypair() error {
return err
}
err = os.MkdirAll(path.Dir(privKeyPath), 0o700)
err = os.MkdirAll(filepath.Dir(privKeyPath), 0o755)
if err != nil {
return fmt.Errorf("could not create directory %w", err)
}
@@ -756,7 +894,7 @@ func startMacApp(client *api.Client) error {
}
func checkServerHeartbeat(_ *cobra.Command, _ []string) error {
client, err := api.FromEnv()
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
@@ -794,7 +932,7 @@ func NewCLI() *cobra.Command {
createCmd := &cobra.Command{
Use: "create MODEL",
Short: "Create a model from a Modelfile",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: CreateHandler,
}
@@ -804,7 +942,7 @@ func NewCLI() *cobra.Command {
showCmd := &cobra.Command{
Use: "show MODEL",
Short: "Show information for a model",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: ShowHandler,
}
@@ -825,18 +963,20 @@ func NewCLI() *cobra.Command {
runCmd.Flags().Bool("verbose", false, "Show timings for response")
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
serveCmd := &cobra.Command{
Use: "serve",
Aliases: []string{"start"},
Short: "Start ollama",
Args: cobra.ExactArgs(0),
RunE: RunServer,
}
pullCmd := &cobra.Command{
Use: "pull MODEL",
Short: "Pull a model from a registry",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: PullHandler,
}
@@ -846,7 +986,7 @@ func NewCLI() *cobra.Command {
pushCmd := &cobra.Command{
Use: "push MODEL",
Short: "Push a model to a registry",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: PushHandler,
}
@@ -862,15 +1002,15 @@ func NewCLI() *cobra.Command {
}
copyCmd := &cobra.Command{
Use: "cp",
Use: "cp SOURCE TARGET",
Short: "Copy a model",
Args: cobra.MinimumNArgs(2),
Args: cobra.ExactArgs(2),
PreRunE: checkServerHeartbeat,
RunE: CopyHandler,
}
deleteCmd := &cobra.Command{
Use: "rm",
Use: "rm MODEL [MODEL...]",
Short: "Remove a model",
Args: cobra.MinimumNArgs(1),
PreRunE: checkServerHeartbeat,

View File

@@ -3,26 +3,32 @@
## Endpoints
- [Generate a completion](#generate-a-completion)
- [Create a model](#create-a-model)
- [List local models](#list-local-models)
- [Copy a model](#copy-a-model)
- [Delete a model](#delete-a-model)
- [Pull a model](#pull-a-model)
- [Generate embeddings](#generate-embeddings)
- [Create a Model](#create-a-model)
- [List Local Models](#list-local-models)
- [Show Model Information](#show-model-information)
- [Copy a Model](#copy-a-model)
- [Delete a Model](#delete-a-model)
- [Pull a Model](#pull-a-model)
- [Push a Model](#push-a-model)
- [Generate Embeddings](#generate-embeddings)
## Conventions
### Model names
Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and if not provided will default to `latest`. The tag is used to identify a specific version.
Model names follow a `model:tag` format. Some examples are `orca-mini:3b-q4_1` and `llama2:70b`. The tag is optional and, if not provided, will default to `latest`. The tag is used to identify a specific version.
### Durations
All durations are returned in nanoseconds.
### Streaming responses
Certain endpoints stream responses as JSON objects delineated with the newline (`\n`) character.
## Generate a completion
```
```shell
POST /api/generate
```
@@ -33,16 +39,17 @@ Generate a response for a given prompt with a provided model. This is a streamin
- `model`: (required) the [model name](#model-names)
- `prompt`: the prompt to generate a response for
Advanced parameters:
Advanced parameters (optional):
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `system`: system prompt to use (overrides what is defined in the `Modelfile`)
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
- `context`: the context parameter returned from a previous request to `/generate`; this can be used to keep a short conversational memory (see the sketch after this list)
- `stream`: if `false`, the response will be returned as a single response object rather than a stream of objects
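To make the `context` round trip concrete, here is a minimal sketch (an illustration, not part of the API reference). It assumes the Python `requests` library, a server on the default port, and an example model name, and it sets `"stream": false` so each call returns a single JSON object:

```python
import requests

BASE_URL = "http://localhost:11434"

def generate(prompt, context=None):
    # stream=False so the server returns one JSON object instead of a stream.
    body = {"model": "llama2:7b", "prompt": prompt, "stream": False}
    if context:
        body["context"] = context
    resp = requests.post(f"{BASE_URL}/api/generate", json=body)
    resp.raise_for_status()
    return resp.json()

first = generate("Why is the sky blue?")
print(first["response"])

# Send the returned context back to keep a short conversational memory.
second = generate("Summarize that answer in one sentence.", context=first.get("context"))
print(second["response"])
```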
### Request
```
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
"prompt": "Why is the sky blue?"
@@ -73,6 +80,7 @@ The final response in the stream also includes additional data about the generat
- `eval_count`: number of tokens in the response
- `eval_duration`: time in nanoseconds spent generating the response
- `context`: an encoding of the conversation used in this response; this can be sent in the next request to keep a conversational memory
- `response`: empty if the response was streamed; if not streamed, this will contain the full response
To calculate how fast the response is generated in tokens per second (token/s), divide `eval_count` / `eval_duration`.
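As a quick worked example of that calculation (the numbers below are made up for illustration):

```python
# Hypothetical values from the final streamed object of /api/generate.
final = {"eval_count": 282, "eval_duration": 3036325000}

# eval_duration is in nanoseconds, so convert to seconds before dividing.
tokens_per_second = final["eval_count"] / (final["eval_duration"] / 1e9)
print(f"{tokens_per_second:.1f} tokens/s")  # roughly 92.9 tokens/s
```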
@@ -80,6 +88,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
{
"model": "llama2:7b",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "",
"context": [1, 2, 3],
"done": true,
"total_duration": 5589157167,
@@ -95,7 +104,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
## Create a Model
```
```shell
POST /api/create
```
@@ -105,10 +114,11 @@ Create a model from a [`Modelfile`](./modelfile.md)
- `name`: name of the model to create
- `path`: path to the Modelfile
- `stream`: (optional) if `false`, the response will be returned as a single response object rather than a stream of objects
### Request
```
```shell
curl -X POST http://localhost:11434/api/create -d '{
"name": "mario",
"path": "~/Modelfile"
@@ -117,7 +127,7 @@ curl -X POST http://localhost:11434/api/create -d '{
### Response
A stream of JSON objects. When finished, `status` is `success`
A stream of JSON objects. When finished, `status` is `success`.
```json
{
@@ -127,7 +137,7 @@ A stream of JSON objects. When finished, `status` is `success`
## List Local Models
```
```shell
GET /api/tags
```
@@ -135,7 +145,7 @@ List models that are available locally.
### Request
```
```shell
curl http://localhost:11434/api/tags
```
@@ -158,9 +168,40 @@ curl http://localhost:11434/api/tags
}
```
## Show Model Information
```shell
POST /api/show
```
Show details about a model including modelfile, template, parameters, license, and system prompt.
### Parameters
- `name`: name of the model to show
### Request
```shell
curl http://localhost:11434/api/show -d '{
"name": "llama2:7b"
}'
```
### Response
```json
{
"license": "<contents of license block>",
"modelfile": "# Modelfile generated by \"ollama show\"\n# To build a new Modelfile based on this one, replace the FROM line with:\n# FROM llama2:latest\n\nFROM /Users/username/.ollama/models/blobs/sha256:8daa9615cce30c259a9555b1cc250d461d1bc69980a274b44d7eda0be78076d8\nTEMPLATE \"\"\"[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] \"\"\"\nSYSTEM \"\"\"\"\"\"\nPARAMETER stop [INST]\nPARAMETER stop [/INST]\nPARAMETER stop <<SYS>>\nPARAMETER stop <</SYS>>\n",
"parameters": "stop [INST]\nstop [/INST]\nstop <<SYS>>\nstop <</SYS>>",
"template": "[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>\n\n{{ end }}{{ .Prompt }} [/INST] "
}
```
## Copy a Model
```
```shell
POST /api/copy
```
@@ -168,7 +209,7 @@ Copy a model. Creates a model with another name from an existing model.
### Request
```
```shell
curl http://localhost:11434/api/copy -d '{
"source": "llama2:7b",
"destination": "llama2-backup"
@@ -177,7 +218,7 @@ curl http://localhost:11434/api/copy -d '{
## Delete a Model
```
```shell
DELETE /api/delete
```
@@ -189,7 +230,7 @@ Delete a model and its data.
### Request
```
```shell
curl -X DELETE http://localhost:11434/api/delete -d '{
"name": "llama2:13b"
}'
@@ -197,19 +238,21 @@ curl -X DELETE http://localhost:11434/api/delete -d '{
## Pull a Model
```
```shell
POST /api/pull
```
Download a model from a the model registry. Cancelled pulls are resumed from where they left off, and multiple calls to will share the same download progress.
Download a model from the ollama library. Cancelled pulls are resumed from where they left off, and multiple calls will share the same download progress.
### Parameters
- `name`: name of the model to pull
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
- `stream`: (optional) if `false`, the response will be returned as a single response object rather than a stream of objects
### Request
```
```shell
curl -X POST http://localhost:11434/api/pull -d '{
"name": "llama2:7b"
}'
@@ -225,9 +268,66 @@ curl -X POST http://localhost:11434/api/pull -d '{
}
```
## Push a Model
```shell
POST /api/push
```
Upload a model to a model library. Requires registering for ollama.ai and adding a public key first.
### Parameters
- `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
- `stream`: (optional) if `false`, the response will be returned as a single response object rather than a stream of objects
### Request
```shell
curl -X POST http://localhost:11434/api/push -d '{
"name": "mattw/pygmalion:latest"
}'
```
### Response
Streaming response that starts with:
```json
{ "status": "retrieving manifest" }
```
and then:
```json
{
"status": "starting upload",
"digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
"total": 1928429856
}
```
Then there is a series of uploading responses:
```json
{
"status": "starting upload",
"digest": "sha256:bc07c81de745696fdf5afca05e065818a8149fb0c77266fb584d9b2cba3711ab",
"total": 1928429856
}
```
Finally, when the upload is complete:
```json
{"status":"pushing manifest"}
{"status":"success"}
```
## Generate Embeddings
```
```shell
POST /api/embeddings
```
@@ -244,7 +344,7 @@ Advanced parameters:
### Request
```
```shell
curl -X POST http://localhost:11434/api/embeddings -d '{
"model": "llama2:7b",
"prompt": "Here is an article about llamas..."
@@ -255,7 +355,7 @@ curl -X POST http://localhost:11434/api/embeddings -d '{
```json
{
"embeddings": [
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]

View File

@@ -6,24 +6,34 @@
Install required tools:
```
- cmake version 3.24 or higher
- go version 1.20 or higher
- gcc version 11.4.0 or higher
```bash
brew install go cmake gcc
```
Get the required libraries:
```
```bash
go generate ./...
```
Then build ollama:
```
```bash
go build .
```
Now you can run `ollama`:
```
```bash
./ollama
```
## Building on Linux with GPU support
- Install cmake and nvidia-cuda-toolkit
- run `go generate ./...`
- run `go build .`

View File

@@ -1,17 +1,34 @@
# FAQ
## How can I view the logs?
On macOS:
```
cat ~/.ollama/logs/server.log
```
On Linux:
```
journalctl -u ollama
```
If you're running `ollama serve` directly, the logs will be printed to the console.
## How can I expose the Ollama server?
```
```bash
OLLAMA_HOST=0.0.0.0:11435 ollama serve
```
By default, Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0`. To support more origins, you can use the `OLLAMA_ORIGINS` environment variable:
```
```bash
OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
```
## Where are models stored?
Raw model data is stored under `~/.ollama/models`.
- macOS: Raw model data is stored under `~/.ollama/models`.
- Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`

163
docs/import.md Normal file
View File

@@ -0,0 +1,163 @@
# Import a model
This guide walks through importing a PyTorch, Safetensors or GGUF model.
## Supported models
Ollama supports a set of model architectures, with support for more coming soon:
- Llama & Mistral
- Falcon & RW
- GPT-NeoX
- BigCode
To view a model's architecture, check the `config.json` file in its HuggingFace repo. You should see an entry under `architectures` (e.g. `LlamaForCausalLM`).
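As a quick programmatic check, the following sketch prints the declared architectures from a cloned repository's `config.json` (the directory name is just the Mistral example used later in this guide):

```python
import json

# Print the architecture(s) declared in a cloned HuggingFace repository.
# The directory name is illustrative; use whatever repo you cloned.
with open("Mistral-7B-Instruct-v0.1/config.json") as f:
    config = json.load(f)

print(config.get("architectures", []))  # e.g. ['MistralForCausalLM']
```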
## Importing
### Step 1: Clone the HuggingFace repository (optional)
If the model is currently hosted in a HuggingFace repository, first clone that repository to download the raw model.
```
git lfs install
git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
cd Mistral-7B-Instruct-v0.1
```
### Step 2: Convert and quantize to a `.bin` file (optional, for PyTorch and Safetensors)
If the model is in PyTorch or Safetensors format, a [Docker image](https://hub.docker.com/r/ollama/quantize) with the tooling required to convert and quantize models is available.
First, Install [Docker](https://www.docker.com/get-started/).
Next, to convert and quantize your model, run:
```
docker run --rm -v .:/model ollama/quantize -q q4_0 /model
```
This will output two files into the directory:
- `f16.bin`: the model converted to GGUF
- `q4_0.bin`: the model quantized to 4 bits (we will use this file to create the Ollama model)
### Step 3: Write a `Modelfile`
Next, create a `Modelfile` for your model. This file is the blueprint for your model, specifying weights, parameters, prompt templates and more.
```
FROM ./q4_0.bin
```
(Optional) many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
```
FROM ./q4_0.bin
TEMPLATE "[INST] {{ .Prompt }} [/INST]"
```
### Step 4: Create the Ollama model
Finally, create a model from your `Modelfile`:
```
ollama create example -f Modelfile
```
Next, test the model with `ollama run`:
```
ollama run example "What is your favourite condiment?"
```
### Step 5: Publish your model (optional early alpha)
Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
1. Create [an account](https://ollama.ai/signup)
2. Run `cat ~/.ollama/id_ed25519.pub` to view your Ollama public key. Copy this to the clipboard.
3. Add your public key to your [Ollama account](https://ollama.ai/settings/keys)
Next, copy your model to your username's namespace:
```
ollama cp example <your username>/example
```
Then push the model:
```
ollama push <your username>/example
```
After publishing, your model will be available at `https://ollama.ai/<your username>/example`.
## Quantization reference
The quantization options are as follows (from the highest to the lowest level of quantization). Note: some architectures such as Falcon do not support K quants.
- `q2_K`
- `q3_K`
- `q3_K_S`
- `q3_K_M`
- `q3_K_L`
- `q4_0` (recommended)
- `q4_1`
- `q4_K`
- `q4_K_S`
- `q4_K_M`
- `q5_0`
- `q5_1`
- `q5_K`
- `q5_K_S`
- `q5_K_M`
- `q6_K`
- `q8_0`
## Manually converting & quantizing models
### Prerequisites
Start by cloning the `llama.cpp` repo to your machine in another directory:
```
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
```
Next, install the Python dependencies:
```
pip install -r requirements.txt
```
Finally, build the `quantize` tool:
```
make quantize
```
### Convert the model
Run the correct conversion script for your model architecture:
```shell
# LlamaForCausalLM or MistralForCausalLM
python convert.py <path to model directory>
# FalconForCausalLM
python convert-falcon-hf-to-gguf.py <path to model directory>
# GPTNeoXForCausalLM
python convert-gptneox-hf-to-gguf.py <path to model directory>
# GPTBigCodeForCausalLM
python convert-starcoder-hf-to-gguf.py <path to model directory>
```
### Quantize the model
```
quantize <path to model dir>/ggml-model-f32.bin <path to model dir>/q4_0.bin q4_0
```

82
docs/linux.md Normal file
View File

@@ -0,0 +1,82 @@
# Installing Ollama on Linux
> Note: A one line installer for Ollama is available by running:
>
> ```bash
> curl https://ollama.ai/install.sh | sh
> ```
## Download the `ollama` binary
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
```bash
sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
```
## Start Ollama
Start Ollama by running `ollama serve`:
```bash
ollama serve
```
Once Ollama is running, run a model in another terminal session:
```bash
ollama run llama2
```
## Install CUDA drivers (optional for Nvidia GPUs)
[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.
Verify that the drivers are installed by running the following command, which should print details about your GPU:
```bash
nvidia-smi
```
## Adding Ollama as a startup service (optional)
Create a user for Ollama:
```bash
sudo useradd -r -s /bin/false -m -d /usr/share/ollama ollama
```
Create a service file in `/etc/systemd/system/ollama.service`:
```ini
[Unit]
Description=Ollama Service
After=network-online.target
[Service]
ExecStart=/usr/bin/ollama serve
User=ollama
Group=ollama
Restart=always
RestartSec=3
Environment="HOME=/usr/share/ollama"
[Install]
WantedBy=default.target
```
Then start the service:
```bash
sudo systemctl daemon-reload
sudo systemctl enable ollama
```
### Viewing logs
To view logs of Ollama running as a startup service, run:
```bash
journalctl -u ollama
```

View File

@@ -1,6 +1,6 @@
# Ollama Model File
> Note: this model file syntax is in development
> Note: this `Modelfile` syntax is in development
A model file is the blueprint to create and share models with Ollama.
@@ -12,7 +12,6 @@ A model file is the blueprint to create and share models with Ollama.
- [FROM (Required)](#from-required)
- [Build from llama2](#build-from-llama2)
- [Build from a bin file](#build-from-a-bin-file)
- [EMBED](#embed)
- [PARAMETER](#parameter)
- [Valid Parameters and Values](#valid-parameters-and-values)
- [TEMPLATE](#template)
@@ -24,7 +23,7 @@ A model file is the blueprint to create and share models with Ollama.
## Format
The format of the Modelfile:
The format of the `Modelfile`:
```modelfile
# comment
@@ -42,9 +41,9 @@ INSTRUCTION arguments
## Examples
An example of a model file creating a mario blueprint:
An example of a `Modelfile` creating a mario blueprint:
```
```modelfile
FROM llama2
# sets the temperature to 1 [higher is more creative, lower is more coherent]
PARAMETER temperature 1
@@ -57,9 +56,9 @@ SYSTEM You are Mario from super mario bros, acting as an assistant.
To use this:
1. Save it as a file (eg. `Modelfile`)
2. `ollama create NAME -f <location of the file eg. ./Modelfile>'`
3. `ollama run NAME`
1. Save it as a file (e.g. `Modelfile`)
2. `ollama create choose-a-model-name -f <location of the file e.g. ./Modelfile>'`
3. `ollama run choose-a-model-name`
4. Start using the model!
More examples are available in the [examples directory](../examples).
@@ -68,44 +67,34 @@ More examples are available in the [examples directory](../examples).
### FROM (Required)
The FROM instruction defines the base model to use when creating a model.
The `FROM` instruction defines the base model to use when creating a model.
```
```modelfile
FROM <model name>:<tag>
```
#### Build from llama2
```
```modelfile
FROM llama2
```
A list of available base models:
<https://github.com/jmorganca/ollama#model-library>
#### Build from a bin file
#### Build from a `bin` file
```
```modelfile
FROM ./ollama-model.bin
```
This bin file location should be specified as an absolute path or relative to the Modelfile location.
### EMBED
The EMBED instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
```
FROM <model name>:<tag>
EMBED <file path>.txt
EMBED <different file path>.txt
EMBED <path to directory>/*.txt
```
This bin file location should be specified as an absolute path or relative to the `Modelfile` location.
### PARAMETER
The `PARAMETER` instruction defines a parameter that can be set when the model is run.
```
```modelfile
PARAMETER <parameter> <parametervalue>
```
@@ -118,19 +107,21 @@ PARAMETER <parameter> <parametervalue>
| mirostat_tau | Controls the balance between coherence and diversity of the output. A lower value will result in more focused and coherent text. (Default: 5.0) | float | mirostat_tau 5.0 |
| num_ctx | Sets the size of the context window used to generate the next token. (Default: 2048) | int | num_ctx 4096 |
| num_gqa | The number of GQA groups in the transformer layer. Required for some models, for example it is 8 for llama2:70b | int | num_gqa 1 |
| num_gpu | The number of GPUs to use. On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 1 |
| num_gpu | The number of layers to send to the GPU(s). On macOS it defaults to 1 to enable metal support, 0 to disable. | int | num_gpu 50 |
| num_thread | Sets the number of threads to use during computation. By default, Ollama will detect this for optimal performance. It is recommended to set this value to the number of physical CPU cores your system has (as opposed to the logical number of cores). | int | num_thread 8 |
| repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |
| temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
| seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 |
| stop | Sets the stop sequences to use. | string | stop "AI assistant:" |
| tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 |
| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
| top_p | Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9) | float | top_p 0.9 |
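These parameters can also be supplied per request through the `options` field of the REST API (see `docs/api.md`). A sketch of what that might look like from Python, with illustrative values only:

```python
import requests

# Illustrative values only; any parameter from the table above can be included.
options = {
    "temperature": 0.7,
    "num_ctx": 4096,
    "top_k": 40,
    "top_p": 0.9,
    "stop": ["AI assistant:"],
}

resp = requests.post(
    "http://localhost:11434/api/generate",
    json={"model": "llama2", "prompt": "Hello!", "options": options, "stream": False},
)
resp.raise_for_status()
print(resp.json()["response"])
```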
### TEMPLATE
`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific.
`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific. You can usually find the template for a given model in the readme for that model.
#### Template Variables
@@ -140,7 +131,7 @@ PARAMETER <parameter> <parametervalue>
| `{{ .Prompt }}` | The incoming prompt, this is not specified in the model file and will be set based on input. |
| `{{ .First }}` | A boolean value used to render specific template information for the first generation of a session. |
```
```modelfile
TEMPLATE """
{{- if .First }}
### System:
@@ -160,7 +151,7 @@ SYSTEM """<system message>"""
The `SYSTEM` instruction specifies the system prompt to be used in the template, if applicable.
```
```modelfile
SYSTEM """<system message>"""
```
@@ -168,7 +159,7 @@ SYSTEM """<system message>"""
The `ADAPTER` instruction specifies the LoRA adapter to apply to the base model. The value of this instruction should be an absolute path or a path relative to the Modelfile and the file must be in a GGML file format. The adapter should be tuned from the base model otherwise the behaviour is undefined.
```
```modelfile
ADAPTER ./ollama-lora.bin
```
@@ -176,7 +167,7 @@ ADAPTER ./ollama-lora.bin
The `LICENSE` instruction allows you to specify the legal license under which the model used with this Modelfile is shared or distributed.
```
```modelfile
LICENSE """
<license text>
"""
@@ -184,5 +175,5 @@ LICENSE """
## Notes
- the **modelfile is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
- the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
- Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable.

171
examples/.gitignore vendored Normal file
View File

@@ -0,0 +1,171 @@
node_modules
# OSX
.DS_STORE
# Models
models/
# Local Chroma db
.chroma/
db/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

View File

@@ -1,15 +1,3 @@
# Examples
This directory contains different examples of using Ollama
To create a model:
```
ollama create example -f <example file>
```
To run a model:
```
ollama run example
```
This directory contains different examples of using Ollama.

View File

@@ -0,0 +1,27 @@
package main
import (
"bytes"
"fmt"
"io"
"log"
"net/http"
"os"
)
func main() {
body := []byte(`{"model":"mistral"}`)
resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewBuffer(body))
if err != nil {
fmt.Print(err.Error())
os.Exit(1)
}
responseData, err := io.ReadAll(resp.Body)
if err != nil {
log.Fatal(err)
}
fmt.Println(string(responseData))
}

View File

@@ -0,0 +1,21 @@
# LangChain
This example is a basic "hello world" of using LangChain with Ollama using Node.js and Typescript.
## Setup
```shell
npm install
```
## Run
```shell
ts-node main.ts
```
Running this example will print the response for "hello":
```plaintext
Hello! It's nice to meet you. hopefully you are having a great day! Is there something I can help you with or would you like to chat?
```

View File

@@ -0,0 +1,15 @@
import { Ollama} from 'langchain/llms/ollama';
async function main() {
const ollama = new Ollama({
model: 'mistral'
// other parameters can be found at https://js.langchain.com/docs/api/llms_ollama/classes/Ollama
})
const stream = await ollama.stream("Hello");
for await (const chunk of stream) {
process.stdout.write(chunk);
}
}
main();

View File

@@ -0,0 +1,997 @@
{
"name": "with-langchain-typescript-simplegenerate",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"langchain": "^0.0.165"
},
"devDependencies": {
"typescript": "^5.2.2"
}
},
"node_modules/@anthropic-ai/sdk": {
"version": "0.6.2",
"resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.6.2.tgz",
"integrity": "sha512-fB9PUj9RFT+XjkL+E9Ol864ZIJi+1P8WnbHspN3N3/GK2uSzjd0cbVIKTGgf4v3N8MwaQu+UWnU7C4BG/fap/g==",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"digest-fetch": "^1.3.0",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
}
},
"node_modules/@types/node": {
"version": "18.18.4",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.18.4.tgz",
"integrity": "sha512-t3rNFBgJRugIhackit2mVcLfF6IRc0JE4oeizPQL8Zrm8n2WY/0wOdpOPhdtG0V9Q2TlW/axbF1MJ6z+Yj/kKQ=="
},
"node_modules/@types/node-fetch": {
"version": "2.6.6",
"resolved": "https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.6.tgz",
"integrity": "sha512-95X8guJYhfqiuVVhRFxVQcf4hW/2bCuoPwDasMf/531STFoNoWTT7YDnWdXHEZKqAGUigmpG31r2FE70LwnzJw==",
"dependencies": {
"@types/node": "*",
"form-data": "^4.0.0"
}
},
"node_modules/@types/retry": {
"version": "0.12.0",
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
"integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA=="
},
"node_modules/@types/uuid": {
"version": "9.0.5",
"resolved": "https://registry.npmjs.org/@types/uuid/-/uuid-9.0.5.tgz",
"integrity": "sha512-xfHdwa1FMJ082prjSJpoEI57GZITiQz10r3vEJCHa2khEFQjKy91aWKz6+zybzssCvXUwE1LQWgWVwZ4nYUvHQ=="
},
"node_modules/abort-controller": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
"dependencies": {
"event-target-shim": "^5.0.0"
},
"engines": {
"node": ">=6.5"
}
},
"node_modules/agentkeepalive": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.5.0.tgz",
"integrity": "sha512-5GG/5IbQQpC9FpkRGsSvZI5QYeSCzlJHdpBQntCsuTOxhKD8lqKhrleg2Yi7yvMIf82Ycmmqln9U8V9qwEiJew==",
"dependencies": {
"humanize-ms": "^1.2.1"
},
"engines": {
"node": ">= 8.0.0"
}
},
"node_modules/ansi-styles": {
"version": "5.2.0",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz",
"integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
}
},
"node_modules/argparse": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz",
"integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q=="
},
"node_modules/asynckit": {
"version": "0.4.0",
"resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
"integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
},
"node_modules/base-64": {
"version": "0.1.0",
"resolved": "https://registry.npmjs.org/base-64/-/base-64-0.1.0.tgz",
"integrity": "sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA=="
},
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
]
},
"node_modules/binary-extensions": {
"version": "2.2.0",
"resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz",
"integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==",
"engines": {
"node": ">=8"
}
},
"node_modules/binary-search": {
"version": "1.3.6",
"resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz",
"integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA=="
},
"node_modules/camelcase": {
"version": "6.3.0",
"resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz",
"integrity": "sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==",
"engines": {
"node": ">=10"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/charenc": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz",
"integrity": "sha512-yrLQ/yVUFXkzg7EDQsPieE/53+0RlaWTs+wBrvW36cyilJ2SaDWfl4Yj7MtLTXleV9uEKefbAGUPv2/iWSooRA==",
"engines": {
"node": "*"
}
},
"node_modules/combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"dependencies": {
"delayed-stream": "~1.0.0"
},
"engines": {
"node": ">= 0.8"
}
},
"node_modules/commander": {
"version": "10.0.1",
"resolved": "https://registry.npmjs.org/commander/-/commander-10.0.1.tgz",
"integrity": "sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==",
"engines": {
"node": ">=14"
}
},
"node_modules/crypt": {
"version": "0.0.2",
"resolved": "https://registry.npmjs.org/crypt/-/crypt-0.0.2.tgz",
"integrity": "sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==",
"engines": {
"node": "*"
}
},
"node_modules/decamelize": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/decamelize/-/decamelize-1.2.0.tgz",
"integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/delayed-stream": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
"integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
"engines": {
"node": ">=0.4.0"
}
},
"node_modules/digest-fetch": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/digest-fetch/-/digest-fetch-1.3.0.tgz",
"integrity": "sha512-CGJuv6iKNM7QyZlM2T3sPAdZWd/p9zQiRNS9G+9COUCwzWFTs0Xp8NF5iePx7wtvhDykReiRRrSeNb4oMmB8lA==",
"dependencies": {
"base-64": "^0.1.0",
"md5": "^2.3.0"
}
},
"node_modules/event-target-shim": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==",
"engines": {
"node": ">=6"
}
},
"node_modules/eventemitter3": {
"version": "4.0.7",
"resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz",
"integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw=="
},
"node_modules/expr-eval": {
"version": "2.0.2",
"resolved": "https://registry.npmjs.org/expr-eval/-/expr-eval-2.0.2.tgz",
"integrity": "sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg=="
},
"node_modules/flat": {
"version": "5.0.2",
"resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
"integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==",
"bin": {
"flat": "cli.js"
}
},
"node_modules/form-data": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
"integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
"dependencies": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.8",
"mime-types": "^2.1.12"
},
"engines": {
"node": ">= 6"
}
},
"node_modules/form-data-encoder": {
"version": "1.7.2",
"resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz",
"integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A=="
},
"node_modules/formdata-node": {
"version": "4.4.1",
"resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz",
"integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==",
"dependencies": {
"node-domexception": "1.0.0",
"web-streams-polyfill": "4.0.0-beta.3"
},
"engines": {
"node": ">= 12.20"
}
},
"node_modules/humanize-ms": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz",
"integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==",
"dependencies": {
"ms": "^2.0.0"
}
},
"node_modules/is-any-array": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz",
"integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ=="
},
"node_modules/is-buffer": {
"version": "1.1.6",
"resolved": "https://registry.npmjs.org/is-buffer/-/is-buffer-1.1.6.tgz",
"integrity": "sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w=="
},
"node_modules/js-tiktoken": {
"version": "1.0.7",
"resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.7.tgz",
"integrity": "sha512-biba8u/clw7iesNEWLOLwrNGoBP2lA+hTaBLs/D45pJdUPFXyxD6nhcDVtADChghv4GgyAiMKYMiRx7x6h7Biw==",
"dependencies": {
"base64-js": "^1.5.1"
}
},
"node_modules/js-yaml": {
"version": "4.1.0",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz",
"integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==",
"dependencies": {
"argparse": "^2.0.1"
},
"bin": {
"js-yaml": "bin/js-yaml.js"
}
},
"node_modules/jsonpointer": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz",
"integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/langchain": {
"version": "0.0.165",
"resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.165.tgz",
"integrity": "sha512-CpbNpjwaE+9lzjdw+pZz0VgnRrFivEgr7CVp9dDaAb5JpaJAA4V2v6uQ9ZPN+TSqupTQ79HFn2sfyZVEl2EG7Q==",
"dependencies": {
"@anthropic-ai/sdk": "^0.6.2",
"ansi-styles": "^5.0.0",
"binary-extensions": "^2.2.0",
"camelcase": "6",
"decamelize": "^1.2.0",
"expr-eval": "^2.0.2",
"flat": "^5.0.2",
"js-tiktoken": "^1.0.7",
"js-yaml": "^4.1.0",
"jsonpointer": "^5.0.1",
"langchainhub": "~0.0.6",
"langsmith": "~0.0.31",
"ml-distance": "^4.0.0",
"object-hash": "^3.0.0",
"openai": "~4.4.0",
"openapi-types": "^12.1.3",
"p-queue": "^6.6.2",
"p-retry": "4",
"uuid": "^9.0.0",
"yaml": "^2.2.1",
"zod": "^3.22.3",
"zod-to-json-schema": "^3.20.4"
},
"engines": {
"node": ">=18"
},
"peerDependencies": {
"@aws-crypto/sha256-js": "^5.0.0",
"@aws-sdk/client-bedrock-runtime": "^3.422.0",
"@aws-sdk/client-dynamodb": "^3.310.0",
"@aws-sdk/client-kendra": "^3.352.0",
"@aws-sdk/client-lambda": "^3.310.0",
"@aws-sdk/client-s3": "^3.310.0",
"@aws-sdk/client-sagemaker-runtime": "^3.310.0",
"@aws-sdk/client-sfn": "^3.310.0",
"@aws-sdk/credential-provider-node": "^3.388.0",
"@azure/storage-blob": "^12.15.0",
"@clickhouse/client": "^0.0.14",
"@cloudflare/ai": "^1.0.12",
"@elastic/elasticsearch": "^8.4.0",
"@getmetal/metal-sdk": "*",
"@getzep/zep-js": "^0.7.0",
"@gomomento/sdk": "^1.23.0",
"@google-ai/generativelanguage": "^0.2.1",
"@google-cloud/storage": "^6.10.1",
"@huggingface/inference": "^1.5.1",
"@mozilla/readability": "*",
"@notionhq/client": "^2.2.10",
"@opensearch-project/opensearch": "*",
"@pinecone-database/pinecone": "^1.1.0",
"@planetscale/database": "^1.8.0",
"@qdrant/js-client-rest": "^1.2.0",
"@raycast/api": "^1.55.2",
"@smithy/eventstream-codec": "^2.0.5",
"@smithy/protocol-http": "^3.0.6",
"@smithy/signature-v4": "^2.0.10",
"@smithy/util-utf8": "^2.0.0",
"@supabase/postgrest-js": "^1.1.1",
"@supabase/supabase-js": "^2.10.0",
"@tensorflow-models/universal-sentence-encoder": "*",
"@tensorflow/tfjs-converter": "*",
"@tensorflow/tfjs-core": "*",
"@upstash/redis": "^1.20.6",
"@vercel/postgres": "^0.5.0",
"@writerai/writer-sdk": "^0.40.2",
"@xata.io/client": "^0.25.1",
"@xenova/transformers": "^2.5.4",
"@zilliz/milvus2-sdk-node": ">=2.2.7",
"apify-client": "^2.7.1",
"axios": "*",
"cassandra-driver": "^4.6.4",
"cheerio": "^1.0.0-rc.12",
"chromadb": "*",
"cohere-ai": ">=6.0.0",
"d3-dsv": "^2.0.0",
"epub2": "^3.0.1",
"faiss-node": "^0.3.0",
"fast-xml-parser": "^4.2.7",
"firebase-admin": "^11.9.0",
"google-auth-library": "^8.9.0",
"googleapis": "^126.0.1",
"hnswlib-node": "^1.4.2",
"html-to-text": "^9.0.5",
"ignore": "^5.2.0",
"ioredis": "^5.3.2",
"jsdom": "*",
"llmonitor": "*",
"lodash": "^4.17.21",
"mammoth": "*",
"mongodb": "^5.2.0",
"mysql2": "^3.3.3",
"neo4j-driver": "*",
"node-llama-cpp": "*",
"notion-to-md": "^3.1.0",
"pdf-parse": "1.1.1",
"peggy": "^3.0.2",
"pg": "^8.11.0",
"pg-copy-streams": "^6.0.5",
"pickleparser": "^0.1.0",
"playwright": "^1.32.1",
"portkey-ai": "^0.1.11",
"puppeteer": "^19.7.2",
"redis": "^4.6.4",
"replicate": "^0.18.0",
"sonix-speech-recognition": "^2.1.1",
"srt-parser-2": "^1.2.2",
"typeorm": "^0.3.12",
"typesense": "^1.5.3",
"usearch": "^1.1.1",
"vectordb": "^0.1.4",
"voy-search": "0.6.2",
"weaviate-ts-client": "^1.4.0",
"web-auth-library": "^1.0.3",
"youtube-transcript": "^1.0.6",
"youtubei.js": "^5.8.0"
},
"peerDependenciesMeta": {
"@aws-crypto/sha256-js": {
"optional": true
},
"@aws-sdk/client-bedrock-runtime": {
"optional": true
},
"@aws-sdk/client-dynamodb": {
"optional": true
},
"@aws-sdk/client-kendra": {
"optional": true
},
"@aws-sdk/client-lambda": {
"optional": true
},
"@aws-sdk/client-s3": {
"optional": true
},
"@aws-sdk/client-sagemaker-runtime": {
"optional": true
},
"@aws-sdk/client-sfn": {
"optional": true
},
"@aws-sdk/credential-provider-node": {
"optional": true
},
"@azure/storage-blob": {
"optional": true
},
"@clickhouse/client": {
"optional": true
},
"@cloudflare/ai": {
"optional": true
},
"@elastic/elasticsearch": {
"optional": true
},
"@getmetal/metal-sdk": {
"optional": true
},
"@getzep/zep-js": {
"optional": true
},
"@gomomento/sdk": {
"optional": true
},
"@google-ai/generativelanguage": {
"optional": true
},
"@google-cloud/storage": {
"optional": true
},
"@huggingface/inference": {
"optional": true
},
"@mozilla/readability": {
"optional": true
},
"@notionhq/client": {
"optional": true
},
"@opensearch-project/opensearch": {
"optional": true
},
"@pinecone-database/pinecone": {
"optional": true
},
"@planetscale/database": {
"optional": true
},
"@qdrant/js-client-rest": {
"optional": true
},
"@raycast/api": {
"optional": true
},
"@smithy/eventstream-codec": {
"optional": true
},
"@smithy/protocol-http": {
"optional": true
},
"@smithy/signature-v4": {
"optional": true
},
"@smithy/util-utf8": {
"optional": true
},
"@supabase/postgrest-js": {
"optional": true
},
"@supabase/supabase-js": {
"optional": true
},
"@tensorflow-models/universal-sentence-encoder": {
"optional": true
},
"@tensorflow/tfjs-converter": {
"optional": true
},
"@tensorflow/tfjs-core": {
"optional": true
},
"@upstash/redis": {
"optional": true
},
"@vercel/postgres": {
"optional": true
},
"@writerai/writer-sdk": {
"optional": true
},
"@xata.io/client": {
"optional": true
},
"@xenova/transformers": {
"optional": true
},
"@zilliz/milvus2-sdk-node": {
"optional": true
},
"apify-client": {
"optional": true
},
"axios": {
"optional": true
},
"cassandra-driver": {
"optional": true
},
"cheerio": {
"optional": true
},
"chromadb": {
"optional": true
},
"cohere-ai": {
"optional": true
},
"d3-dsv": {
"optional": true
},
"epub2": {
"optional": true
},
"faiss-node": {
"optional": true
},
"fast-xml-parser": {
"optional": true
},
"firebase-admin": {
"optional": true
},
"google-auth-library": {
"optional": true
},
"googleapis": {
"optional": true
},
"hnswlib-node": {
"optional": true
},
"html-to-text": {
"optional": true
},
"ignore": {
"optional": true
},
"ioredis": {
"optional": true
},
"jsdom": {
"optional": true
},
"llmonitor": {
"optional": true
},
"lodash": {
"optional": true
},
"mammoth": {
"optional": true
},
"mongodb": {
"optional": true
},
"mysql2": {
"optional": true
},
"neo4j-driver": {
"optional": true
},
"node-llama-cpp": {
"optional": true
},
"notion-to-md": {
"optional": true
},
"pdf-parse": {
"optional": true
},
"peggy": {
"optional": true
},
"pg": {
"optional": true
},
"pg-copy-streams": {
"optional": true
},
"pickleparser": {
"optional": true
},
"playwright": {
"optional": true
},
"portkey-ai": {
"optional": true
},
"puppeteer": {
"optional": true
},
"redis": {
"optional": true
},
"replicate": {
"optional": true
},
"sonix-speech-recognition": {
"optional": true
},
"srt-parser-2": {
"optional": true
},
"typeorm": {
"optional": true
},
"typesense": {
"optional": true
},
"usearch": {
"optional": true
},
"vectordb": {
"optional": true
},
"voy-search": {
"optional": true
},
"weaviate-ts-client": {
"optional": true
},
"web-auth-library": {
"optional": true
},
"youtube-transcript": {
"optional": true
},
"youtubei.js": {
"optional": true
}
}
},
"node_modules/langchainhub": {
"version": "0.0.6",
"resolved": "https://registry.npmjs.org/langchainhub/-/langchainhub-0.0.6.tgz",
"integrity": "sha512-SW6105T+YP1cTe0yMf//7kyshCgvCTyFBMTgH2H3s9rTAR4e+78DA/BBrUL/Mt4Q5eMWui7iGuAYb3pgGsdQ9w=="
},
"node_modules/langsmith": {
"version": "0.0.42",
"resolved": "https://registry.npmjs.org/langsmith/-/langsmith-0.0.42.tgz",
"integrity": "sha512-sFuN+e7E+pPBIRaRgFqZh/BRBWNHTZNAwi6uj4kydQawooCZYoJmM5snOkiQrhVSvAhgu6xFhLvmfvkPcKzD7w==",
"dependencies": {
"@types/uuid": "^9.0.1",
"commander": "^10.0.1",
"p-queue": "^6.6.2",
"p-retry": "4",
"uuid": "^9.0.0"
},
"bin": {
"langsmith": "dist/cli/main.cjs"
}
},
"node_modules/md5": {
"version": "2.3.0",
"resolved": "https://registry.npmjs.org/md5/-/md5-2.3.0.tgz",
"integrity": "sha512-T1GITYmFaKuO91vxyoQMFETst+O71VUPEU3ze5GNzDm0OWdP8v1ziTaAEPUr/3kLsY3Sftgz242A1SetQiDL7g==",
"dependencies": {
"charenc": "0.0.2",
"crypt": "0.0.2",
"is-buffer": "~1.1.6"
}
},
"node_modules/mime-db": {
"version": "1.52.0",
"resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
"integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
"engines": {
"node": ">= 0.6"
}
},
"node_modules/mime-types": {
"version": "2.1.35",
"resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
"integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
"dependencies": {
"mime-db": "1.52.0"
},
"engines": {
"node": ">= 0.6"
}
},
"node_modules/ml-array-mean": {
"version": "1.1.6",
"resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz",
"integrity": "sha512-MIdf7Zc8HznwIisyiJGRH9tRigg3Yf4FldW8DxKxpCCv/g5CafTw0RRu51nojVEOXuCQC7DRVVu5c7XXO/5joQ==",
"dependencies": {
"ml-array-sum": "^1.1.6"
}
},
"node_modules/ml-array-sum": {
"version": "1.1.6",
"resolved": "https://registry.npmjs.org/ml-array-sum/-/ml-array-sum-1.1.6.tgz",
"integrity": "sha512-29mAh2GwH7ZmiRnup4UyibQZB9+ZLyMShvt4cH4eTK+cL2oEMIZFnSyB3SS8MlsTh6q/w/yh48KmqLxmovN4Dw==",
"dependencies": {
"is-any-array": "^2.0.0"
}
},
"node_modules/ml-distance": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/ml-distance/-/ml-distance-4.0.1.tgz",
"integrity": "sha512-feZ5ziXs01zhyFUUUeZV5hwc0f5JW0Sh0ckU1koZe/wdVkJdGxcP06KNQuF0WBTj8FttQUzcvQcpcrOp/XrlEw==",
"dependencies": {
"ml-array-mean": "^1.1.6",
"ml-distance-euclidean": "^2.0.0",
"ml-tree-similarity": "^1.0.0"
}
},
"node_modules/ml-distance-euclidean": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/ml-distance-euclidean/-/ml-distance-euclidean-2.0.0.tgz",
"integrity": "sha512-yC9/2o8QF0A3m/0IXqCTXCzz2pNEzvmcE/9HFKOZGnTjatvBbsn4lWYJkxENkA4Ug2fnYl7PXQxnPi21sgMy/Q=="
},
"node_modules/ml-tree-similarity": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/ml-tree-similarity/-/ml-tree-similarity-1.0.0.tgz",
"integrity": "sha512-XJUyYqjSuUQkNQHMscr6tcjldsOoAekxADTplt40QKfwW6nd++1wHWV9AArl0Zvw/TIHgNaZZNvr8QGvE8wLRg==",
"dependencies": {
"binary-search": "^1.3.5",
"num-sort": "^2.0.0"
}
},
"node_modules/ms": {
"version": "2.1.3",
"resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz",
"integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="
},
"node_modules/node-domexception": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz",
"integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/jimmywarting"
},
{
"type": "github",
"url": "https://paypal.me/jimmywarting"
}
],
"engines": {
"node": ">=10.5.0"
}
},
"node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
}
},
"node_modules/num-sort": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz",
"integrity": "sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==",
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/object-hash": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz",
"integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==",
"engines": {
"node": ">= 6"
}
},
"node_modules/openai": {
"version": "4.4.0",
"resolved": "https://registry.npmjs.org/openai/-/openai-4.4.0.tgz",
"integrity": "sha512-JN0t628Kh95T0IrXl0HdBqnlJg+4Vq0Bnh55tio+dfCnyzHvMLiWyCM9m726MAJD2YkDU4/8RQB6rNbEq9ct2w==",
"dependencies": {
"@types/node": "^18.11.18",
"@types/node-fetch": "^2.6.4",
"abort-controller": "^3.0.0",
"agentkeepalive": "^4.2.1",
"digest-fetch": "^1.3.0",
"form-data-encoder": "1.7.2",
"formdata-node": "^4.3.2",
"node-fetch": "^2.6.7"
},
"bin": {
"openai": "bin/cli"
}
},
"node_modules/openapi-types": {
"version": "12.1.3",
"resolved": "https://registry.npmjs.org/openapi-types/-/openapi-types-12.1.3.tgz",
"integrity": "sha512-N4YtSYJqghVu4iek2ZUvcN/0aqH1kRDuNqzcycDxhOUpg7GdvLa2F3DgS6yBNhInhv2r/6I0Flkn7CqL8+nIcw=="
},
"node_modules/p-finally": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz",
"integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==",
"engines": {
"node": ">=4"
}
},
"node_modules/p-queue": {
"version": "6.6.2",
"resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz",
"integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==",
"dependencies": {
"eventemitter3": "^4.0.4",
"p-timeout": "^3.2.0"
},
"engines": {
"node": ">=8"
},
"funding": {
"url": "https://github.com/sponsors/sindresorhus"
}
},
"node_modules/p-retry": {
"version": "4.6.2",
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
"integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
"dependencies": {
"@types/retry": "0.12.0",
"retry": "^0.13.1"
},
"engines": {
"node": ">=8"
}
},
"node_modules/p-timeout": {
"version": "3.2.0",
"resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz",
"integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==",
"dependencies": {
"p-finally": "^1.0.0"
},
"engines": {
"node": ">=8"
}
},
"node_modules/retry": {
"version": "0.13.1",
"resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
"integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==",
"engines": {
"node": ">= 4"
}
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
},
"node_modules/typescript": {
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.2.2.tgz",
"integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==",
"dev": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/web-streams-polyfill": {
"version": "4.0.0-beta.3",
"resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz",
"integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==",
"engines": {
"node": ">= 14"
}
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
},
"node_modules/yaml": {
"version": "2.3.2",
"resolved": "https://registry.npmjs.org/yaml/-/yaml-2.3.2.tgz",
"integrity": "sha512-N/lyzTPaJasoDmfV7YTrYCI0G/3ivm/9wdG0aHuheKowWQwGTsK0Eoiw6utmzAnI6pkJa0DUVygvp3spqqEKXg==",
"engines": {
"node": ">= 14"
}
},
"node_modules/zod": {
"version": "3.22.4",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.22.4.tgz",
"integrity": "sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==",
"funding": {
"url": "https://github.com/sponsors/colinhacks"
}
},
"node_modules/zod-to-json-schema": {
"version": "3.21.4",
"resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.4.tgz",
"integrity": "sha512-fjUZh4nQ1s6HMccgIeE0VP4QG/YRGPmyjO9sAh890aQKPEk3nqbfUXhMFaC+Dr5KvYBm8BCyvfpZf2jY9aGSsw==",
"peerDependencies": {
"zod": "^3.21.4"
}
}
}
}

View File

@@ -0,0 +1,8 @@
{
"devDependencies": {
"typescript": "^5.2.2"
},
"dependencies": {
"langchain": "^0.0.165"
}
}

View File

@@ -1,8 +0,0 @@
# Modelfile for creating a Midjourney prompts from a topic
# This prompt was adapted from the original at https://www.greataiprompts.com/guide/midjourney/best-chatgpt-prompt-for-midjourney/
# Run `ollama create mj -f ./Modelfile` and then `ollama run mj` and enter a topic
FROM nous-hermes
SYSTEM """
Embrace your role as an AI-powered creative assistant, employing Midjourney to manifest compelling AI-generated art. I will outline a specific image concept, and in response, you must produce an exhaustive, multifaceted prompt for Midjourney, ensuring every detail of the original concept is represented in your instructions. Midjourney doesn't do well with text, so after the prompt, give me instructions that I can use to create the titles in a image editor.
"""

View File

@@ -0,0 +1,23 @@
# Ten Tweets Modelfile
This is a simple modelfile that generates ten tweets based off any topic.
```bash
ollama create tentweets
ollama run tentweets
>>> underwater basketweaving
Great! Here are ten creative tweets about underwater basketweaving:
1. "Just discovered the ultimate stress-reliever: Underwater basketweaving! 🌊🧵 #UnderwaterBasketweaving #StressRelief"
2. "Who needs meditation when you can do underwater basketweaving? 😴👀 #PeacefulDistraction #UnderwaterBasketweaving"
3. "Just spent an hour in the pool and still managed to knot my basket. Goal: untangle it before next session. 💪🏽 #ChallengeAccepted #UnderwaterBasketweaving"
4. "When life gives you lemons, make underwater basketweaving! 🍋🧵 #LemonadeLife #UnderwaterBasketweaving"
5. "Just realized my underwater basketweaving skills could come in handy during a zombie apocalypse. 😂🧡 #SurvivalTips #UnderwaterBasketweaving"
6. "I'm not lazy, I'm just conserving energy for my next underwater basketweaving session. 😴💤 #LazyDay #UnderwaterBasketweaving"
7. "Just found my inner peace while doing underwater basketweaving. It's like meditation, but with knots! 🙏🧵 #Mindfulness #UnderwaterBasketweaving"
8. "Why study for exams when you can do underwater basketweaving and forget all your worries? 😜🧵 #ProcrastinationStation #UnderwaterBasketweaving"
9. "Just had to cut my underwater basketweaving session short due to a sudden urge to breathe. 🤯🌊 #AquaticAdventures #UnderwaterBasketweaving"
10. "I'm not sure what's more impressive: my underwater basketweaving skills or the fact that I didn't drown trying to make this tweet. 😅🧵 #Accomplishment
#UnderwaterBasketweaving"
```

View File

@@ -1,7 +1,7 @@
# Modelfile for creating a devops engineer assistant
# Run `ollama create devops-engineer -f ./Modelfile` and then `ollama run devops-engineer` and enter a topic
FROM llama2:13b
FROM mistral
PARAMETER temperature 1
SYSTEM """
You are a senior devops engineer, acting as an assistant. You offer help with cloud technologies like: Terraform, AWS, kubernetes, python. You answer with code examples when possible

View File

@@ -0,0 +1,40 @@
# DevOps Engineer Modelfile
This is a simple modelfile that will respond to any questions that a senior DevOps engineer should know how to solve.
````text
ollama create devopsengineer
ollama run devopsengineer
>>> how can i proxy all connections from inside the office through a single server that caches large files. Provide a config file for nginx to achieve this?
Here is an example configuration file for Nginx that can be used to proxy all connections from inside the office network to a centralized cache server:
```perl
http {
upstream cache_server {
server <cache_server_ip>;
server <cache_server_ip>;
server <cache_server_ip>;
}
server {
listen 80;
server_name localhost;
location / {
proxy_pass http://cache_server;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
}
}
```
In this configuration, we define an `upstream` block that lists the IP addresses of the cache servers. We then define a server block that listens on port 80 and routes all
traffic to the `/` location to the cache servers using the `proxy_pass` directive. The `proxy_set_header` directives are used to preserve the source IP address of the client
request when forwarding it to the cache server.
To use this configuration, you would need to replace the placeholder `<cache_server_ip>` with the actual IP addresses of your cache servers. You would also need to make sure
that the cache servers are configured to accept incoming connections from the Nginx server and handle requests for files.
````

View File

(binary image file changed: 446 KiB before and after)

View File

@@ -0,0 +1,11 @@
# Modelfile for creating Midjourney prompts from a topic
# This prompt was adapted from the original at https://www.greataiprompts.com/guide/midjourney/best-chatgpt-prompt-for-midjourney/
# Run `ollama create mj -f ./Modelfile` and then `ollama run mj` and enter a topic
FROM zephyr
PARAMETER temperature 0.8
PARAMETER top_k 500
PARAMETER top_p 0.9
SYSTEM """
Embrace your role as a creative illustrator. Based on a concept provided, you must produce a single paragraph with a multifaceted description of an image, ensuring significant details of the concept and more is represented in your instructions. You do not need to write complete sentences but rather short concepts with the following information: the level of detail that should be represented, an artistic style and maybe a specific name of a painter or illustrator, the ideal color palette, lighting, mood, perspective, the setting, time of day, weather, the season, the time period, location, materials, the textures, patterns, lines, brushstrokes, techniques, the medium, the genre, the rendering style. Don't include everything and keep the description length under 250 words.
"""

View File

@@ -0,0 +1,11 @@
# Midjourney Prompt Generator Modelfile
This simple modelfile will help create a prompt to feed to Midjourney.
```text
ollama create midjourney
ollama run midjourney
>>> a sports car in the mountains.
A sleek, high-performance automobile cuts through a serpentine mountain landscape. The concept is a classic illustration of speed and power, depicted in the style of pop art by Andy Warhol. The color palette is dominated by bold, primary hues of red, blue, and yellow, with striking accent colors of white, black, and metallic shades. The lighting is bright and focused, casting sharp shadows on the rugged terrain. A sense of excitement and anticipation permeates throughout the scene, as the car navigates a treacherous course through the winding road. The perspective is low, allowing for a full view of the vehicle's sleek lines and intricate details. The setting takes place in the afternoon during a sunny day in autumn, as evidenced by the vibrant foliage on the mountainside. The time period is modern, with nods to classic car design. The materials are primarily digital, allowing for smooth curves and sharp contrasts. The textures are sleek and polished, with meticulously detailed lines and brushstrokes that accentuate the car's aerodynamic design. The patterns consist of geometric shapes and bold stripes, adding to the car's dynamic appeal. The genre is modern realism, with a focus on precision and detail. The rendering style is highly technical, capturing the nuances and subtleties of the vehicle and its surroundings in breathtaking detail.
```

View File

@@ -0,0 +1,20 @@
# Recipe Maker Modelfile
Simple modelfile to generate a recipe from a short list of ingredients.
```
ollama create recipemaker
ollama run recipemaker
>>> chilli pepper, white chocolate, kale
Ingredients:
- 1 small chili pepper
- 4 squares of white chocolate
- handful of kale leaves
Instructions:
1. In a blender or food processor, puree the chilies and white chocolate until smooth.
2. Add the chopped kale leaves to the blender and pulse until well combined.
3. Serve immediately as a dip for crackers or use it as an ingredient in your favorite recipe. The mixture of spicy chili pepper with sweet white chocolate and nutritious
kale will make your taste buds dance with delight!
```

View File

@@ -1,4 +1,4 @@
FROM llama2
FROM mistral
SYSTEM """
You are an experienced Devops engineer focused on docker. When given specifications for a particular need or application you know the best way to host that within a docker container. For instance if someone tells you they want an nginx server to host files located at /web you will answer as follows

View File

@@ -0,0 +1,22 @@
# News Summarizer
This example goes through a series of steps:
1. You choose a topic area (e.g., "news", "NVidia", "music", etc.).
2. Gets the most recent articles on that topic from various sources.
3. Uses Ollama to summarize each article.
4. Creates chunks of sentences from each article.
5. Uses Sentence Transformers to generate embeddings for each of those chunks.
6. You enter a question regarding the summaries shown.
7. Uses Sentence Transformers to generate an embedding for that question.
8. Uses the embedded question to find the most similar chunks.
9. Feeds all that to Ollama to generate a good answer to your question based on these news articles.
This example lets you pick from a few different topic areas, then summarizes the most recent articles for that topic, chunks each article into sentences, and generates embeddings for those chunks.
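The summarization step boils down to a single non-streaming `/api/generate` call with a system prompt. As a rough sketch, the same call could be made from Go (assuming the same local endpoint and model used by the Python code below):
```go
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// summarize asks the local Ollama server for a five-line summary of text.
func summarize(text string) (string, error) {
	payload := map[string]any{
		"model":  "mistral-openorca",
		"prompt": text,
		"system": "Write a concise summary of the text, return your responses with 5 lines that cover the key points of the text given.",
		"stream": false,
	}
	body, err := json.Marshal(payload)
	if err != nil {
		return "", err
	}
	resp, err := http.Post("http://localhost:11434/api/generate", "application/json", bytes.NewBuffer(body))
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	var out struct {
		Response string `json:"response"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
		return "", err
	}
	return out.Response, nil
}

func main() {
	summary, err := summarize("Some article text ...")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(summary)
}
```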
You can run the example like this:
```bash
pip install -r requirements.txt
python summ.py
```

View File

@@ -0,0 +1,9 @@
beautifulsoup4==4.12.2
feedparser==6.0.10
mattsollamatools==0.0.8
newspaper3k==0.2.8
nltk==3.8.1
numpy==1.24.3
Requests==2.31.0
scikit_learn==1.3.0
sentence_transformers==2.2.2

View File

@@ -0,0 +1,86 @@
import curses
import json
from utils import get_url_for_topic, topic_urls, menu, getUrls, get_summary, getArticleText, knn_search
import requests
from sentence_transformers import SentenceTransformer
from mattsollamatools import chunker
if __name__ == "__main__":
chosen_topic = curses.wrapper(menu)
print("Here is your news summary:\n")
urls = getUrls(chosen_topic, n=5)
model = SentenceTransformer('all-MiniLM-L6-v2')
allEmbeddings = []
for url in urls:
article={}
article['embeddings'] = []
article['url'] = url
text = getArticleText(url)
summary = get_summary(text)
chunks = chunker(text) # Use the chunk_text function from web_utils
embeddings = model.encode(chunks)
for (chunk, embedding) in zip(chunks, embeddings):
item = {}
item['source'] = chunk
item['embedding'] = embedding.tolist() # Convert NumPy array to list
item['sourcelength'] = len(chunk)
article['embeddings'].append(item)
allEmbeddings.append(article)
print(f"{summary}\n")
while True:
context = []
# Input a question from the user
question = input("Enter your question about the news, or type quit: ")
if question.lower() == 'quit':
break
# Embed the user's question
question_embedding = model.encode([question])
# Perform KNN search to find the best matches (indices and source text)
best_matches = knn_search(question_embedding, allEmbeddings, k=10)
sourcetext=""
for i, (index, source_text) in enumerate(best_matches, start=1):
sourcetext += f"{i}. Index: {index}, Source Text: {source_text}"
systemPrompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
url = "http://localhost:11434/api/generate"
payload = {
"model": "mistral-openorca",
"prompt": question,
"system": systemPrompt,
"stream": False,
"context": context
}
# Convert the payload to a JSON string
payload_json = json.dumps(payload)
# Set the headers to specify JSON content
headers = {
"Content-Type": "application/json"
}
# Send the POST request
response = requests.post(url, data=payload_json, headers=headers)
# Check the response
if response.status_code == 200:
output = json.loads(response.text)
context = output['context']
print(output['response']+ "\n")
else:
print(f"Request failed with status code {response.status_code}")

View File

@@ -0,0 +1,108 @@
import curses
import feedparser
import requests
import unicodedata
import json
from newspaper import Article
from bs4 import BeautifulSoup
from nltk.tokenize import sent_tokenize, word_tokenize
import numpy as np
from sklearn.neighbors import NearestNeighbors
from mattsollamatools import chunker
# Create a dictionary to store topics and their URLs
topic_urls = {
"Mac": "https://9to5mac.com/guides/mac/feed",
"News": "http://www.npr.org/rss/rss.php?id=1001",
"Nvidia": "https://nvidianews.nvidia.com/releases.xml",
"Raspberry Pi": "https://www.raspberrypi.com/news/feed/",
"Music": "https://www.billboard.com/c/music/music-news/feed/"
}
# Use curses to create a menu of topics
def menu(stdscr):
chosen_topic = get_url_for_topic(stdscr)
url = topic_urls[chosen_topic] if chosen_topic in topic_urls else "Topic not found"
stdscr.addstr(len(topic_urls) + 3, 0, f"Selected URL for {chosen_topic}: {url}")
stdscr.refresh()
return chosen_topic
# You have chosen a topic. Now return the url for that topic
def get_url_for_topic(stdscr):
curses.curs_set(0) # Hide the cursor
stdscr.clear()
stdscr.addstr(0, 0, "Choose a topic using the arrow keys (Press Enter to select):")
# Create a list of topics
topics = list(topic_urls.keys())
current_topic = 0
while True:
for i, topic in enumerate(topics):
if i == current_topic:
stdscr.addstr(i + 2, 2, f"> {topic}")
else:
stdscr.addstr(i + 2, 2, f" {topic}")
stdscr.refresh()
key = stdscr.getch()
if key == curses.KEY_DOWN and current_topic < len(topics) - 1:
current_topic += 1
elif key == curses.KEY_UP and current_topic > 0:
current_topic -= 1
elif key == 10: # Enter key
return topic_urls[topics[current_topic]]
# Get the last N URLs from an RSS feed
def getUrls(feed_url, n=20):
feed = feedparser.parse(feed_url)
entries = feed.entries[-n:]
urls = [entry.link for entry in entries]
return urls
# Often there are a bunch of ads and menus on pages for a news article. This uses newspaper3k to get just the text of just the article.
def getArticleText(url):
article = Article(url)
article.download()
article.parse()
return article.text
def get_summary(text):
systemPrompt = "Write a concise summary of the text, return your responses with 5 lines that cover the key points of the text given."
prompt = text
url = "http://localhost:11434/api/generate"
payload = {
"model": "mistral-openorca",
"prompt": prompt,
"system": systemPrompt,
"stream": False
}
payload_json = json.dumps(payload)
headers = {"Content-Type": "application/json"}
response = requests.post(url, data=payload_json, headers=headers)
return json.loads(response.text)["response"]
# Perform K-nearest neighbors (KNN) search
def knn_search(question_embedding, embeddings, k=5):
X = np.array([item['embedding'] for article in embeddings for item in article['embeddings']])
source_texts = [item['source'] for article in embeddings for item in article['embeddings']]
# Fit a KNN model on the embeddings
knn = NearestNeighbors(n_neighbors=k, metric='cosine')
knn.fit(X)
# Find the indices and distances of the k-nearest neighbors
distances, indices = knn.kneighbors(question_embedding, n_neighbors=k)
# Get the indices and source texts of the best matches
best_matches = [(indices[0][i], source_texts[indices[0][i]]) for i in range(k)]
return best_matches

View File

@@ -0,0 +1,2 @@
node_modules
package-lock.json

View File

@@ -0,0 +1,21 @@
# Ask the Mentors
This example demonstrates how one would create a set of 'mentors' you can have a conversation with. The mentors are generated using the `character-generator.ts` file. This will use **Stable Beluga 70b** to create a bio and list of verbal tics and common phrases used by each person. Then `mentors.ts` will take a question, and choose three of the 'mentors' and start a conversation with them. Occasionally, they will talk to each other, and other times they will just deliver a set of monologues. It's fun to see what they do and say.
## Usage
```bash
ts-node ./character-generator.ts "Lorne Greene"
```
This will create `lornegreene/Modelfile`. Now you can create a model with this command:
```bash
ollama create lornegreene -f lornegreene/Modelfile
```
If you want to add your own mentors, you will have to update the code to look at your namespace instead of **mattw**. Also set the list of mentors to include yours.
```bash
ts-node ./mentors.ts "What is a Jackalope?"
```

View File

@@ -0,0 +1,26 @@
import { Ollama } from 'ollama-node'
import fs from 'fs';
import path from 'path';
async function characterGenerator() {
const character = process.argv[2];
console.log(`You are creating a character for ${character}.`);
const foldername = character.replace(/\s/g, '').toLowerCase();
const directory = path.join(__dirname, foldername);
if (!fs.existsSync(directory)) {
fs.mkdirSync(directory, { recursive: true });
}
const ollama = new Ollama();
ollama.setModel("stablebeluga2:70b-q4_K_M");
const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `);
const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => {
if (err) throw err;
console.log('The file has been saved!');
});
}
characterGenerator();

View File

@@ -0,0 +1,59 @@
import { Ollama } from 'ollama-node';
const mentorCount = 3;
const ollama = new Ollama();
function getMentors(): string[] {
const mentors = ['Gary Vaynerchuk', 'Kanye West', 'Martha Stewart', 'Neil deGrasse Tyson', 'Owen Wilson', 'Ronald Reagan', 'Donald Trump', 'Barack Obama', 'Jeff Bezos'];
const chosenMentors: string[] = [];
for (let i = 0; i < mentorCount; i++) {
const mentor = mentors[Math.floor(Math.random() * mentors.length)];
chosenMentors.push(mentor);
mentors.splice(mentors.indexOf(mentor), 1);
}
return chosenMentors;
}
function getMentorFileName(mentor: string): string {
const model = mentor.toLowerCase().replace(/\s/g, '');
return `mattw/${model}`;
}
async function getSystemPrompt(mentor: string, isLast: boolean, question: string): Promise<string> {
ollama.setModel(getMentorFileName(mentor));
const info = await ollama.showModelInfo()
let SystemPrompt = info.system || '';
SystemPrompt += ` You should continue the conversation as if you were ${mentor} and acknowledge the people before you in the conversation. You should adopt their mannerisms and tone, but also not use language they wouldn't use. If they are not known to know about the concept in the question, don't offer an answer. Your answer should be no longer than 1 paragraph. And definitely try not to sound like anyone else. Don't repeat any slang or phrases already used. And if it is a question the original ${mentor} wouldn't have known the answer to, just say that you don't know, in the style of ${mentor}. And think about the time the person lived. Don't use terminology that they wouldn't have used.`
if (isLast) {
SystemPrompt += ` End your answer with something like I hope our answers help you out`;
} else {
SystemPrompt += ` Remember, this is a conversation, so you don't need a conclusion, but end your answer with a question related to the first question: "${question}".`;
}
return SystemPrompt;
}
async function main() {
const mentors = getMentors();
const question = process.argv[2];
let theConversation = `Here is the conversation so far.\nYou: ${question}\n`
for await (const mentor of mentors) {
const SystemPrompt = await getSystemPrompt(mentor, mentor === mentors[mentorCount - 1], question);
ollama.setModel(getMentorFileName(mentor));
ollama.setSystemPrompt(SystemPrompt);
let output = '';
process.stdout.write(`\n${mentor}: `);
for await (const chunk of ollama.streamingGenerate(theConversation + `Continue the conversation as if you were ${mentor} on the question "${question}".`)) {
if (chunk.response) {
output += chunk.response;
process.stdout.write(chunk.response);
} else {
process.stdout.write('\n');
}
}
theConversation += `${mentor}: ${output}\n\n`
}
}
main();

View File

@@ -0,0 +1,7 @@
{
"dependencies": {
"fs": "^0.0.1-security",
"ollama-node": "^0.0.3",
"path": "^0.12.7"
}
}

23
format/bytes.go Normal file
View File

@@ -0,0 +1,23 @@
package format
import "fmt"
const (
Byte = 1
KiloByte = Byte * 1000
MegaByte = KiloByte * 1000
GigaByte = MegaByte * 1000
)
func HumanBytes(b int64) string {
switch {
case b > GigaByte:
return fmt.Sprintf("%d GB", b/GigaByte)
case b > MegaByte:
return fmt.Sprintf("%d MB", b/MegaByte)
case b > KiloByte:
return fmt.Sprintf("%d KB", b/KiloByte)
default:
return fmt.Sprintf("%d B", b)
}
}
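
As a quick usage sketch (not part of the change, and assuming the module path `github.com/jmorganca/ollama/format`), values just past each threshold pick up the matching unit:
```go
package main

import (
	"fmt"

	"github.com/jmorganca/ollama/format" // assumed module path
)

func main() {
	fmt.Println(format.HumanBytes(42))            // 42 B
	fmt.Println(format.HumanBytes(3_500))         // 3 KB
	fmt.Println(format.HumanBytes(7_200_000))     // 7 MB
	fmt.Println(format.HumanBytes(4_700_000_000)) // 4 GB
}
```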

View File

@@ -7,26 +7,14 @@ import (
"time"
)
// HumanDuration returns a human-readable approximation of a duration
// (eg. "About a minute", "4 hours ago", etc.).
// Modified version of github.com/docker/go-units.HumanDuration
func HumanDuration(d time.Duration) string {
return HumanDurationWithCase(d, true)
}
// HumanDurationWithCase returns a human-readable approximation of a
// duration (eg. "About a minute", "4 hours ago", etc.). but allows
// you to specify whether the first word should be capitalized
// (eg. "About" vs. "about")
func HumanDurationWithCase(d time.Duration, useCaps bool) string {
// humanDuration returns a human-readable approximation of a
// duration (eg. "About a minute", "4 hours ago", etc.).
func humanDuration(d time.Duration) string {
seconds := int(d.Seconds())
switch {
case seconds < 1:
if useCaps {
return "Less than a second"
}
return "less than a second"
return "Less than a second"
case seconds == 1:
return "1 second"
case seconds < 60:
@@ -36,10 +24,7 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string {
minutes := int(d.Minutes())
switch {
case minutes == 1:
if useCaps {
return "About a minute"
}
return "about a minute"
return "About a minute"
case minutes < 60:
return fmt.Sprintf("%d minutes", minutes)
}
@@ -47,10 +32,7 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string {
hours := int(math.Round(d.Hours()))
switch {
case hours == 1:
if useCaps {
return "About an hour"
}
return "about an hour"
return "About an hour"
case hours < 48:
return fmt.Sprintf("%d hours", hours)
case hours < 24*7*2:
@@ -65,77 +47,22 @@ func HumanDurationWithCase(d time.Duration, useCaps bool) string {
}
func HumanTime(t time.Time, zeroValue string) string {
return humanTimeWithCase(t, zeroValue, true)
return humanTime(t, zeroValue)
}
func HumanTimeLower(t time.Time, zeroValue string) string {
return humanTimeWithCase(t, zeroValue, false)
return strings.ToLower(humanTime(t, zeroValue))
}
func humanTimeWithCase(t time.Time, zeroValue string, useCaps bool) string {
func humanTime(t time.Time, zeroValue string) string {
if t.IsZero() {
return zeroValue
}
delta := time.Since(t)
if delta < 0 {
return HumanDurationWithCase(-delta, useCaps) + " from now"
return humanDuration(-delta) + " from now"
}
return HumanDurationWithCase(delta, useCaps) + " ago"
}
// ExcatDuration returns a human readable hours/minutes/seconds or milliseconds format of a duration
// the most precise level of duration is milliseconds
func ExactDuration(d time.Duration) string {
if d.Seconds() < 1 {
if d.Milliseconds() == 1 {
return fmt.Sprintf("%d millisecond", d.Milliseconds())
}
return fmt.Sprintf("%d milliseconds", d.Milliseconds())
}
var readableDur strings.Builder
dur := d.String()
// split the default duration string format of 0h0m0s into something nicer to read
h := strings.Split(dur, "h")
if len(h) > 1 {
hours := h[0]
if hours == "1" {
readableDur.WriteString(fmt.Sprintf("%s hour ", hours))
} else {
readableDur.WriteString(fmt.Sprintf("%s hours ", hours))
}
dur = h[1]
}
m := strings.Split(dur, "m")
if len(m) > 1 {
mins := m[0]
switch mins {
case "0":
// skip
case "1":
readableDur.WriteString(fmt.Sprintf("%s minute ", mins))
default:
readableDur.WriteString(fmt.Sprintf("%s minutes ", mins))
}
dur = m[1]
}
s := strings.Split(dur, "s")
if len(s) > 0 {
sec := s[0]
switch sec {
case "0":
// skip
case "1":
readableDur.WriteString(fmt.Sprintf("%s second ", sec))
default:
readableDur.WriteString(fmt.Sprintf("%s seconds ", sec))
}
}
return strings.TrimSpace(readableDur.String())
return humanDuration(delta) + " ago"
}
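
For reference, a small sketch of how the exported helpers behave after this simplification (again assuming the module path `github.com/jmorganca/ollama/format`):
```go
package main

import (
	"fmt"
	"time"

	"github.com/jmorganca/ollama/format" // assumed module path
)

func main() {
	fmt.Println(format.HumanTime(time.Time{}, "never"))                    // never
	fmt.Println(format.HumanTime(time.Now().Add(-2*time.Hour), ""))        // 2 hours ago
	fmt.Println(format.HumanTimeLower(time.Now().Add(90*time.Second), "")) // about a minute from now
}
```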

View File

@@ -11,92 +11,25 @@ func assertEqual(t *testing.T, a interface{}, b interface{}) {
}
}
func TestHumanDuration(t *testing.T) {
day := 24 * time.Hour
week := 7 * day
month := 30 * day
year := 365 * day
assertEqual(t, "Less than a second", HumanDuration(450*time.Millisecond))
assertEqual(t, "Less than a second", HumanDurationWithCase(450*time.Millisecond, true))
assertEqual(t, "less than a second", HumanDurationWithCase(450*time.Millisecond, false))
assertEqual(t, "1 second", HumanDuration(1*time.Second))
assertEqual(t, "45 seconds", HumanDuration(45*time.Second))
assertEqual(t, "46 seconds", HumanDuration(46*time.Second))
assertEqual(t, "59 seconds", HumanDuration(59*time.Second))
assertEqual(t, "About a minute", HumanDuration(60*time.Second))
assertEqual(t, "About a minute", HumanDurationWithCase(1*time.Minute, true))
assertEqual(t, "about a minute", HumanDurationWithCase(1*time.Minute, false))
assertEqual(t, "3 minutes", HumanDuration(3*time.Minute))
assertEqual(t, "35 minutes", HumanDuration(35*time.Minute))
assertEqual(t, "35 minutes", HumanDuration(35*time.Minute+40*time.Second))
assertEqual(t, "45 minutes", HumanDuration(45*time.Minute))
assertEqual(t, "45 minutes", HumanDuration(45*time.Minute+40*time.Second))
assertEqual(t, "46 minutes", HumanDuration(46*time.Minute))
assertEqual(t, "59 minutes", HumanDuration(59*time.Minute))
assertEqual(t, "About an hour", HumanDuration(1*time.Hour))
assertEqual(t, "About an hour", HumanDurationWithCase(1*time.Hour+29*time.Minute, true))
assertEqual(t, "about an hour", HumanDurationWithCase(1*time.Hour+29*time.Minute, false))
assertEqual(t, "2 hours", HumanDuration(1*time.Hour+31*time.Minute))
assertEqual(t, "2 hours", HumanDuration(1*time.Hour+59*time.Minute))
assertEqual(t, "3 hours", HumanDuration(3*time.Hour))
assertEqual(t, "3 hours", HumanDuration(3*time.Hour+29*time.Minute))
assertEqual(t, "4 hours", HumanDuration(3*time.Hour+31*time.Minute))
assertEqual(t, "4 hours", HumanDuration(3*time.Hour+59*time.Minute))
assertEqual(t, "4 hours", HumanDuration(3*time.Hour+60*time.Minute))
assertEqual(t, "24 hours", HumanDuration(24*time.Hour))
assertEqual(t, "36 hours", HumanDuration(1*day+12*time.Hour))
assertEqual(t, "2 days", HumanDuration(2*day))
assertEqual(t, "7 days", HumanDuration(7*day))
assertEqual(t, "13 days", HumanDuration(13*day+5*time.Hour))
assertEqual(t, "2 weeks", HumanDuration(2*week))
assertEqual(t, "2 weeks", HumanDuration(2*week+4*day))
assertEqual(t, "3 weeks", HumanDuration(3*week))
assertEqual(t, "4 weeks", HumanDuration(4*week))
assertEqual(t, "4 weeks", HumanDuration(4*week+3*day))
assertEqual(t, "4 weeks", HumanDuration(1*month))
assertEqual(t, "6 weeks", HumanDuration(1*month+2*week))
assertEqual(t, "2 months", HumanDuration(2*month))
assertEqual(t, "2 months", HumanDuration(2*month+2*week))
assertEqual(t, "3 months", HumanDuration(3*month))
assertEqual(t, "3 months", HumanDuration(3*month+1*week))
assertEqual(t, "5 months", HumanDuration(5*month+2*week))
assertEqual(t, "13 months", HumanDuration(13*month))
assertEqual(t, "23 months", HumanDuration(23*month))
assertEqual(t, "24 months", HumanDuration(24*month))
assertEqual(t, "2 years", HumanDuration(24*month+2*week))
assertEqual(t, "3 years", HumanDuration(3*year+2*month))
}
func TestHumanTime(t *testing.T) {
now := time.Now()
t.Run("zero value", func(t *testing.T) {
assertEqual(t, HumanTime(time.Time{}, "never"), "never")
})
t.Run("time in the future", func(t *testing.T) {
v := now.Add(48 * time.Hour)
assertEqual(t, HumanTime(v, ""), "2 days from now")
})
t.Run("time in the past", func(t *testing.T) {
v := now.Add(-48 * time.Hour)
assertEqual(t, HumanTime(v, ""), "2 days ago")
})
}
func TestExactDuration(t *testing.T) {
assertEqual(t, "1 millisecond", ExactDuration(1*time.Millisecond))
assertEqual(t, "10 milliseconds", ExactDuration(10*time.Millisecond))
assertEqual(t, "1 second", ExactDuration(1*time.Second))
assertEqual(t, "10 seconds", ExactDuration(10*time.Second))
assertEqual(t, "1 minute", ExactDuration(1*time.Minute))
assertEqual(t, "10 minutes", ExactDuration(10*time.Minute))
assertEqual(t, "1 hour", ExactDuration(1*time.Hour))
assertEqual(t, "10 hours", ExactDuration(10*time.Hour))
assertEqual(t, "1 hour 1 second", ExactDuration(1*time.Hour+1*time.Second))
assertEqual(t, "1 hour 10 seconds", ExactDuration(1*time.Hour+10*time.Second))
assertEqual(t, "1 hour 1 minute", ExactDuration(1*time.Hour+1*time.Minute))
assertEqual(t, "1 hour 10 minutes", ExactDuration(1*time.Hour+10*time.Minute))
assertEqual(t, "1 hour 1 minute 1 second", ExactDuration(1*time.Hour+1*time.Minute+1*time.Second))
assertEqual(t, "10 hours 10 minutes 10 seconds", ExactDuration(10*time.Hour+10*time.Minute+10*time.Second))
t.Run("soon", func(t *testing.T) {
v := time.Now().Add(800 * time.Millisecond)
assertEqual(t, HumanTime(v, ""), "Less than a second from now")
})
}

4
go.mod
View File

@@ -8,7 +8,9 @@ require (
github.com/mattn/go-runewidth v0.0.14
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db
github.com/olekukonko/tablewriter v0.0.5
github.com/pdevine/readline v1.5.2
github.com/spf13/cobra v1.7.0
golang.org/x/sync v0.3.0
)
require github.com/rivo/uniseg v0.2.0 // indirect
@@ -16,7 +18,6 @@ require github.com/rivo/uniseg v0.2.0 // indirect
require (
github.com/bytedance/sonic v1.9.1 // indirect
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
github.com/chzyer/readline v1.5.1
github.com/gabriel-vasile/mimetype v1.4.2 // indirect
github.com/gin-contrib/cors v1.4.0
github.com/gin-contrib/sse v0.1.0 // indirect
@@ -44,7 +45,6 @@ require (
golang.org/x/sys v0.11.0 // indirect
golang.org/x/term v0.10.0
golang.org/x/text v0.10.0 // indirect
gonum.org/v1/gonum v0.13.0
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

9
go.sum
View File

@@ -6,8 +6,6 @@ github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhD
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM=
github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ=
github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI=
github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk=
github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04=
github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
@@ -80,6 +78,8 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
github.com/pdevine/readline v1.5.2 h1:oz6Y5GdTmhPG+08hhxcAvtHitSANWuA2100Sppb38xI=
github.com/pdevine/readline v1.5.2/go.mod h1:na/LbuE5PYwxI7GyopWdIs3U8HVe89lYlNTFTXH3wOw=
github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
@@ -120,12 +120,13 @@ golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
golang.org/x/exp v0.0.0-20230321023759-10a507213a29 h1:ooxPy7fPvB4kwsA2h+iBNHkAbp/4JxTSwCmvdjEYmug=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
@@ -144,8 +145,6 @@ golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.13.0 h1:a0T3bh+7fhRyqeNbiC3qVHYmkiQgit3wnNan/2c0HMM=
gonum.org/v1/gonum v0.13.0/go.mod h1:/WPYRckkfWrhWefxyYTfrTtQR0KH4iyHNuzxqXAKyAU=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=

20
llm/falcon.go Normal file
View File

@@ -0,0 +1,20 @@
package llm
const (
falconModelType7B = 32
falconModelType40B = 60
falconModelType180B = 80
)
func falconModelType(numLayer uint32) string {
switch numLayer {
case falconModelType7B:
return "7B"
case falconModelType40B:
return "40B"
case falconModelType180B:
return "180B"
default:
return "unknown"
}
}
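A hypothetical table-driven check (not part of this change, shown only to make the mapping above concrete) could sit alongside falcon.go in package llm, using only the standard testing package:

package llm

import "testing"

func TestFalconModelType(t *testing.T) {
	cases := map[uint32]string{
		32: "7B",
		60: "40B",
		80: "180B",
		24: "unknown", // any unrecognized layer count falls through to the default branch
	}
	for numLayer, want := range cases {
		if got := falconModelType(numLayer); got != want {
			t.Errorf("falconModelType(%d) = %q, want %q", numLayer, got, want)
		}
	}
}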

View File

@@ -3,72 +3,96 @@ package llm
import (
"encoding/binary"
"errors"
"fmt"
"io"
)
type ModelFamily string
type ModelType uint32
const (
ModelType3B ModelType = 26
ModelType7B ModelType = 32
ModelType13B ModelType = 40
ModelType34B ModelType = 48
ModelType30B ModelType = 60
ModelType65B ModelType = 80
)
func (mt ModelType) String() string {
switch mt {
case ModelType3B:
return "3B"
case ModelType7B:
return "7B"
case ModelType13B:
return "13B"
case ModelType34B:
return "34B"
case ModelType30B:
return "30B"
case ModelType65B:
return "65B"
default:
return "Unknown"
}
}
type FileType interface {
String() string
}
type GGML struct {
magic uint32
container
model
}
const (
fileTypeF32 uint32 = iota
fileTypeF16
fileTypeQ4_0
fileTypeQ4_1
fileTypeQ4_1_F16
fileTypeQ8_0 uint32 = iota + 2
fileTypeQ5_0
fileTypeQ5_1
fileTypeQ2_K
fileTypeQ3_K_S
fileTypeQ3_K_M
fileTypeQ3_K_L
fileTypeQ4_K_S
fileTypeQ4_K_M
fileTypeQ5_K_S
fileTypeQ5_K_M
fileTypeQ6_K
)
func fileType(fileType uint32) string {
switch fileType {
case fileTypeF32:
return "F32"
case fileTypeF16:
return "F16"
case fileTypeQ4_0:
return "Q4_0"
case fileTypeQ4_1:
return "Q4_1"
case fileTypeQ4_1_F16:
return "Q4_1_F16"
case fileTypeQ8_0:
return "Q8_0"
case fileTypeQ5_0:
return "Q5_0"
case fileTypeQ5_1:
return "Q5_1"
case fileTypeQ2_K:
return "Q2_K"
case fileTypeQ3_K_S:
return "Q3_K_S"
case fileTypeQ3_K_M:
return "Q3_K_M"
case fileTypeQ3_K_L:
return "Q3_K_L"
case fileTypeQ4_K_S:
return "Q4_K_S"
case fileTypeQ4_K_M:
return "Q4_K_M"
case fileTypeQ5_K_S:
return "Q5_K_S"
case fileTypeQ5_K_M:
return "Q5_K_M"
case fileTypeQ6_K:
return "Q6_K"
default:
return "unknown"
}
}
type model interface {
ModelFamily() ModelFamily
ModelType() ModelType
FileType() FileType
ModelFamily() string
ModelType() string
FileType() string
NumLayers() int64
}
type container interface {
Name() string
Decode(io.Reader) error
Decode(io.Reader) (model, error)
}
type containerGGML struct {
}
type containerGGML struct{}
func (c *containerGGML) Name() string {
return "ggml"
}
func (c *containerGGML) Decode(r io.Reader) error {
return nil
func (c *containerGGML) Decode(r io.Reader) (model, error) {
return nil, nil
}
type containerGGMF struct {
@@ -79,18 +103,18 @@ func (c *containerGGMF) Name() string {
return "ggmf"
}
func (c *containerGGMF) Decode(r io.Reader) error {
func (c *containerGGMF) Decode(r io.Reader) (model, error) {
var version uint32
binary.Read(r, binary.LittleEndian, &version)
switch version {
case 1:
default:
return errors.New("invalid version")
return nil, errors.New("invalid version")
}
c.version = version
return nil
return nil, nil
}
type containerGGJT struct {
@@ -101,18 +125,22 @@ func (c *containerGGJT) Name() string {
return "ggjt"
}
func (c *containerGGJT) Decode(r io.Reader) error {
func (c *containerGGJT) Decode(r io.Reader) (model, error) {
var version uint32
binary.Read(r, binary.LittleEndian, &version)
switch version {
case 1, 2, 3:
default:
return errors.New("invalid version")
return nil, errors.New("invalid version")
}
c.version = version
return nil
// different model types may have different layouts for hyperparameters
var llama llamaModel
binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
return &llama, nil
}
type containerLORA struct {
@@ -123,32 +151,34 @@ func (c *containerLORA) Name() string {
return "ggla"
}
func (c *containerLORA) Decode(r io.Reader) error {
func (c *containerLORA) Decode(r io.Reader) (model, error) {
var version uint32
binary.Read(r, binary.LittleEndian, &version)
switch version {
case 1:
default:
return errors.New("invalid version")
return nil, errors.New("invalid version")
}
c.version = version
return nil
return nil, nil
}
const (
// / Magic constant for `ggml` files (unversioned).
// Magic constant for `ggml` files (unversioned).
FILE_MAGIC_GGML = 0x67676d6c
// / Magic constant for `ggml` files (versioned, ggmf).
// Magic constant for `ggml` files (versioned, ggmf).
FILE_MAGIC_GGMF = 0x67676d66
// / Magic constant for `ggml` files (versioned, ggjt).
// Magic constant for `ggml` files (versioned, ggjt).
FILE_MAGIC_GGJT = 0x67676a74
// / Magic constant for `ggla` files (LoRA adapter).
// Magic constant for `ggla` files (LoRA adapter).
FILE_MAGIC_GGLA = 0x67676C61
// Magic constant for `gguf` files (versioned, gguf)
FILE_MAGIC_GGUF = 0x46554747
)
func DecodeGGML(r io.ReadSeeker, hint ModelFamily) (*GGML, error) {
func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
var ggml GGML
binary.Read(r, binary.LittleEndian, &ggml.magic)
@@ -161,24 +191,18 @@ func DecodeGGML(r io.ReadSeeker, hint ModelFamily) (*GGML, error) {
ggml.container = &containerGGJT{}
case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{}
case FILE_MAGIC_GGUF:
ggml.container = &containerGGUF{}
default:
return nil, errors.New("invalid file magic")
}
if err := ggml.Decode(r); err != nil {
model, err := ggml.Decode(r)
if err != nil {
return nil, err
}
// different model types may have different layouts for hyperparameters
switch hint {
case ModelFamilyLlama:
var llama llamaModel
binary.Read(r, binary.LittleEndian, &llama.hyperparameters)
ggml.model = &llama
// TODO: sanity check hyperparameters
default:
return nil, fmt.Errorf("unsupported model type: %s", hint)
}
ggml.model = model
// final model type
return &ggml, nil
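For orientation, a minimal sketch of calling the reworked DecodeGGML from outside the package: the model file name is hypothetical and the import path is assumed from the repository layout. Since the container now decodes the hyperparameters itself, no model-family hint is passed.

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/jmorganca/ollama/llm" // assumed import path for the llm package shown above
)

func main() {
	f, err := os.Open("model.bin") // hypothetical model file
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	ggml, err := llm.DecodeGGML(f)
	if err != nil {
		log.Fatal(err)
	}
	// family, size and quantization come from the decoded model, not from a caller-supplied hint
	fmt.Println(ggml.ModelFamily(), ggml.ModelType(), ggml.FileType())
}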

View File

@@ -1,687 +0,0 @@
package llm
import (
"bufio"
"bytes"
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"log"
"math/rand"
"net/http"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
"github.com/jmorganca/ollama/api"
)
const ModelFamilyLlama ModelFamily = "llama"
//go:embed llama.cpp/ggml/build/*/bin/*
var llamaCppEmbed embed.FS
var (
ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")
ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
)
var (
ggmlInit sync.Once
ggmlRunnerPath string
)
func osPath(llamaPath string) string {
if runtime.GOOS == "windows" {
return path.Join(llamaPath, "Release")
}
return llamaPath
}
func initGGML() {
ggmlInit.Do(func() {
tmpDir, err := os.MkdirTemp("", "llama-*")
if err != nil {
log.Fatalf("llama.cpp: failed to create temp dir: %v", err)
}
llamaPath := osPath(ggmlGPU)
if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
llamaPath = osPath(ggmlCPU)
if _, err := fs.Stat(llamaCppEmbed, llamaPath); err != nil {
log.Fatalf("llama.cpp executable not found")
}
}
files := []string{"server"}
switch runtime.GOOS {
case "windows":
files = []string{"server.exe"}
case "darwin":
if llamaPath == osPath(ggmlGPU) {
files = append(files, "ggml-metal.metal")
}
}
for _, f := range files {
srcPath := path.Join(llamaPath, f)
destPath := filepath.Join(tmpDir, f)
srcFile, err := llamaCppEmbed.Open(srcPath)
if err != nil {
log.Fatalf("read llama.cpp %s: %v", f, err)
}
defer srcFile.Close()
destFile, err := os.OpenFile(destPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil {
log.Fatalf("write llama.cpp %s: %v", f, err)
}
defer destFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil {
log.Fatalf("copy llama.cpp %s: %v", f, err)
}
}
ggmlRunnerPath = filepath.Join(tmpDir, "server")
if runtime.GOOS == "windows" {
ggmlRunnerPath = filepath.Join(tmpDir, "server.exe")
}
})
}
type ModelRunner struct {
Path string // path to the model runner executable
}
func ggmlRunner() ModelRunner {
initGGML()
return ModelRunner{Path: ggmlRunnerPath}
}
type llamaModel struct {
hyperparameters llamaHyperparameters
}
func (llm *llamaModel) ModelFamily() ModelFamily {
return ModelFamilyLlama
}
func (llm *llamaModel) ModelType() ModelType {
switch llm.hyperparameters.NumLayer {
case 26:
return ModelType3B
case 32:
return ModelType7B
case 40:
return ModelType13B
case 48:
return ModelType34B
case 60:
return ModelType30B
case 80:
return ModelType65B
}
// TODO: find a better default
return ModelType7B
}
func (llm *llamaModel) FileType() FileType {
return llm.hyperparameters.FileType
}
type llamaHyperparameters struct {
// NumVocab is the size of the model's vocabulary.
NumVocab uint32
// NumEmbd is the size of the model's embedding layer.
NumEmbd uint32
NumMult uint32
NumHead uint32
// NumLayer is the number of layers in the model.
NumLayer uint32
NumRot uint32
// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
FileType llamaFileType
}
type llamaFileType uint32
const (
llamaFileTypeF32 llamaFileType = iota
llamaFileTypeF16
llamaFileTypeQ4_0
llamaFileTypeQ4_1
llamaFileTypeQ4_1_F16
llamaFileTypeQ8_0 llamaFileType = iota + 2
llamaFileTypeQ5_0
llamaFileTypeQ5_1
llamaFileTypeQ2_K
llamaFileTypeQ3_K_S
llamaFileTypeQ3_K_M
llamaFileTypeQ3_K_L
llamaFileTypeQ4_K_S
llamaFileTypeQ4_K_M
llamaFileTypeQ5_K_S
llamaFileTypeQ5_K_M
llamaFileTypeQ6_K
)
func (ft llamaFileType) String() string {
switch ft {
case llamaFileTypeF32:
return "F32"
case llamaFileTypeF16:
return "F16"
case llamaFileTypeQ4_0:
return "Q4_0"
case llamaFileTypeQ4_1:
return "Q4_1"
case llamaFileTypeQ4_1_F16:
return "Q4_1_F16"
case llamaFileTypeQ8_0:
return "Q8_0"
case llamaFileTypeQ5_0:
return "Q5_0"
case llamaFileTypeQ5_1:
return "Q5_1"
case llamaFileTypeQ2_K:
return "Q2_K"
case llamaFileTypeQ3_K_S:
return "Q3_K_S"
case llamaFileTypeQ3_K_M:
return "Q3_K_M"
case llamaFileTypeQ3_K_L:
return "Q3_K_L"
case llamaFileTypeQ4_K_S:
return "Q4_K_S"
case llamaFileTypeQ4_K_M:
return "Q4_K_M"
case llamaFileTypeQ5_K_S:
return "Q5_K_S"
case llamaFileTypeQ5_K_M:
return "Q5_K_M"
case llamaFileTypeQ6_K:
return "Q6_K"
default:
return "Unknown"
}
}
type Running struct {
Port int
Cmd *exec.Cmd
Cancel context.CancelFunc
}
type llama struct {
api.Options
Running
}
func newLlama(model string, adapters []string, runner ModelRunner, opts api.Options) (*llama, error) {
if _, err := os.Stat(model); err != nil {
return nil, err
}
if _, err := os.Stat(runner.Path); err != nil {
return nil, err
}
if len(adapters) > 1 {
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
}
params := []string{
"--model", model,
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
"--gqa", fmt.Sprintf("%d", opts.NumGQA),
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", opts.NumGPU),
"--embedding",
}
if len(adapters) > 0 {
// TODO: applying multiple adapters is not supported by the llama.cpp server yet
params = append(params, "--lora", adapters[0])
}
if opts.NumThread > 0 {
params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread))
}
if !opts.F16KV {
params = append(params, "--memory-f32")
}
if opts.UseMLock {
params = append(params, "--mlock")
}
if !opts.UseMMap {
params = append(params, "--no-mmap")
}
if opts.UseNUMA {
params = append(params, "--numa")
}
// start the llama.cpp server with a retry in case the port is already in use
for try := 0; try < 3; try++ {
port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
ctx, cancel := context.WithCancel(context.Background())
cmd := exec.CommandContext(
ctx,
runner.Path,
append(params, "--port", strconv.Itoa(port))...,
)
cmd.Stdout = os.Stderr
cmd.Stderr = os.Stderr
llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel}}
if err := waitForServer(llm); err != nil {
log.Printf("error starting llama.cpp server: %v", err)
llm.Close()
// try again
continue
}
// server started successfully
return llm, nil
}
return nil, fmt.Errorf("max retry exceeded starting llama.cpp")
}
func waitForServer(llm *llama) error {
log.Print("starting llama.cpp server")
var stderr bytes.Buffer
llm.Cmd.Stderr = &stderr
err := llm.Cmd.Start()
if err != nil {
return fmt.Errorf("error starting the external llama.cpp server: %w", err)
}
exitChan := make(chan error, 1)
// the server is a long running process, watch for it exiting to keep track of something going wrong
go func() {
err := llm.Cmd.Wait()
log.Print(stderr.String())
exitChan <- err
}()
// wait for the server to start responding
start := time.Now()
expiresAt := time.Now().Add(30 * time.Second)
ticker := time.NewTicker(100 * time.Millisecond)
log.Print("waiting for llama.cpp server to start responding")
for {
select {
case <-ticker.C:
if time.Now().After(expiresAt) {
return fmt.Errorf("llama.cpp server did not start responding within 30 seconds, retrying")
}
if err := llm.Ping(context.Background()); err == nil {
log.Printf("llama.cpp server started in %f seconds", time.Since(start).Seconds())
return nil
}
case err := <-exitChan:
return fmt.Errorf("llama.cpp server exited unexpectedly: %w", err)
}
}
}
func (llm *llama) Close() {
llm.Running.Cmd.Cancel()
}
func (llm *llama) SetOptions(opts api.Options) {
llm.Options = opts
}
type GenerationSettings struct {
FrequencyPenalty float64 `json:"frequency_penalty"`
IgnoreEOS bool `json:"ignore_eos"`
LogitBias []interface{} `json:"logit_bias"`
Mirostat int `json:"mirostat"`
MirostatEta float64 `json:"mirostat_eta"`
MirostatTau float64 `json:"mirostat_tau"`
Model string `json:"model"`
NCtx int `json:"n_ctx"`
NKeep int `json:"n_keep"`
NPredict int `json:"n_predict"`
NProbs int `json:"n_probs"`
PenalizeNl bool `json:"penalize_nl"`
PresencePenalty float64 `json:"presence_penalty"`
RepeatLastN int `json:"repeat_last_n"`
RepeatPenalty float64 `json:"repeat_penalty"`
Seed uint32 `json:"seed"`
Stop []string `json:"stop"`
Stream bool `json:"stream"`
Temp float64 `json:"temp"`
TfsZ float64 `json:"tfs_z"`
TopK int `json:"top_k"`
TopP float64 `json:"top_p"`
TypicalP float64 `json:"typical_p"`
}
type Timings struct {
PredictedN int `json:"predicted_n"`
PredictedMS float64 `json:"predicted_ms"`
PromptN int `json:"prompt_n"`
PromptMS float64 `json:"prompt_ms"`
}
type Prediction struct {
Content string `json:"content"`
Model string `json:"model"`
Prompt string `json:"prompt"`
Stop bool `json:"stop"`
Timings `json:"timings"`
}
type PredictRequest struct {
Stream bool `json:"stream"`
NPredict int `json:"n_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
TfsZ float32 `json:"tfs_z,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatTau float32 `json:"mirostat_tau,omitempty"`
MirostatEta float32 `json:"mirostat_eta,omitempty"`
PenalizeNl bool `json:"penalize_nl,omitempty"`
NKeep int `json:"n_keep,omitempty"`
Seed int `json:"seed,omitempty"`
Prompt string `json:"prompt,omitempty"`
NProbs int `json:"n_probs,omitempty"`
LogitBias map[int]float32 `json:"logit_bias,omitempty"`
IgnoreEos bool `json:"ignore_eos,omitempty"`
Stop []string `json:"stop,omitempty"`
}
func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
prevConvo, err := llm.Decode(ctx, prevContext)
if err != nil {
return err
}
var nextContext strings.Builder
nextContext.WriteString(prevConvo)
nextContext.WriteString(prompt)
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
predReq := PredictRequest{
Prompt: nextContext.String(),
Stream: true,
NPredict: llm.NumPredict,
NKeep: llm.NumKeep,
Temperature: llm.Temperature,
TopK: llm.TopK,
TopP: llm.TopP,
TfsZ: llm.TFSZ,
TypicalP: llm.TypicalP,
RepeatLastN: llm.RepeatLastN,
RepeatPenalty: llm.RepeatPenalty,
PresencePenalty: llm.PresencePenalty,
FrequencyPenalty: llm.FrequencyPenalty,
Mirostat: llm.Mirostat,
MirostatTau: llm.MirostatTau,
MirostatEta: llm.MirostatEta,
PenalizeNl: llm.PenalizeNewline,
Stop: llm.Stop,
}
data, err := json.Marshal(predReq)
if err != nil {
return fmt.Errorf("error marshaling data: %v", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return fmt.Errorf("error creating POST request: %v", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("POST predict: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed reading llm error response: %w", err)
}
log.Printf("llm predict error: %s", bodyBytes)
return fmt.Errorf("%s", bodyBytes)
}
scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
select {
case <-ctx.Done():
// This handles the request cancellation
return ctx.Err()
default:
line := scanner.Text()
if line == "" {
continue
}
// Read data from the server-side event stream
if strings.HasPrefix(line, "data: ") {
evt := line[6:]
var p Prediction
if err := json.Unmarshal([]byte(evt), &p); err != nil {
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
}
if p.Content != "" {
fn(api.GenerateResponse{Response: p.Content})
nextContext.WriteString(p.Content)
}
if p.Stop {
embd, err := llm.Encode(ctx, nextContext.String())
if err != nil {
return fmt.Errorf("encoding context: %v", err)
}
fn(api.GenerateResponse{
Done: true,
Context: embd,
PromptEvalCount: p.PromptN,
PromptEvalDuration: parseDurationMs(p.PromptMS),
EvalCount: p.PredictedN,
EvalDuration: parseDurationMs(p.PredictedMS),
})
return nil
}
}
}
}
if err := scanner.Err(); err != nil {
return fmt.Errorf("error reading llm response: %v", err)
}
return nil
}
type TokenizeRequest struct {
Content string `json:"content"`
}
type TokenizeResponse struct {
Tokens []int `json:"tokens"`
}
func (llm *llama) Encode(ctx context.Context, prompt string) ([]int, error) {
endpoint := fmt.Sprintf("http://127.0.0.1:%d/tokenize", llm.Port)
data, err := json.Marshal(TokenizeRequest{Content: prompt})
if err != nil {
return nil, fmt.Errorf("marshaling encode data: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return nil, fmt.Errorf("encode request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("do encode request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("read encode request: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm encode error: %s", body)
return nil, fmt.Errorf("%s", body)
}
var encoded TokenizeResponse
if err := json.Unmarshal(body, &encoded); err != nil {
return nil, fmt.Errorf("unmarshal encode response: %w", err)
}
return encoded.Tokens, nil
}
type DetokenizeRequest struct {
Tokens []int `json:"tokens"`
}
type DetokenizeResponse struct {
Content string `json:"content"`
}
func (llm *llama) Decode(ctx context.Context, tokens []int) (string, error) {
if len(tokens) == 0 {
return "", nil
}
endpoint := fmt.Sprintf("http://127.0.0.1:%d/detokenize", llm.Port)
data, err := json.Marshal(DetokenizeRequest{Tokens: tokens})
if err != nil {
return "", fmt.Errorf("marshaling decode data: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return "", fmt.Errorf("decode request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return "", fmt.Errorf("do decode request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("read decode request: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm decode error: %s", body)
return "", fmt.Errorf("%s", body)
}
var decoded DetokenizeResponse
if err := json.Unmarshal(body, &decoded); err != nil {
return "", fmt.Errorf("unmarshal encode response: %w", err)
}
// decoded content contains a leading whitespace
decoded.Content, _ = strings.CutPrefix(decoded.Content, "")
return decoded.Content, nil
}
type EmbeddingRequest struct {
Content string `json:"content"`
}
type EmbeddingResponse struct {
Embedding []float64 `json:"embedding"`
}
func (llm *llama) Embedding(ctx context.Context, input string) ([]float64, error) {
endpoint := fmt.Sprintf("http://127.0.0.1:%d/embedding", llm.Port)
data, err := json.Marshal(TokenizeRequest{Content: input})
if err != nil {
return nil, fmt.Errorf("error marshaling embed data: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return nil, fmt.Errorf("error creating embed request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("POST embedding: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("error reading embed response: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm encode error: %s", body)
return nil, fmt.Errorf("%s", body)
}
var embedding EmbeddingResponse
if err := json.Unmarshal(body, &embedding); err != nil {
return nil, fmt.Errorf("unmarshal tokenize response: %w", err)
}
return embedding.Embedding, nil
}
// Ping checks that the server subprocess is still running and responding to requests
func (llm *llama) Ping(ctx context.Context) error {
resp, err := http.Head(fmt.Sprintf("http://127.0.0.1:%d", llm.Running.Port))
if err != nil {
return fmt.Errorf("ping resp: %w", err)
}
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("unexpected ping status: %s", resp.Status)
}
return nil
}

383
llm/gguf.go Normal file
View File

@@ -0,0 +1,383 @@
package llm
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
)
type containerGGUF struct {
Version uint32
V1 struct {
NumTensor uint32
NumKV uint32
}
V2 struct {
NumTensor uint64
NumKV uint64
}
}
func (c *containerGGUF) Name() string {
return "gguf"
}
func (c *containerGGUF) Decode(r io.Reader) (model, error) {
binary.Read(r, binary.LittleEndian, &c.Version)
switch c.Version {
case 1:
binary.Read(r, binary.LittleEndian, &c.V1)
case 2:
binary.Read(r, binary.LittleEndian, &c.V2)
default:
return nil, errors.New("invalid version")
}
model := newGGUFModel(c)
if err := model.Decode(r); err != nil {
return nil, err
}
return model, nil
}
const (
ggufTypeUint8 uint32 = iota
ggufTypeInt8
ggufTypeUint16
ggufTypeInt16
ggufTypeUint32
ggufTypeInt32
ggufTypeFloat32
ggufTypeBool
ggufTypeString
ggufTypeArray
ggufTypeUint64
ggufTypeInt64
ggufTypeFloat64
)
type kv map[string]any
type ggufModel struct {
*containerGGUF
kv
}
func newGGUFModel(container *containerGGUF) *ggufModel {
return &ggufModel{
containerGGUF: container,
kv: make(kv),
}
}
func (llm *ggufModel) NumKV() uint64 {
if llm.Version == 1 {
return uint64(llm.V1.NumKV)
}
return llm.V2.NumKV
}
func (llm *ggufModel) ModelFamily() string {
t, ok := llm.kv["general.architecture"].(string)
if ok {
return t
}
return "unknown"
}
func (llm *ggufModel) ModelType() string {
switch llm.ModelFamily() {
case "llama":
if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
heads, headsOK := llm.kv["llama.head_count"].(uint32)
headKVs, headsKVsOK := llm.kv["llama.head_count_kv"].(uint32)
if headsOK && headsKVsOK && heads/headKVs == 8 {
return "70B"
}
return llamaModelType(blocks)
}
case "falcon":
if blocks, ok := llm.kv["falcon.block_count"].(uint32); ok {
return falconModelType(blocks)
}
case "starcoder":
if blocks, ok := llm.kv["starcoder.block_count"].(uint32); ok {
return starCoderModelType(blocks)
}
}
return "unknown"
}
func (llm *ggufModel) FileType() string {
t, ok := llm.kv["general.file_type"].(uint32)
if ok {
return fileType(t)
}
return "unknown"
}
func (llm *ggufModel) Decode(r io.Reader) error {
read := llm.readString
if llm.Version == 1 {
read = llm.readStringV1
}
for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := read(r)
if err != nil {
return err
}
vtype := llm.readU32(r)
var v any
switch vtype {
case ggufTypeUint8:
v = llm.readU8(r)
case ggufTypeInt8:
v = llm.readI8(r)
case ggufTypeUint16:
v = llm.readU16(r)
case ggufTypeInt16:
v = llm.readI16(r)
case ggufTypeUint32:
v = llm.readU32(r)
case ggufTypeInt32:
v = llm.readI32(r)
case ggufTypeUint64:
v = llm.readU64(r)
case ggufTypeInt64:
v = llm.readI64(r)
case ggufTypeFloat32:
v = llm.readF32(r)
case ggufTypeFloat64:
v = llm.readF64(r)
case ggufTypeBool:
v = llm.readBool(r)
case ggufTypeString:
fn := llm.readString
if llm.Version == 1 {
fn = llm.readStringV1
}
s, err := fn(r)
if err != nil {
return err
}
v = s
case ggufTypeArray:
fn := llm.readArray
if llm.Version == 1 {
fn = llm.readArrayV1
}
a, err := fn(r)
if err != nil {
return err
}
v = a
default:
return fmt.Errorf("invalid type: %d", vtype)
}
llm.kv[k] = v
}
return nil
}
func (llm *ggufModel) NumLayers() int64 {
value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
if !exists {
return 0
}
v := value.(uint32)
return int64(v)
}
func (ggufModel) readU8(r io.Reader) uint8 {
var u8 uint8
binary.Read(r, binary.LittleEndian, &u8)
return u8
}
func (ggufModel) readI8(r io.Reader) int8 {
var i8 int8
binary.Read(r, binary.LittleEndian, &i8)
return i8
}
func (ggufModel) readU16(r io.Reader) uint16 {
var u16 uint16
binary.Read(r, binary.LittleEndian, &u16)
return u16
}
func (ggufModel) readI16(r io.Reader) int16 {
var i16 int16
binary.Read(r, binary.LittleEndian, &i16)
return i16
}
func (ggufModel) readU32(r io.Reader) uint32 {
var u32 uint32
binary.Read(r, binary.LittleEndian, &u32)
return u32
}
func (ggufModel) readI32(r io.Reader) int32 {
var i32 int32
binary.Read(r, binary.LittleEndian, &i32)
return i32
}
func (ggufModel) readU64(r io.Reader) uint64 {
var u64 uint64
binary.Read(r, binary.LittleEndian, &u64)
return u64
}
func (ggufModel) readI64(r io.Reader) int64 {
var i64 int64
binary.Read(r, binary.LittleEndian, &i64)
return i64
}
func (ggufModel) readF32(r io.Reader) float32 {
var f32 float32
binary.Read(r, binary.LittleEndian, &f32)
return f32
}
func (ggufModel) readF64(r io.Reader) float64 {
var f64 float64
binary.Read(r, binary.LittleEndian, &f64)
return f64
}
func (ggufModel) readBool(r io.Reader) bool {
var b bool
binary.Read(r, binary.LittleEndian, &b)
return b
}
func (ggufModel) readStringV1(r io.Reader) (string, error) {
var nameLength uint32
binary.Read(r, binary.LittleEndian, &nameLength)
var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
return "", err
}
// gguf v1 strings are null-terminated
b.Truncate(b.Len() - 1)
return b.String(), nil
}
func (llm ggufModel) readString(r io.Reader) (string, error) {
var nameLength uint64
binary.Read(r, binary.LittleEndian, &nameLength)
var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
return "", err
}
return b.String(), nil
}
func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
atype := llm.readU32(r)
n := llm.readU32(r)
for i := 0; uint32(i) < n; i++ {
switch atype {
case ggufTypeUint8:
arr = append(arr, llm.readU8(r))
case ggufTypeInt8:
arr = append(arr, llm.readI8(r))
case ggufTypeUint16:
arr = append(arr, llm.readU16(r))
case ggufTypeInt16:
arr = append(arr, llm.readI16(r))
case ggufTypeUint32:
arr = append(arr, llm.readU32(r))
case ggufTypeInt32:
arr = append(arr, llm.readI32(r))
case ggufTypeFloat32:
arr = append(arr, llm.readF32(r))
case ggufTypeBool:
arr = append(arr, llm.readBool(r))
case ggufTypeString:
s, err := llm.readStringV1(r)
if err != nil {
return nil, err
}
arr = append(arr, s)
default:
return nil, fmt.Errorf("invalid array type: %d", atype)
}
}
return
}
func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
atype := llm.readU32(r)
n := llm.readU64(r)
for i := 0; uint64(i) < n; i++ {
switch atype {
case ggufTypeUint8:
arr = append(arr, llm.readU8(r))
case ggufTypeInt8:
arr = append(arr, llm.readI8(r))
case ggufTypeUint16:
arr = append(arr, llm.readU16(r))
case ggufTypeInt16:
arr = append(arr, llm.readI16(r))
case ggufTypeUint32:
arr = append(arr, llm.readU32(r))
case ggufTypeInt32:
arr = append(arr, llm.readI32(r))
case ggufTypeUint64:
arr = append(arr, llm.readU64(r))
case ggufTypeInt64:
arr = append(arr, llm.readI64(r))
case ggufTypeFloat32:
arr = append(arr, llm.readF32(r))
case ggufTypeFloat64:
arr = append(arr, llm.readF64(r))
case ggufTypeBool:
arr = append(arr, llm.readBool(r))
case ggufTypeString:
s, err := llm.readString(r)
if err != nil {
return nil, err
}
arr = append(arr, s)
default:
return nil, fmt.Errorf("invalid array type: %d", atype)
}
}
return
}
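As a rough illustration of how the pieces above fit together, here is a hypothetical in-package helper (not part of this diff) that decodes a GGUF stream positioned just past the 4-byte file magic, exactly as DecodeGGML leaves it, and reports the metadata the loader relies on:

package llm

import (
	"fmt"
	"io"
)

// describeGGUF is a sketch: it assumes r is positioned immediately after the
// file magic, then decodes the version header and every key/value pair.
func describeGGUF(r io.Reader) error {
	var c containerGGUF
	m, err := c.Decode(r)
	if err != nil {
		return err
	}
	fmt.Println("architecture:", m.ModelFamily()) // general.architecture
	fmt.Println("parameters:  ", m.ModelType())   // derived from <architecture>.block_count
	fmt.Println("quantization:", m.FileType())    // general.file_type
	fmt.Println("layers:      ", m.NumLayers())
	return nil
}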

View File

@@ -1,13 +0,0 @@
//go:build !darwin
// +build !darwin
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cpu --target server --config Release

View File

@@ -1,10 +1,18 @@
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake --fresh -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View File

@@ -1,10 +1,18 @@
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../ggml_patch/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../ggml_patch/0002-34B-model-support.patch
//go:generate git -C ggml apply ../ggml_patch/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../ggml_patch/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake --fresh -S ggml -B ggml/build/gpu -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/gpu --target server --config Release
//go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/metal --target server --config Release
//go:generate mv ggml/build/metal/bin/server ggml/build/metal/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
//go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build gguf/build/metal --target server --config Release
//go:generate mv gguf/build/metal/bin/server gguf/build/metal/bin/ollama-runner

View File

@@ -0,0 +1,26 @@
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch
//go:generate git -C ggml apply ../patches/0001-copy-cuda-runtime-libraries.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner

View File

@@ -0,0 +1,16 @@
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate cmd /c move ggml\build\cpu\bin\Release\server.exe ggml\build\cpu\bin\Release\ollama-runner.exe
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe

View File

@@ -1,32 +0,0 @@
From 8c0ea847ac1460bca534d92266e3471cb31471be Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Tue, 5 Sep 2023 16:05:08 -0400
Subject: [PATCH] metal: add missing barriers for mul-mat #2699
---
ggml-metal.metal | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 3f31252..ce3541f 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
//load data and store to threadgroup memory
half4x4 temp_a;
dequantize_func(x, il, temp_a);
+ threadgroup_barrier(mem_flags::mem_threadgroup);
#pragma unroll(16)
for (int i = 0; i < 16; i++) {
*(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
@@ -1895,6 +1896,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
}
} else {
// block is smaller than 64x32, we should avoid writing data outside of the matrix
+ threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
for (int i = 0; i < 8; i++) {
--
2.39.2 (Apple Git-143)

1
llm/llama.cpp/gguf Submodule

Submodule llm/llama.cpp/gguf added at 40e5ce054f

View File

@@ -0,0 +1,27 @@
From 5dd02993e8cc2ce309157736b95bb572f274a3fd Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Wed, 20 Sep 2023 14:19:52 -0700
Subject: [PATCH] copy cuda runtime libraries
---
CMakeLists.txt | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 824d9f2..dd24137 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -274,6 +274,10 @@ if (LLAMA_CUBLAS)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
endif()
+ configure_file(${CUDAToolkit_LIBRARY_DIR}/libcudart.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcudart.so.${CUDAToolkit_VERSION_MAJOR}.0 COPYONLY)
+ configure_file(${CUDAToolkit_LIBRARY_DIR}/libcublas.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcublas.so.${CUDAToolkit_VERSION_MAJOR} COPYONLY)
+ configure_file(${CUDAToolkit_LIBRARY_DIR}/libcublasLt.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcublasLt.so.${CUDAToolkit_VERSION_MAJOR} COPYONLY)
+
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
--
2.42.0

View File

@@ -0,0 +1,25 @@
From 8dbb5449db259a9c24796e7927d89bee98b6c8f5 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Thu, 5 Oct 2023 11:21:12 -0400
Subject: [PATCH] remove warm up logging
---
common/common.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/common/common.cpp b/common/common.cpp
index 7370017..c4433fe 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -839,8 +839,6 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
}
{
- LOG("warming up the model with an empty run\n");
-
std::vector<llama_token> tmp = { llama_token_bos(lctx), llama_token_eos(lctx), };
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
llama_kv_cache_tokens_rm(lctx, -1, -1);
--
2.39.2 (Apple Git-143)

View File

@@ -0,0 +1,32 @@
From 1e3bc523d8053a77df3ac7126a84d0297ee97ef6 Mon Sep 17 00:00:00 2001
From: Kylin <56434533+KyL0N@users.noreply.github.com>
Date: Tue, 22 Aug 2023 15:14:23 +0800
Subject: [PATCH] ggml : support CUDA's half type for aarch64(#1455) (#2670)
* ggml: support CUDA's half type for aarch64(#1455)
support CUDA's half type for aarch64 in ggml_fp16_t definition
* ggml: use __CUDACC__ to recognise nvcc compiler
---
ggml.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/ggml.h b/ggml.h
index 544ad2d..0ec7ec5 100644
--- a/ggml.h
+++ b/ggml.h
@@ -259,8 +259,9 @@
extern "C" {
#endif
-#ifdef __ARM_NEON
- // we use the built-in 16-bit float type
+#if defined(__ARM_NEON) && defined(__CUDACC__)
+ typedef half ggml_fp16_t;
+#elif defined(__ARM_NEON)
typedef __fp16 ggml_fp16_t;
#else
typedef uint16_t ggml_fp16_t;
--
2.39.2 (Apple Git-143)

752
llm/llama.go Normal file
View File

@@ -0,0 +1,752 @@
package llm
import (
"bufio"
"bytes"
"context"
"embed"
"encoding/json"
"errors"
"fmt"
"io"
"io/fs"
"log"
"math/rand"
"net/http"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"time"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format"
)
//go:embed llama.cpp/*/build/*/bin/*
var llamaCppEmbed embed.FS
type ModelRunner struct {
Path string // path to the model runner executable
Accelerated bool
}
func chooseRunners(workDir, runnerType string) []ModelRunner {
buildPath := path.Join("llama.cpp", runnerType, "build")
var runners []ModelRunner
// set the runners based on the OS
// IMPORTANT: the order of the runners in the array is the priority order
switch runtime.GOOS {
case "darwin":
runners = []ModelRunner{
{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")},
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
}
case "linux":
runners = []ModelRunner{
{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
}
case "windows":
// TODO: select windows GPU runner here when available
runners = []ModelRunner{
{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
}
default:
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
runners = []ModelRunner{
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
}
}
runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
for _, r := range runners {
// find all the files in the runner's bin directory
files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r.Path), "*"))
if err != nil {
// this is expected, ollama may be compiled without all runners packed in
log.Printf("%s runner not found: %v", r.Path, err)
continue
}
for _, f := range files {
runnerAvailable = true
srcFile, err := llamaCppEmbed.Open(f)
if err != nil {
log.Fatalf("read llama runner %s: %v", f, err)
}
defer srcFile.Close()
// create the directory in case it does not exist, filepath.Dir() converts the file path to the OS's format
destPath := filepath.Join(workDir, filepath.Dir(f))
if err := os.MkdirAll(destPath, 0o755); err != nil {
log.Fatalf("create runner temp dir %s: %v", filepath.Dir(f), err)
}
// create the path to the destination file, filepath.Base() converts the file path to the OS's format
destFile := filepath.Join(destPath, filepath.Base(f))
_, err = os.Stat(destFile)
switch {
case errors.Is(err, os.ErrNotExist):
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil {
log.Fatalf("write llama runner %s: %v", f, err)
}
defer destFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil {
log.Fatalf("copy llama runner %s: %v", f, err)
}
case err != nil:
log.Fatalf("stat llama runner %s: %v", f, err)
}
}
}
if !runnerAvailable {
log.Fatalf("%s runner not found", runnerType)
}
// return the runners to try in priority order
localRunnersByPriority := []ModelRunner{}
for _, r := range runners {
// clean the ModelRunner paths so that they match the OS we are running on
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
Path: filepath.Clean(path.Join(workDir, r.Path)),
Accelerated: r.Accelerated,
})
}
return localRunnersByPriority
}
type llamaModel struct {
hyperparameters llamaHyperparameters
}
func (llm *llamaModel) ModelFamily() string {
return "llama"
}
func llamaModelType(numLayer uint32) string {
switch numLayer {
case 26:
return "3B"
case 32:
return "7B"
case 40:
return "13B"
case 48:
return "34B"
case 60:
return "30B"
case 80:
return "65B"
default:
return "unknown"
}
}
func (llm *llamaModel) ModelType() string {
return llamaModelType(llm.hyperparameters.NumLayer)
}
func (llm *llamaModel) FileType() string {
return fileType(llm.hyperparameters.FileType)
}
func (llm *llamaModel) NumLayers() int64 {
return int64(llm.hyperparameters.NumLayer)
}
type llamaHyperparameters struct {
// NumVocab is the size of the model's vocabulary.
NumVocab uint32
// NumEmbd is the size of the model's embedding layer.
NumEmbd uint32
NumMult uint32
NumHead uint32
// NumLayer is the number of layers in the model.
NumLayer uint32
NumRot uint32
// FileType describes the quantization level of the model, e.g. Q4_0, Q5_K, etc.
FileType uint32
}
type Running struct {
Port int
Cmd *exec.Cmd
Cancel context.CancelFunc
exitOnce sync.Once
exitCh chan error // channel to receive the exit status of the subprocess
*StatusWriter // captures error messages from the llama runner process
}
type llama struct {
api.Options
Running
}
var errNoGPU = errors.New("nvidia-smi command failed")
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (int64, error) {
cmd := exec.Command("nvidia-smi", "--query-gpu=memory.free", "--format=csv,noheader,nounits")
var stdout bytes.Buffer
cmd.Stdout = &stdout
err := cmd.Run()
if err != nil {
return 0, errNoGPU
}
var freeMiB int64
scanner := bufio.NewScanner(&stdout)
for scanner.Scan() {
line := scanner.Text()
vram, err := strconv.ParseInt(strings.TrimSpace(line), 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse available VRAM: %v", err)
}
freeMiB += vram
}
freeBytes := freeMiB * 1024 * 1024
if freeBytes < 2*format.GigaByte {
log.Printf("less than 2 GB VRAM available, falling back to CPU only")
freeBytes = 0
}
return freeBytes, nil
}
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
if runtime.GOOS == "linux" {
freeBytes, err := CheckVRAM()
if err != nil {
if !errors.Is(err, errNoGPU) {
log.Print(err.Error())
}
// nvidia driver not installed or no nvidia GPU found
return 0
}
// Calculate bytes per layer
// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
bytesPerLayer := fileSizeBytes / numLayer
// max number of layers we can fit in VRAM, subtract 8% to prevent consuming all available VRAM and running out of memory
layers := int(freeBytes/bytesPerLayer) * 92 / 100
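// illustrative numbers only: a ~3.8 GB model with 32 layers is ~120 MB per layer,
// so 8 GiB of free VRAM gives int(8<<30/(120<<20))*92/100 = 62 offloaded layers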
log.Printf("%d MB VRAM available, loading up to %d GPU layers", freeBytes/(1024*1024), layers)
return layers
}
// default to enable metal on macOS
return 1
}
// StatusWriter is a writer that captures error messages from the llama runner process
type StatusWriter struct {
ErrCh chan error
LastErrMsg string
}
func NewStatusWriter() *StatusWriter {
return &StatusWriter{
ErrCh: make(chan error, 1),
}
}
func (w *StatusWriter) Write(b []byte) (int, error) {
var errMsg string
if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
errMsg = string(bytes.TrimSpace(after))
} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
errMsg = string(bytes.TrimSpace(after))
}
if errMsg != "" {
w.LastErrMsg = errMsg
w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
}
return os.Stderr.Write(b)
}
func newLlama(model string, adapters []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
fileInfo, err := os.Stat(model)
if err != nil {
return nil, err
}
if len(adapters) > 1 {
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
}
numGPU := NumGPU(numLayers, fileInfo.Size(), opts)
params := []string{
"--model", model,
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
"--embedding",
}
if opts.NumGQA > 0 {
params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
}
if len(adapters) > 0 {
// TODO: applying multiple adapters is not supported by the llama.cpp server yet
params = append(params, "--lora", adapters[0])
}
if opts.NumThread > 0 {
params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread))
}
if !opts.F16KV {
params = append(params, "--memory-f32")
}
if opts.UseMLock {
params = append(params, "--mlock")
}
if !opts.UseMMap {
params = append(params, "--no-mmap")
}
if opts.UseNUMA {
params = append(params, "--numa")
}
var runnerErr error
// start the llama.cpp server with a retry in case the port is already in use
for _, runner := range runners {
if runner.Accelerated && numGPU == 0 {
log.Printf("skipping accelerated runner because num_gpu=0")
continue
}
if _, err := os.Stat(runner.Path); err != nil {
log.Printf("llama runner not found: %v", err)
continue
}
port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
ctx, cancel := context.WithCancel(context.Background())
cmd := exec.CommandContext(
ctx,
runner.Path,
append(params, "--port", strconv.Itoa(port))...,
)
cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", filepath.Dir(runner.Path)))
cmd.Stdout = os.Stderr
statusWriter := NewStatusWriter()
cmd.Stderr = statusWriter
llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel, exitCh: make(chan error)}}
log.Print("starting llama runner")
if err := llm.Cmd.Start(); err != nil {
log.Printf("error starting the external llama runner: %v", err)
continue
}
// monitor the llama runner process and signal when it exits
go func() {
err := llm.Cmd.Wait()
// default to printing the exit message of the command process, it will probably just say 'exit status 1'
errMsg := err.Error()
// try to set a better error message if llama runner logs captured an error
if statusWriter.LastErrMsg != "" {
errMsg = statusWriter.LastErrMsg
}
log.Println(errMsg)
// llm.Cmd.Wait() can only be called once, use this exit channel to signal that the process has exited
llm.exitOnce.Do(func() {
close(llm.exitCh)
})
}()
if err := waitForServer(llm); err != nil {
log.Printf("error starting llama runner: %v", err)
llm.Close()
// default the runnerErr to the error returned by the most recent llama runner process
runnerErr = err
// capture the error directly from the runner process, if any
select {
case runnerErr = <-statusWriter.ErrCh:
default:
// the runner process probably timed out
}
// try again
continue
}
// server started successfully
return llm, nil
}
if runnerErr != nil {
// this is the error returned from the llama runner process that failed most recently
return nil, runnerErr
}
return nil, fmt.Errorf("failed to start a llama runner")
}
func waitForServer(llm *llama) error {
start := time.Now()
expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load
ticker := time.NewTicker(200 * time.Millisecond)
defer ticker.Stop()
log.Print("waiting for llama runner to start responding")
for {
select {
case <-llm.exitCh:
// failed to start subprocess
return fmt.Errorf("llama runner process has terminated")
case <-ticker.C:
if time.Now().After(expiresAt) {
// timeout
return fmt.Errorf("timed out waiting for llama runner to start")
}
if err := llm.Ping(context.Background()); err == nil {
// success
log.Printf("llama runner started in %f seconds", time.Since(start).Seconds())
return nil
}
}
}
}
func (llm *llama) Close() {
// signal the sub-process to terminate
llm.Cancel()
// wait for the command to exit to prevent race conditions with the next run
<-llm.exitCh
if llm.StatusWriter != nil && llm.StatusWriter.LastErrMsg != "" {
log.Printf("llama runner stopped with error: %v", llm.StatusWriter.LastErrMsg)
} else {
log.Print("llama runner stopped successfully")
}
}
func (llm *llama) SetOptions(opts api.Options) {
llm.Options = opts
}
type prediction struct {
Content string `json:"content"`
Model string `json:"model"`
Prompt string `json:"prompt"`
Stop bool `json:"stop"`
Timings struct {
PredictedN int `json:"predicted_n"`
PredictedMS float64 `json:"predicted_ms"`
PromptN int `json:"prompt_n"`
PromptMS float64 `json:"prompt_ms"`
}
}
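// Illustrative sketch, not part of the original file: the runner streams
// /completion results as server-sent events, one "data: {...}" line per chunk,
// and each chunk unmarshals into the prediction struct above. The function
// name and the sample payload are made up for demonstration.
func exampleDecodePredictionLine() {
    line := []byte(`data: {"content":"Hello","stop":false,"timings":{"predicted_n":1,"predicted_ms":12.5,"prompt_n":8,"prompt_ms":40.1}}`)
    if evt, ok := bytes.CutPrefix(line, []byte("data: ")); ok {
        var p prediction
        if err := json.Unmarshal(evt, &p); err != nil {
            log.Printf("unmarshal sample prediction: %v", err)
            return
        }
        log.Printf("content=%q stop=%v predicted_n=%d", p.Content, p.Stop, p.Timings.PredictedN)
    }
}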
const maxBufferSize = 512 * format.KiloByte
func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
prevConvo, err := llm.Decode(ctx, prevContext)
if err != nil {
return err
}
// Remove a single leading space from prevConvo if present
prevConvo = strings.TrimPrefix(prevConvo, " ")
var nextContext strings.Builder
nextContext.WriteString(prevConvo)
nextContext.WriteString(prompt)
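// the request body below mirrors the llama.cpp server's /completion options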
request := map[string]any{
"prompt": nextContext.String(),
"stream": true,
"n_predict": llm.NumPredict,
"n_keep": llm.NumKeep,
"temperature": llm.Temperature,
"top_k": llm.TopK,
"top_p": llm.TopP,
"tfs_z": llm.TFSZ,
"typical_p": llm.TypicalP,
"repeat_last_n": llm.RepeatLastN,
"repeat_penalty": llm.RepeatPenalty,
"presence_penalty": llm.PresencePenalty,
"frequency_penalty": llm.FrequencyPenalty,
"mirostat": llm.Mirostat,
"mirostat_tau": llm.MirostatTau,
"mirostat_eta": llm.MirostatEta,
"penalize_nl": llm.PenalizeNewline,
"seed": llm.Seed,
"stop": llm.Stop,
}
// Marshal the request to JSON without escaping HTML characters (<, >, &) in the prompt
buffer := &bytes.Buffer{}
enc := json.NewEncoder(buffer)
enc.SetEscapeHTML(false)
if err := enc.Encode(request); err != nil {
return fmt.Errorf("failed to marshal data: %v", err)
}
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, buffer)
if err != nil {
return fmt.Errorf("error creating POST request: %v", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("POST predict: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed reading llm error response: %w", err)
}
log.Printf("llm predict error: %s", bodyBytes)
return fmt.Errorf("%s", bodyBytes)
}
scanner := bufio.NewScanner(resp.Body)
// increase the buffer size to avoid running out of space
buf := make([]byte, 0, maxBufferSize)
scanner.Buffer(buf, maxBufferSize)
for scanner.Scan() {
select {
case <-ctx.Done():
// This handles the request cancellation
return ctx.Err()
default:
line := scanner.Bytes()
if len(line) == 0 {
continue
}
if evt, ok := bytes.CutPrefix(line, []byte("data: ")); ok {
var p prediction
if err := json.Unmarshal(evt, &p); err != nil {
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
}
if p.Content != "" {
fn(api.GenerateResponse{Response: p.Content})
nextContext.WriteString(p.Content)
}
if p.Stop {
embd, err := llm.Encode(ctx, nextContext.String())
if err != nil {
return fmt.Errorf("encoding context: %v", err)
}
fn(api.GenerateResponse{
Done: true,
Context: embd,
PromptEvalCount: p.Timings.PromptN,
PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
EvalCount: p.Timings.PredictedN,
EvalDuration: parseDurationMs(p.Timings.PredictedMS),
})
return nil
}
}
}
}
if err := scanner.Err(); err != nil {
if strings.Contains(err.Error(), "unexpected EOF") {
// this means the llama runner subprocess crashed
llm.Close()
if llm.StatusWriter != nil && llm.StatusWriter.LastErrMsg != "" {
return fmt.Errorf("llama runner exited: %v", llm.StatusWriter.LastErrMsg)
}
return fmt.Errorf("llama runner exited, you may not have enough available memory to run this model")
}
return fmt.Errorf("error reading llm response: %v", err)
}
return nil
}
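// Illustrative sketch, not part of the original file: one way a caller might
// consume Predict, accumulating the streamed response and keeping the returned
// context for the next turn. The function and variable names are assumptions.
func examplePredict(ctx context.Context, llm *llama, prevContext []int, prompt string) ([]int, string, error) {
    var sb strings.Builder
    var nextContext []int
    err := llm.Predict(ctx, prevContext, prompt, func(r api.GenerateResponse) {
        sb.WriteString(r.Response) // streamed chunk of generated text
        if r.Done {
            nextContext = r.Context // tokens to pass back in as prevContext next time
        }
    })
    return nextContext, sb.String(), err
}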
type TokenizeRequest struct {
Content string `json:"content"`
}
type TokenizeResponse struct {
Tokens []int `json:"tokens"`
}
func (llm *llama) Encode(ctx context.Context, prompt string) ([]int, error) {
endpoint := fmt.Sprintf("http://127.0.0.1:%d/tokenize", llm.Port)
data, err := json.Marshal(TokenizeRequest{Content: prompt})
if err != nil {
return nil, fmt.Errorf("marshaling encode data: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return nil, fmt.Errorf("encode request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("do encode request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("read encode request: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm encode error: %s", body)
return nil, fmt.Errorf("%s", body)
}
var encoded TokenizeResponse
if err := json.Unmarshal(body, &encoded); err != nil {
return nil, fmt.Errorf("unmarshal encode response: %w", err)
}
return encoded.Tokens, nil
}
type DetokenizeRequest struct {
Tokens []int `json:"tokens"`
}
type DetokenizeResponse struct {
Content string `json:"content"`
}
func (llm *llama) Decode(ctx context.Context, tokens []int) (string, error) {
if len(tokens) == 0 {
return "", nil
}
endpoint := fmt.Sprintf("http://127.0.0.1:%d/detokenize", llm.Port)
data, err := json.Marshal(DetokenizeRequest{Tokens: tokens})
if err != nil {
return "", fmt.Errorf("marshaling decode data: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return "", fmt.Errorf("decode request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return "", fmt.Errorf("do decode request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("read decode request: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm decode error: %s", body)
return "", fmt.Errorf("%s", body)
}
var decoded DetokenizeResponse
if err := json.Unmarshal(body, &decoded); err != nil {
return "", fmt.Errorf("unmarshal encode response: %w", err)
}
return decoded.Content, nil
}
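// Illustrative sketch, not part of the original file: Encode and Decode are
// inverses backed by the runner's /tokenize and /detokenize endpoints; this
// round trip is how saved context tokens become text again. The function name
// is an assumption for the example.
func exampleRoundTrip(ctx context.Context, llm *llama, text string) error {
    tokens, err := llm.Encode(ctx, text)
    if err != nil {
        return err
    }
    decoded, err := llm.Decode(ctx, tokens)
    if err != nil {
        return err
    }
    log.Printf("%d tokens round-tripped to %q", len(tokens), decoded)
    return nil
}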
type EmbeddingRequest struct {
Content string `json:"content"`
}
type EmbeddingResponse struct {
Embedding []float64 `json:"embedding"`
}
func (llm *llama) Embedding(ctx context.Context, input string) ([]float64, error) {
endpoint := fmt.Sprintf("http://127.0.0.1:%d/embedding", llm.Port)
data, err := json.Marshal(EmbeddingRequest{Content: input})
if err != nil {
return nil, fmt.Errorf("error marshaling embed data: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return nil, fmt.Errorf("error creating embed request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("POST embedding: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("error reading embed response: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm encode error: %s", body)
return nil, fmt.Errorf("%s", body)
}
var embedding EmbeddingResponse
if err := json.Unmarshal(body, &embedding); err != nil {
return nil, fmt.Errorf("unmarshal tokenize response: %w", err)
}
return embedding.Embedding, nil
}
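// Illustrative sketch, not part of the original file: embeddings from the
// /embedding endpoint can be compared with cosine similarity. The function
// name is an assumption, and it relies on the standard library "math" package,
// which this file does not otherwise import.
func exampleCosineSimilarity(ctx context.Context, llm *llama, a, b string) (float64, error) {
    va, err := llm.Embedding(ctx, a)
    if err != nil {
        return 0, err
    }
    vb, err := llm.Embedding(ctx, b)
    if err != nil {
        return 0, err
    }
    if len(va) == 0 || len(va) != len(vb) {
        return 0, fmt.Errorf("embeddings are empty or of different lengths")
    }
    var dot, na, nb float64
    for i := range va {
        dot += va[i] * vb[i]
        na += va[i] * va[i]
        nb += vb[i] * vb[i]
    }
    return dot / (math.Sqrt(na) * math.Sqrt(nb)), nil
}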
// Ping checks that the server subprocess is still running and responding to requests
func (llm *llama) Ping(ctx context.Context) error {
req, err := http.NewRequestWithContext(ctx, http.MethodHead, fmt.Sprintf("http://127.0.0.1:%d", llm.Port), nil)
if err != nil {
return fmt.Errorf("ping request: %w", err)
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("ping resp: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("unexpected ping status: %s", resp.Status)
}
return nil
}


@@ -5,10 +5,12 @@ import (
 "fmt"
 "log"
 "os"
+"runtime"
 "github.com/pbnjay/memory"
 "github.com/jmorganca/ollama/api"
+"github.com/jmorganca/ollama/format"
 )
 type LLM interface {
@@ -21,7 +23,7 @@ type LLM interface {
 Ping(context.Context) error
 }
-func New(model string, adapters []string, opts api.Options) (LLM, error) {
+func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
 if _, err := os.Stat(model); err != nil {
 return nil, err
 }
@@ -32,52 +34,61 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 }
 defer f.Close()
-ggml, err := DecodeGGML(f, ModelFamilyLlama)
+ggml, err := DecodeGGML(f)
 if err != nil {
 return nil, err
 }
-switch ggml.FileType().String() {
-case "F32", "Q5_0", "Q5_1", "Q8_0":
-if opts.NumGPU != 0 {
-// F32, F16, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
-// cause the runner to segmentation fault so disable GPU
-log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
-opts.NumGPU = 0
+if runtime.GOOS == "darwin" {
+switch ggml.FileType() {
+case "Q8_0":
+if ggml.Name() != "gguf" && opts.NumGPU != 0 {
+// GGML Q8_0 do not support Metal API and will
+// cause the runner to segmentation fault so disable GPU
+log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
+opts.NumGPU = 0
+}
+case "F32", "Q5_0", "Q5_1":
+if opts.NumGPU != 0 {
+// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
+// cause the runner to segmentation fault so disable GPU
+log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
+opts.NumGPU = 0
 }
 }
+var requiredMemory int64
+var f16Multiplier int64 = 2
+switch ggml.ModelType() {
+case "3B", "7B":
+requiredMemory = 8 * format.GigaByte
+case "13B":
+requiredMemory = 16 * format.GigaByte
+case "30B", "34B", "40B":
+requiredMemory = 32 * format.GigaByte
+case "65B", "70B":
+requiredMemory = 64 * format.GigaByte
+case "180B":
+requiredMemory = 128 * format.GigaByte
+f16Multiplier = 4
+}
+systemMemory := int64(memory.TotalMemory())
+if ggml.FileType() == "F16" && requiredMemory*f16Multiplier > systemMemory {
+return nil, fmt.Errorf("F16 model requires at least %s of total memory", format.HumanBytes(requiredMemory))
+} else if requiredMemory > systemMemory {
+return nil, fmt.Errorf("model requires at least %s of total memory", format.HumanBytes(requiredMemory))
+}
+}
-totalResidentMemory := memory.TotalMemory()
-switch ggml.ModelType() {
-case ModelType3B, ModelType7B:
-if ggml.FileType().String() == "F16" && totalResidentMemory < 16*1024*1024 {
-return nil, fmt.Errorf("F16 model requires at least 16GB of memory")
-} else if totalResidentMemory < 8*1024*1024 {
-return nil, fmt.Errorf("model requires at least 8GB of memory")
-}
-case ModelType13B:
-if ggml.FileType().String() == "F16" && totalResidentMemory < 32*1024*1024 {
-return nil, fmt.Errorf("F16 model requires at least 32GB of memory")
-} else if totalResidentMemory < 16*1024*1024 {
-return nil, fmt.Errorf("model requires at least 16GB of memory")
-}
-case ModelType30B, ModelType34B:
-if ggml.FileType().String() == "F16" && totalResidentMemory < 64*1024*1024 {
-return nil, fmt.Errorf("F16 model requires at least 64GB of memory")
-} else if totalResidentMemory < 32*1024*1024 {
-return nil, fmt.Errorf("model requires at least 32GB of memory")
-}
-case ModelType65B:
-if ggml.FileType().String() == "F16" && totalResidentMemory < 128*1024*1024 {
-return nil, fmt.Errorf("F16 model requires at least 128GB of memory")
-} else if totalResidentMemory < 64*1024*1024 {
-return nil, fmt.Errorf("model requires at least 64GB of memory")
-}
-}
-switch ggml.ModelFamily() {
-case ModelFamilyLlama:
-return newLlama(model, adapters, ggmlRunner(), opts)
+switch ggml.Name() {
+case "gguf":
+opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
+return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
+case "ggml", "ggmf", "ggjt", "ggla":
+return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
 default:
 return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
 }
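As a worked example of the new memory check (which only runs when runtime.GOOS == "darwin"): a 70B model sets requiredMemory = 64 * format.GigaByte, so an F16 70B model needs 64 GiB * 2 (the f16Multiplier) = 128 GiB of total system memory before New will start a runner, while a quantized 70B build needs 64 GiB.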

Some files were not shown because too many files have changed in this diff.