Compare commits

..

399 Commits

Author SHA1 Message Date
Matt Williams
05162c56aa Update readme.md 2023-11-29 10:45:07 -08:00
Matt Williams
edd1a2b6e8 function calling for python. already had ts.
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-29 10:06:11 -08:00
Michael
2ae80e1e27 Update README.md
add new recent models as examples
2023-11-28 22:16:37 -05:00
Michael Yang
b173cfc558 Merge pull request #1195 from jmorganca/mxyng/fix-bar-rate
progress: fix bar rate
2023-11-28 11:55:23 -08:00
Michael Yang
424d53ac70 progress: fix bar rate 2023-11-28 11:44:56 -08:00
ftorto
e1a69d44c9 Update faq.md (#1299)
Fix a typo in the CA update command
2023-11-28 09:54:42 -05:00
Jason Jacobs
3d620f9462 ignore jetbrain ides (#1287) 2023-11-27 15:57:45 -05:00
Bruce MacDonald
928950fcc6 update python client create example (#1227)
* add remote create to python example client
2023-11-27 15:36:19 -05:00
Kasumi
39c6d949fc Add Amica to community integrations (#1281) 2023-11-27 10:44:37 -05:00
Jeffrey Morgan
16a9006306 add back f16c instructions on intel mac 2023-11-26 15:59:49 -05:00
Jeffrey Morgan
e9216ea459 fix readline history on linux 2023-11-26 15:59:04 -05:00
Jeffrey Morgan
9e4a316405 update submodule commit 2023-11-26 14:52:00 -05:00
Jeffrey Morgan
9fb5e8399c Fix issues with inputting and formatting multi line strings in ollama run
Co-authored-by: Wen Sun <iwendellsun@gmail.com>
2023-11-26 12:54:29 -05:00
Jing Zhang
82b9b329ff windows CUDA support (#1262)
* Support cuda build in Windows
* Enable dynamic NumGPU allocation for Windows
2023-11-24 17:16:36 -05:00
Jongwook Choi
12e8c12d2b Disable CUDA peer access as a workaround for multi-gpu inference bug (#1261)
When CUDA peer access is enabled, multi-gpu inference will produce
garbage output. This is a known bug of llama.cpp (or nvidia). Until the
upstream bug is fixed, we can disable CUDA peer access temporarily
to ensure correct output.

See #961.
2023-11-24 14:05:57 -05:00
Jeffrey Morgan
d77dde126b consistent cpu instructions on macos and linux 2023-11-22 16:26:46 -05:00
Michael Yang
c7e70cd3bb Merge pull request #1245 from jmorganca/mxyng/gguf-int
fix: gguf int type
2023-11-22 11:42:56 -08:00
Michael Yang
199941cd15 fix: gguf int type 2023-11-22 11:40:30 -08:00
Long Huynh
c9474f7f61 Update README.md - Community Integrations - Obsidian BMO Chatbot plugin (#1239) 2023-11-22 14:32:30 -05:00
Jeffrey Morgan
927e3ba4a4 tag image with correct version when building with build_docker script 2023-11-22 14:32:17 -05:00
Bruce MacDonald
37d95157df fix relative path on create (#1222) 2023-11-21 15:43:17 -05:00
Jeffrey Morgan
2eaa95b417 Update api.md 2023-11-21 15:32:05 -05:00
Kevin Cao
3cd07728f4 Make alt+backspace delete word (#1223) 2023-11-21 12:26:47 -08:00
Michael Yang
ecf8b793f0 Merge pull request #1224 from jmorganca/mxyng/update
update llama.cpp
2023-11-21 12:21:59 -08:00
Matt Williams
abf294826b Merge pull request #1221 from jmorganca/mattw/communityinstalls
add installation packages category to community
2023-11-21 12:12:23 -08:00
Steve Korshakov
ae06bb426b add Llama Coder (#1225)
* add Llama Coder
* Update README.md
2023-11-21 14:08:19 -05:00
Matt Williams
d8e0f62ebb Merge pull request #1159 from jmorganca/mattw/functioncalling
Example: Function Calling in Typescript
2023-11-21 10:06:55 -08:00
Michael Yang
a00fac4ec8 update llama.cpp 2023-11-21 09:50:02 -08:00
Jeffrey Morgan
f2113c1fc7 fix potential error in progress bar calculation 2023-11-21 12:48:20 -05:00
Jeffrey Morgan
6452e2ecb8 fix cases where progress bar would not be fixed size 2023-11-21 12:07:25 -05:00
Matt Williams
9a28e263a5 Update README.md
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-11-21 07:25:32 -08:00
Matt Williams
0c066c9214 Update README.md
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-11-21 07:25:26 -08:00
Jeffrey Morgan
aabd71aede fix rendering and variable width issues on progress bar 2023-11-21 10:02:37 -05:00
Matt Williams
da4d7c9f9c add installation packages category to community
Moved the arch package and someone has added a pr for brew.
that needs to get updated to be a link.

Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-21 06:40:59 -08:00
Matt Williams
f321b13a03 Merge pull request #1178 from tusharhero/install-instructions-archlinux
Add Installation instructions for Archlinux
2023-11-21 06:33:22 -08:00
Matt Williams
5ebcde1541 Merge branch 'main' into install-instructions-archlinux 2023-11-21 06:32:50 -08:00
Matt Williams
45206cb7cc Merge pull request #1218 from danemadsen/main
Update Maid repo
2023-11-21 06:30:33 -08:00
Matt Williams
6e65b84f54 Merge pull request #1219 from dustinblackman/main
docs: Add Oatmeal to terminal integrations
2023-11-21 06:28:12 -08:00
Dustin Blackman
c00ce12e83 docs: Add Oatmeal to terminal integrations 2023-11-21 06:47:43 -05:00
tusharhero
e1cd3152c9 Move Archlinux package to Community Integrations section. 2023-11-21 16:28:50 +05:30
Dane Madsen
0bef3778c9 Update README.md 2023-11-21 21:02:13 +11:00
Dane Madsen
6ebab38b89 Merge branch 'jmorganca:main' into main 2023-11-21 20:01:13 +10:00
Dane Madsen
5d8e864d44 Update Maid repo 2023-11-21 21:00:54 +11:00
Matt Williams
5f7acd0bbd remove 'recent'
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-20 17:03:25 -08:00
Matt Williams
44b3a1ad42 Merge branch 'mattw/functioncalling' of github.com:jmorganca/ollama into mattw/functioncalling
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-20 17:01:41 -08:00
Matt Williams
0260be4414 remove 'recently'
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-20 16:57:07 -08:00
Jeffrey Morgan
a3fcecf943 only set main_gpu if value > 0 is provided 2023-11-20 19:54:04 -05:00
Jeffrey Morgan
df07e4a097 remove redundant filename parameter (#1213) 2023-11-20 17:05:36 -05:00
Michael Yang
0b7ade0d4c Merge pull request #1212 from jmorganca/mxyng/metal
enable metal for fp32, q5_0, q5_1
2023-11-20 13:56:39 -08:00
Michael Yang
19b7a4d715 recent llama.cpp update added kernels for fp32, q5_0, and q5_1 2023-11-20 13:44:31 -08:00
Bruce MacDonald
31ab453d37 resolve FROM path before sending modelfile (#1211) 2023-11-20 16:43:48 -05:00
Jeffrey Morgan
35c4b5ec16 calculate hash separately from http request 2023-11-20 15:45:11 -05:00
James Braza
f24741ff39 Documenting how to view Modelfiles (#723)
* Documented viewing Modelfiles in ollama.ai/library

* Moved Modelfile in ollama.ai down per request
2023-11-20 15:24:29 -05:00
Jeffrey Morgan
8c4022b06b fix initial progress stats 2023-11-20 14:33:46 -05:00
Jeffrey Morgan
433702f421 hide progress stats on completion 2023-11-20 14:22:39 -05:00
Matt Williams
48896f626c Update examples/typescript-functioncalling/extractwp.ts
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-20 10:12:10 -08:00
Matt Williams
c57aee6fba Update examples/typescript-functioncalling/readme.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-20 10:10:42 -08:00
Jeffrey Morgan
6066c70edd restore progress messages for older endpoints 2023-11-20 11:37:17 -05:00
Jeffrey Morgan
f10ac5de19 restore stats updated every second to progress bar 2023-11-20 10:58:19 -05:00
Jeffrey Morgan
93a108214c only show decimal points for smaller file size numbers 2023-11-20 10:58:19 -05:00
Purinda Gunasekara
be61a81758 main-gpu argument is not getting passed to llamacpp, fixed. (#1192) 2023-11-20 10:52:52 -05:00
Toni Soriano
2fdf1b5ff8 add laravel package to README.md (#1208)
Co-authored-by: Toni <cloudstudio@Tonis-Mac-mini.local>
2023-11-20 10:48:35 -05:00
Huy Le
331068b964 Adding ogpt.nvim into the list of plugins! (#1190)
* adding ollama.nvim for visibility

* adding an ogpt.nvim neovim plugin
2023-11-20 10:39:14 -05:00
Andy Brenneke
0179d8eb6b Add Rivet to Community Integrations (#1183) 2023-11-20 10:36:47 -05:00
Eli Bendersky
be48741308 README: link to LangChainGo for talking to ollama, with an example (#1206) 2023-11-20 10:35:07 -05:00
Jeffrey Morgan
6bbd6e26fb fix temporary newline created and removed with spinner in ollama run 2023-11-20 00:49:08 -05:00
Jeffrey Morgan
e6ad4813d3 dont crash when redirecting stderr 2023-11-19 23:50:45 -05:00
Jeffrey Morgan
13ba6df5ab enable cpu instructions on intel macs 2023-11-19 23:20:26 -05:00
Jeffrey Morgan
9d73d3a6b5 add back part.Reset() 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
72cd336410 dont retry on upload complete context cancel 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
1bd594b2fa revert to using one open file for blob uploads 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
9a8c21ac3d use exponential everywhere 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
f6b317e8c9 fix sending too little data in chunk upload body 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
ac5076ce1e exponential backoff up to 30s 2023-11-19 14:32:19 -05:00
Michael Yang
42c2e3a624 upload: retry complete upload 2023-11-19 14:32:19 -05:00
Michael Yang
cb42589792 adjust download/upload parts 2023-11-19 14:32:19 -05:00
Jeffrey Morgan
258addc799 fix comment in progress.go 2023-11-19 13:46:19 -05:00
Jeffrey Morgan
c06b9b7304 update progress rendering to be closer to v0.1.10 2023-11-19 13:43:21 -05:00
Jeffrey Morgan
95b9acd324 improve pull percentage rendering 2023-11-19 11:00:43 -05:00
Jeffrey Morgan
04cbf5ccc0 progress bar styling improvements 2023-11-19 09:54:33 -05:00
Jeffrey Morgan
e1d7056496 update progress statuses 2023-11-19 09:21:13 -05:00
Jeffrey Morgan
02524a56ff check retry for authorization error 2023-11-19 00:19:53 -05:00
Jeffrey Morgan
1657c6abc7 add note to specify JSON in the prompt when using JSON mode 2023-11-18 22:59:26 -05:00
Jeffrey Morgan
12e046f12a remove unused function 2023-11-18 22:16:51 -05:00
Jeffrey Morgan
36a3bbf65f Update llm/llama.go 2023-11-18 21:25:07 -05:00
Bruce MacDonald
43a726149d fix potentially inaccurate error message 2023-11-18 21:25:07 -05:00
Jeffrey Morgan
984714f131 update status text when transfering blob on ollama create 2023-11-18 09:40:10 -05:00
Jeffrey Morgan
bab9494176 add - separator to temp file created on ollama create 2023-11-18 09:39:52 -05:00
Jeffrey Morgan
85e4441c6a cache docker builds 2023-11-18 08:51:38 -05:00
Michael Yang
42e43736a4 Merge pull request #1186 from jmorganca/mxyng/copy-blob
fix cross device rename
2023-11-17 21:54:53 -08:00
Michael Yang
c6e6c8ee7e fix cross device rename 2023-11-17 15:22:17 -08:00
Jeffrey Morgan
a185b29719 fix install script error on linux 2023-11-17 18:00:41 -05:00
Michael Yang
dc84b20d6b Merge pull request #1104 from jmorganca/mxyng/jupyter
add jupyter notebook example
2023-11-17 14:46:26 -08:00
Michael Yang
ad8659b980 Merge pull request #1161 from jmorganca/mxyng/systemd-placeholder
placeholder environment variables
2023-11-17 14:45:38 -08:00
Michael Yang
c1bbf5ddee Merge pull request #1134 from jmorganca/mxyng/progress
progress bar
2023-11-17 14:03:35 -08:00
Bruce MacDonald
0b19e24d81 only retry once on auth failure (#1175) 2023-11-17 14:22:35 -05:00
Michael Yang
3cb07d2773 simplify StopAndClear 2023-11-17 10:26:22 -08:00
Michael Yang
976068369b stop all spinners on progress stop 2023-11-17 10:06:19 -08:00
Michael Yang
4d677ee389 no divide by zero 2023-11-17 10:06:19 -08:00
Michael Yang
7ea905871a only move cursor up if pos > 0 2023-11-17 10:06:19 -08:00
Michael Yang
d6ecaa2cbf update progress responses 2023-11-17 10:06:19 -08:00
Michael Yang
4dcf7a59b1 generate progress 2023-11-17 10:06:19 -08:00
Michael Yang
1c0e092ead progress cmd 2023-11-17 10:06:19 -08:00
Michael Yang
c4a3ccd7ac progress 2023-11-17 10:06:19 -08:00
Michael Yang
9f04e5a8ea format bytes 2023-11-17 10:06:19 -08:00
Michael Yang
f91bb2f7f0 remove progressbar 2023-11-17 10:06:19 -08:00
Michael Yang
0813387414 Merge pull request #1177 from jmorganca/mxyng/faq
faq: fix heading and add more details
2023-11-17 10:05:21 -08:00
Michael Yang
4936b5bb37 add jupyter readme 2023-11-17 10:04:52 -08:00
tusharhero
786288829e Make Archlinux a sub-heading of Linux. 2023-11-17 23:17:36 +05:30
tusharhero
72dcc952b6 Add Installation instructions for Archlinux
Pacman is the recommended installation method. And the package is in
the official repository, so makes sense to mention it in the README.
2023-11-17 23:13:40 +05:30
Michael Yang
f7f6d6c693 Update examples/jupyter-notebook/ollama.ipynb
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-17 09:30:07 -08:00
Michael Yang
a3053b66d2 add jupyter notebook example 2023-11-17 09:30:07 -08:00
Michael Yang
c82ead4d01 faq: fix heading and add more details 2023-11-17 09:02:17 -08:00
Michael Yang
90860b6a7e update faq (#1176) 2023-11-17 11:42:58 -05:00
Jeffrey Morgan
81092147c4 remove unnecessary -X POST from example curl commands 2023-11-17 09:50:38 -05:00
Jeffrey Morgan
92656a74b7 Use llama2 as the model in api.md 2023-11-17 07:17:51 -05:00
Jeffrey Morgan
41434a7cdc build intel mac with correct binary and compile flags 2023-11-16 22:14:51 -05:00
Michael Yang
71687ab809 Merge pull request #1164 from jmorganca/mxyng/faq
update faq
2023-11-16 17:20:18 -08:00
Michael Yang
d8842b4d4b update faq 2023-11-16 17:07:36 -08:00
Michael Yang
32add8577d placeholder environment variables 2023-11-16 16:57:39 -08:00
Michael Yang
585f9c01fa Merge pull request #1160 from jmorganca/mxyng/faq
update faq
2023-11-16 16:48:51 -08:00
Michael Yang
c13bde962d Update docs/faq.md
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-11-16 16:48:38 -08:00
Michael Yang
ee307937fd update faq 2023-11-16 16:46:43 -08:00
Matt Williams
ab6639bc47 Merge pull request #1074 from jmorganca/mattw/loganalysisexample
Log Analysis Example
2023-11-16 16:33:07 -08:00
Matt Williams
fefae84c06 example: function calling
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-16 16:26:29 -08:00
Jeffrey Morgan
dbe6e77472 Update README.md 2023-11-16 16:46:38 -05:00
Bruce MacDonald
4b3f4bc7d9 return failure details when unauthorized to push (#1131)
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-11-16 16:44:18 -05:00
Michael Yang
a5ccf742c1 fix cross repo mounts 2023-11-16 16:33:30 -05:00
Michael Yang
e33ef391cd fix push scope error for inherited model 2023-11-16 16:33:30 -05:00
yanndegat
75295b9528 install: fix enable contrib on debian 12 (#1151)
On debian 12, sources definitions have moved from
/etc/apt/sources.list to /etc/apt/sources.list.d/debian.sources
2023-11-16 15:53:06 -05:00
Matt Williams
db5ef3004c Merge pull request #1079 from jmorganca/mattw/jsonexample
Add example using JSON format output
2023-11-16 09:13:34 -08:00
Michael Yang
b5f158f046 add faq for proxies (#1147) 2023-11-16 11:43:37 -05:00
Piero Savastano
30141b42e9 Add Cheshire Cat to community integrations (#1124) 2023-11-16 11:30:54 -05:00
Dane Madsen
5f301ece1d Add Maid to Community Integrations (#1120) 2023-11-16 11:27:53 -05:00
Michael Yang
77954bea0e Merge pull request #898 from jmorganca/mxyng/build-context
create remote models
2023-11-15 16:41:12 -08:00
Michael Yang
54f92f01cb update docs 2023-11-15 15:28:15 -08:00
Michael
30ae6e731e Update randomaddresses.py 2023-11-15 18:24:50 -05:00
Michael
b28a30f7ba Update examples/python-json-datagenerator/predefinedschema.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-15 18:23:36 -05:00
Jeffrey Morgan
ecd71347ab Update faq.md 2023-11-15 18:17:13 -05:00
Jeffrey Morgan
8ee4cbea0f Remove table of contents in faq.md 2023-11-15 18:16:27 -05:00
Michael Yang
652d90e1c7 Update server/images.go
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-15 15:16:23 -08:00
Michael Yang
bc22d5a38b no blob response 2023-11-15 15:16:23 -08:00
Michael Yang
71d71d0988 update docs 2023-11-15 15:16:23 -08:00
Michael Yang
1901044b07 use checksum reference 2023-11-15 15:16:23 -08:00
Michael Yang
d660eebf22 fix create from model tag 2023-11-15 15:16:23 -08:00
Michael Yang
cac11c9137 update api docs 2023-11-15 15:16:23 -08:00
Michael Yang
a07c935d34 ignore non blobs 2023-11-15 15:16:23 -08:00
Michael Yang
1552cee59f client create modelfile 2023-11-15 15:16:23 -08:00
Michael Yang
3ca56b5ada add create modelfile field 2023-11-15 15:16:23 -08:00
Michael Yang
b0d14ed51c refactor create model 2023-11-15 15:16:23 -08:00
Matt Williams
f61f340279 FAQ: answer a few faq questions (#1128)
* faq: does ollama share my prompts

Signed-off-by: Matt Williams <m@technovangelist.com>

* faq: ollama and openai

Signed-off-by: Matt Williams <m@technovangelist.com>

* faq: vscode plugins

Signed-off-by: Matt Williams <m@technovangelist.com>

* faq: send a doc to Ollama

Signed-off-by: Matt Williams <m@technovangelist.com>

* extra spacing

Signed-off-by: Matt Williams <m@technovangelist.com>

* Update faq.md

* Update faq.md

---------

Signed-off-by: Matt Williams <m@technovangelist.com>
Co-authored-by: Michael <mchiang0610@users.noreply.github.com>
2023-11-15 18:05:13 -05:00
Michael Yang
686f85d6ca Merge pull request #1132 from jmorganca/mxyng/human-bytes
replace go-humanize with format.HumanBytes
2023-11-15 09:46:21 -08:00
bnodnarb
85951d25ef Created tutorial for running Ollama on NVIDIA Jetson devices (#1098) 2023-11-15 12:32:37 -05:00
Dane Madsen
779e196ef6 Merge branch 'jmorganca:main' into main 2023-11-15 21:38:07 +10:00
Michael Yang
01ea6002c4 replace go-humanize with format.HumanBytes 2023-11-14 14:57:41 -08:00
Jeffrey Morgan
423862042a treat ollama run model < file as entire prompt, not prompt-per-line (#1126)
Previously, `ollama run` treated a non-terminal stdin (such as `ollama run model < file`) as containing one prompt per line. To run inference on a multi-line prompt, the only non-API workaround was to run `ollama run` interactively and wrap the prompt in `"""..."""`.

Now, `ollama run` treats a non-terminal stdin as containing a single prompt. For example, if `myprompt.txt` is a multi-line file, then `ollama run model < myprompt.txt` would treat `myprompt.txt`'s entire contents as the prompt.

Co-authored-by: Quinn Slack <quinn@slack.org>
2023-11-14 16:42:21 -05:00
Bruce MacDonald
df18486c35 Move /generate format to optional parameters (#1127)
This field is optional and should be under the `Advanced parameters` header
2023-11-14 16:12:30 -05:00
Jeffrey Morgan
4e612a2e92 use stdout fd for terminal size (#1125) 2023-11-14 16:09:09 -05:00
Matt Williams
47ffb81db7 Update examples/python-json-datagenerator/readme.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:33:34 -08:00
Matt Williams
69795d2db0 Update examples/python-json-datagenerator/readme.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:33:16 -08:00
Matt Williams
acde0819d9 Update examples/python-json-datagenerator/randomaddresses.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:33:02 -08:00
Matt Williams
f748331aa3 Update examples/python-json-datagenerator/predefinedschema.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:32:45 -08:00
Matt Williams
f4edc302a8 Update examples/python-loganalysis/readme.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:31:22 -08:00
Matt Williams
64b7e0c218 Update examples/python-loganalysis/loganalysis.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:31:05 -08:00
Matt Williams
eced0d52ab Update examples/python-loganalysis/loganalysis.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:30:30 -08:00
Matt Williams
96bf9cafa7 Update examples/python-loganalysis/loganalysis.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-14 10:30:17 -08:00
Jeffrey Morgan
6e0f686afa --format json should work in interactive mode 2023-11-14 10:22:03 -05:00
Dane Madsen
c1a5220860 Update README.md 2023-11-14 15:31:31 +10:00
Dane Madsen
3b15175a70 Add maid to community integrations 2023-11-14 15:30:03 +10:00
Jeffrey Morgan
c1844bbee2 add json mode to cli (#1095) 2023-11-13 21:54:02 -05:00
Huy Le
cb745965ce adding ollama.nvim for visibility (#1115) 2023-11-13 17:00:17 -05:00
Enrico Ros
8d29b6a2b6 New big-AGI integration (#1078)
* New big-AGI integration

Ollama works great in big-AGI, and this document explains how to link the two projects.

* Update README.md
2023-11-13 16:59:00 -05:00
Ilya Breitburg
724aa64bee Add Dart library to README.md (#1106) 2023-11-13 14:50:42 -05:00
Michael Yang
d91c103e74 Merge pull request #1055 from dansreis/946-fix-incorrect-base-model-name
Fixed incorrect base model name
2023-11-13 08:42:55 -08:00
Kevin Hermawan
98ec7d81e3 Add OllamaKit to the community integrations (#1085) 2023-11-11 14:41:42 -08:00
Matt Williams
b6817a83d8 Add gif and finish readme
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-10 16:41:48 -06:00
Matt Williams
73f3448ede add example showing use of JSON format
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-10 16:33:56 -06:00
Daniel Reis
7c438f2c53 Replaced method 2023-11-10 20:22:03 +00:00
Daniel Reis
6e46338d44 Reverting previous changes 2023-11-10 20:21:35 +00:00
Jeffrey Morgan
cdddd3df65 add format to example python client 2023-11-10 10:22:21 -08:00
Daniel Hiltgen
afa61bdf45 Merge pull request #1075 from jmorganca/dhiltgen/unexpected-eof
Resume chunk download on UnexpectedEOF errors
2023-11-10 08:48:27 -08:00
Daniel Hiltgen
cc54a416c6 Resume chunk download on UnexpectedEOF errors
If the chunk download is interrupted, resume from where we left off
2023-11-10 08:29:42 -08:00
Matt Williams
c819d7f68a Merge pull request #955 from jmorganca/mattw/example-bash-compare
docs: add examples using bash to compare models
2023-11-10 08:59:32 -06:00
Matt Williams
e4f59ba073 better streaming plus gif
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-10 08:55:17 -06:00
Matt Williams
5de568bffe Add a simple log analysis example
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-10 08:28:52 -06:00
Jeffrey Morgan
5cba29b9d6 JSON mode: add `"format" as an api parameter (#1051)
* add `"format": "json"` as an API parameter
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-09 16:44:02 -08:00
Daniel Reis
d17730356a Removed inline parse model path 2023-11-09 22:44:26 +00:00
Daniel Reis
32d79a6eea Using 'GetShortTagname' method instead 2023-11-09 22:40:37 +00:00
Bruce MacDonald
5b39503bcd document specifying multiple stop params (#1061) 2023-11-09 13:16:26 -08:00
Bruce MacDonald
1ae84bc2a2 skip gpu if less than 2GB VRAM are available (#1059) 2023-11-09 13:16:16 -08:00
Bruce MacDonald
db8bf336fc Update README.md 2023-11-09 12:53:24 -08:00
Nick Anderson
d77e094a90 Added gptel to list of integrations (#1062) 2023-11-09 12:52:36 -08:00
Matt Williams
dd3dc47ddb Merge pull request #992 from aashish2057/aashish2057/langchainjs_doc_update 2023-11-09 05:08:31 -08:00
Michael Yang
c5e1bbabda instead of static number of parameters for each model family, get the real number from the tensors (#1022)
* parse tensor info

* refactor decoder

* return actual parameter count

* explicit rounding

* s/Human/HumanNumber/
2023-11-08 17:55:46 -08:00
Bruce MacDonald
a49d6acc1e add a complete /generate options example (#1035) 2023-11-08 16:44:36 -08:00
Moritz Poldrack
6e9bcdb9b3 progressbar: make start and end seamless (#1042) 2023-11-08 16:42:40 -08:00
Matt Williams
13086363bd Update as per bmacd
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-08 18:09:05 -06:00
Bruce MacDonald
ec2a31e9b3 support raw generation requests (#952)
- add the optional `raw` generate request parameter to bypass prompt formatting and response context
-add raw request to docs
2023-11-08 14:05:02 -08:00
Amith Koujalgi
ec84c02d54 Add Ollama4j Java library to the list of community libraries (#1044) 2023-11-08 11:04:32 -08:00
Kevin Hermawan
2a88b66bc9 Add Ollamac to community integrations (#1043) 2023-11-08 11:01:09 -08:00
Jeffrey Morgan
2d0faea96c clean up README.md 2023-11-08 00:03:29 -08:00
Jeffrey Morgan
637142181a clean up README.md 2023-11-07 23:52:31 -08:00
Matt Williams
bcbff421c9 Merge pull request #1023 from jmorganca/mattw/wherearemodelsfaq 2023-11-07 17:59:54 -08:00
thealhu
1359d6cf3b Fix sudo variable in install.sh (#1034)
It was forgotten to replace sudo at one place with the variable for sudo.
2023-11-07 09:59:57 -08:00
Omar Magdy
6e2d0224d9 Added logseq ollama plugin (#1029) 2023-11-07 09:58:13 -08:00
Ikko Eltociear Ashimine
921406f721 Update client.py (#1026)
recieve -> receive
2023-11-07 09:55:47 -08:00
Michael Yang
c7047d7353 Merge pull request #959 from jmorganca/mxyng/example-k8s 2023-11-07 10:43:21 -06:00
Matt Williams
1d155caba3 docs: clarify where the models are stored in the faq
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-11-06 14:38:49 -08:00
Michael Yang
866324b9a5 Merge pull request #943 from tjbck/patch-1
doc: categorised community integrations + added ollama-webui
2023-11-06 11:35:39 -08:00
Michael Yang
145e060855 Apply suggestions from code review
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-11-06 11:32:23 -08:00
Michael Yang
146072113d Merge pull request #993 from jmorganca/mxyng/cleanup
cleanup upload and download errors
2023-11-06 11:32:12 -08:00
Timothy Jaeryang Baek
33d31d1b56 Merge branch 'main' into patch-1 2023-11-06 14:27:02 -05:00
Dr. David A. Kunz
274c6cbf4c Added gen.nvim to community integrations (#996) 2023-11-06 10:51:41 -08:00
Elton Renda
7ebbd89bbf add hass-ollama-conversation (#999) 2023-11-06 10:50:35 -08:00
Lars Grammel
9079b1bb6d Add ModelFusion community integration (#1020) 2023-11-06 10:46:16 -08:00
Timothy Jaeryang Baek
6febde7200 Merge branch 'main' into patch-1 2023-11-04 19:12:18 -05:00
pepperoni21
325cfcd9ff Added ollama-rs to community integrations (#995)
Co-authored-by: pepperoni21 <pepperoni2100@gmail.com>
2023-11-04 14:51:29 -07:00
Jeffrey Morgan
639d0fd070 Update README.md 2023-11-04 12:24:24 -07:00
Jeffrey Morgan
e21579a0f1 Restore system prompt on requests 2023-11-03 17:26:45 -07:00
Jeffrey Morgan
c44b619428 remove unused fmt.Println 2023-11-03 17:24:58 -07:00
Michael Yang
434a6f9d46 return last error 2023-11-03 16:49:51 -07:00
aashish2057
b13586cc72 update langchainjs doc 2023-11-03 18:45:19 -05:00
Jeffrey Morgan
17678b7225 Restore system prompt on requests and default num_keep to 0 2023-11-03 13:25:25 -07:00
Michael Yang
84725ec7e3 refactor part reset 2023-11-03 09:20:32 -07:00
Bruce MacDonald
6109bebba6 reformat api docs for more examples (#972) 2023-11-03 10:57:00 -04:00
Noah Gitsham
8ae8c9fa8c Remove duplicate "install" in GPU support warning (#984) 2023-11-03 00:45:14 -07:00
Noah Gitsham
f39daff461 Add missing "be" to GPU support warning message (#983) 2023-11-02 18:37:12 -07:00
Jeffrey Morgan
c50b01bc21 check request.Context for initial system prompt 2023-11-02 18:17:00 -07:00
Bruce MacDonald
b9dc875401 remove modelfile context deprecated in v0.0.7 (#974) 2023-11-02 20:52:56 -04:00
Jeffrey Morgan
06589a3b30 Set NumKeep to 4 by default (#982) 2023-11-02 17:26:11 -07:00
Michael Yang
1fd511e661 Merge pull request #975 from jmorganca/mxyng/downloads
update downloads to use retry wrapper
2023-11-02 16:12:48 -07:00
Michael Yang
c01bbe94fd Merge pull request #979 from jmorganca/mxyng/num-keep
update default NumKeep
2023-11-02 15:48:44 -07:00
Jeffrey Morgan
1beb5645a9 only use system prompt if context is not provided (#978) 2023-11-02 15:48:02 -07:00
Michael Yang
6db3691b8f update default NumKeep 2023-11-02 15:47:35 -07:00
Michael Yang
fe5a872444 fix upload 2023-11-02 13:25:58 -07:00
Michael Yang
d39709260f download with retry 2023-11-02 13:16:11 -07:00
Michael Yang
60bb3c03a1 use http.Method 2023-11-02 13:12:45 -07:00
Jeffrey Morgan
2e53704685 default rope params to 0 for new models (#968) 2023-11-02 08:41:30 -07:00
Michael Yang
527f9a7975 Merge pull request #966 from jmorganca/mxyng/fix-log 2023-11-01 17:49:10 -07:00
Michael Yang
c4cc738cbf fix log 2023-11-01 17:18:11 -07:00
Michael Yang
2c6189f4fe Merge pull request #750 from jmorganca/mxyng/concurrent-uploads
concurrent uploads
2023-11-01 15:00:01 -07:00
Michael Yang
dccac8c8fa k8s example 2023-11-01 14:52:58 -07:00
Michael Yang
c05ab9a86e Merge pull request #965 from jmorganca/mxyng/go-mod-tidy
go mod tidy
2023-11-01 11:55:43 -07:00
Michael Yang
f42f3d9b27 go fmt 2023-11-01 11:55:08 -07:00
Michael Yang
341fb7e35f go mod tidy 2023-11-01 11:54:25 -07:00
Michael
f31961637f Update README.md 2023-11-01 12:20:55 -04:00
Michael Yang
ec3614812a Merge pull request #960 from jmorganca/mxyng/fix-tautology 2023-11-01 08:30:49 -07:00
Michael Yang
f14969314a Merge pull request #958 from jmorganca/mxyng/append-ld-library-path 2023-11-01 08:30:38 -07:00
Bruce MacDonald
1fb9288661 notify that the ollama api is available after linux install (#954) 2023-11-01 11:28:26 -04:00
Matt Williams
01a03caa20 Merge pull request #956 from jmorganca/mattw/apidocupdate 2023-10-31 21:43:11 -07:00
Michael Yang
bf6786bb39 fix tautology 2023-10-31 20:49:48 -07:00
Michael Yang
642128b75a append LD_LIBRARY_PATH 2023-10-31 15:54:49 -07:00
Matt Williams
f21bd6210d docs: clarify and clean up API docs
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-31 13:11:33 -07:00
Matt Williams
80362fedce better readme
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-31 12:40:46 -07:00
Matt Williams
5757925060 add a gif
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-31 11:52:01 -07:00
Michael
4512301756 Update README.md 2023-10-31 13:25:36 -04:00
Matt Williams
2236a93efc docs: add examples using bash to compare models
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-31 09:12:39 -07:00
Matt Williams
ad88799411 Merge pull request #949 from jmorganca/matt/fixPrivateGPT
fix: private gpt example was broken due to changes in chroma
2023-10-30 17:17:00 -07:00
Bruce MacDonald
0818b5e318 readline windows terminal support (#950)
- update the readline package to have basic support on windows, this is not full feature parity with the unix cli yet
2023-10-30 16:18:12 -04:00
Matt Williams
1df6100c77 Update examples/langchain-python-rag-privategpt/privateGPT.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-30 12:48:17 -07:00
Matt Williams
5c48fe1fb0 Update examples/langchain-python-rag-privategpt/constants.py
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-30 12:47:56 -07:00
Dirk Loss
874bb31986 Fix conversion command for gptneox (#948) 2023-10-30 14:34:29 -04:00
Matt Williams
f7856a57eb fix: private gpt example was broken due to changes in chroma
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-30 10:56:25 -07:00
Bruce MacDonald
f9a4281124 clean up: remove server functions from client (#937) 2023-10-30 11:10:18 -04:00
Timothy Jaeryang Baek
96da0792e6 doc: OllamaSharp for .NET moved to libraries 2023-10-28 16:18:38 -05:00
Timothy Jaeryang Baek
95d24262fc doc: categorised community integrations + added web-ui 2023-10-28 16:02:13 -05:00
Jeffrey Morgan
8d03bd7b54 remove +build directive in term.go 2023-10-28 09:56:03 -07:00
Jeffrey Morgan
9ec16f0f03 fix formatting when exiting ollama run 2023-10-27 21:26:23 -07:00
Jeffrey Morgan
57a58db1b0 history: update pos after compact 2023-10-27 20:38:03 -07:00
Jeffrey Morgan
2d75a4537c close input channel when receiving io.EOF 2023-10-27 20:26:04 -07:00
Jeffrey Morgan
4748609611 Don't quit ioloop on NUL character (#940)
* dont quit ioloop on 0 rune

* check for closed channel

* remove unused error on `Close()`
2023-10-27 20:01:48 -07:00
Jeffrey Morgan
c0dcea1398 Update faq.md 2023-10-27 18:29:00 -07:00
Michael Yang
115fc56eb7 calculate and verify md5 checksum 2023-10-27 17:07:33 -07:00
Michael Yang
186f685224 retry PUT 2023-10-27 17:07:33 -07:00
Michael Yang
12efcbb057 comments 2023-10-27 17:07:33 -07:00
Michael Yang
4e09aab8b9 concurrent uploads 2023-10-27 17:07:33 -07:00
Jeffrey Morgan
3a1ed9ff70 restore building runner with AVX on by default (#900) 2023-10-27 12:13:44 -07:00
Bruce MacDonald
6d283882b1 catch insufficient permissions nvidia err (#934) 2023-10-27 12:42:40 -04:00
Bruce MacDonald
5c3491f425 allow for a configurable ollama model storage directory (#897)
* allow for a configurable ollama models directory

- set OLLAMA_MODELS in the environment that ollama is running in to change where model files are stored
- update docs

Co-Authored-By: Jeffrey Morgan <jmorganca@gmail.com>
Co-Authored-By: Jay Nakrani <dhananjaynakrani@gmail.com>
Co-Authored-By: Akhil Acharya <akhilcacharya@gmail.com>
Co-Authored-By: Sasha Devol <sasha.devol@protonmail.com>
2023-10-27 10:19:59 -04:00
James Braza
e5d1ce4dde Tweaks to README.md (#906)
* Mentioned Docker Hub in docs
* Consolidated brew installs to one line
2023-10-27 00:10:23 -07:00
Bruce MacDonald
2665f3c28e offload 75% of available vram to improve stability (#921) 2023-10-26 20:49:55 -04:00
Patrick Devine
a79f030e75 add bracketed paste mode (#922) 2023-10-26 15:57:00 -07:00
Michael Yang
9bc5864a03 Merge pull request #918 from jmorganca/mxyng/fix-out-of-space
fix(download): no retry when out of space
2023-10-26 12:24:20 -07:00
Michael Yang
b88cc0fac9 Merge pull request #916 from jmorganca/mxyng/fix-client-host
fix(client): trim trailing slash
2023-10-26 12:24:12 -07:00
Patrick Devine
5b2cf16397 fix docker build annotations (#917) 2023-10-26 12:00:33 -07:00
Michael Yang
910816a532 fix(download): no retry when out of space 2023-10-26 11:34:07 -07:00
Michael Yang
28c3f288e2 client: fix trailing slash 2023-10-26 11:09:38 -07:00
Patrick Devine
deeac961bb new readline library (#847) 2023-10-25 16:41:18 -07:00
Jeffrey Morgan
49443e7da5 fix typo in README.md 2023-10-25 16:19:27 -07:00
Ajay Kemparaj
bb8464c0d2 update golang.org/x/net fixes CVE-2023-3978,CVE-2023-39325,CVE-2023-44487 (#855) 2023-10-25 16:17:24 -07:00
Michael Yang
daa5bb4473 Merge pull request #907 from jmorganca/mxyng/linux
update linux.md
2023-10-25 15:03:34 -07:00
Michael Yang
92119de9d8 update linux.md 2023-10-25 14:57:50 -07:00
Michael Yang
53b0ba8d43 Merge pull request #893 from jmorganca/mxyng/update-faq
update faq
2023-10-24 16:02:35 -07:00
Michael Yang
db342691f9 Update docs/faq.md
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-24 13:59:33 -07:00
Bruce MacDonald
cecf83141e Linux uninstall instructions (#894) 2023-10-24 14:07:05 -04:00
Michael Yang
a5a2adf1ec update faq 2023-10-24 10:54:16 -07:00
Jeffrey Morgan
b0c9cd0f3b fix metal assertion errors 2023-10-24 00:32:36 -07:00
Jeffrey Morgan
77f61c6301 update submodule commit 2023-10-24 00:30:27 -07:00
Jeffrey Morgan
f3604534e5 update submodule commit 2023-10-23 23:59:12 -07:00
Jeffrey Morgan
914428351a Update import.md 2023-10-23 17:44:53 -07:00
Jeffrey Morgan
9afea9e3b9 Update import.md
Separate GGUF and PyTorch guides
2023-10-23 17:42:17 -07:00
Bruce MacDonald
c039432b5c add current user to ollama group on install (#772) 2023-10-23 17:06:31 -04:00
Michael Yang
c345b4ca7c Merge pull request #884 from jmorganca/mxyng/update-submodules
bump submodules
2023-10-23 11:27:38 -07:00
Michael Yang
0c7a00a264 bump submodules
pin to 9e70cc03229df19ca2d28ce23cc817198f897278 for now since
438c2ca83045a00ef244093d27e9ed41a8cb4ea9 is breaking
2023-10-23 11:17:59 -07:00
Michael Yang
36c160f1c3 Merge pull request #881 from jmorganca/mxyng/ggufv3
ggufv3
2023-10-23 10:50:45 -07:00
Michael Yang
b66bcaa582 Merge pull request #883 from jmorganca/mxyng/logs
update default log target
2023-10-23 10:50:29 -07:00
Michael Yang
c9167494cb update default log target 2023-10-23 10:44:50 -07:00
Michael Yang
125d0a013a ggufv3
ggufv3 adds support for big endianness, mainly for s390x architecture.
while that's not currently supported for ollama, the change is simple.

loosen version check to be more forward compatible. unless specified,
gguf versions other v1 will be decoded into v2.
2023-10-23 09:35:49 -07:00
Richard Awoyemi
ba2da6ceaa Added a minimalist React UI for Ollama models to the community contributions.md (#870) 2023-10-23 10:44:39 -04:00
Jeffrey Morgan
ccff9ca09c Update README.md 2023-10-21 11:58:10 -04:00
Jeffrey Morgan
436a5be49c Update README.md 2023-10-21 11:57:32 -04:00
Matt Williams
cc0bf96398 Merge pull request #829 from jmorganca/mattw/example-summarize-news
added python rag news summary
2023-10-20 21:03:16 -07:00
Michael Yang
386169205c update runtime options (#864) 2023-10-20 21:17:14 -04:00
Michael Yang
0d6342a882 Merge pull request #863 from jmorganca/mxyng/nil-pointer
fix: nil pointer dereference
2023-10-20 17:23:37 -07:00
Michael Yang
75bee074b6 fix: nil pointer dereference 2023-10-20 16:55:24 -07:00
Michael Yang
533d76368c Merge pull request #859 from jmorganca/mxyng/fix-hostname
fix: ollama host for hostname
2023-10-20 11:40:56 -07:00
Michael Yang
459f4a7889 fix: ollama host for hostname 2023-10-20 11:32:41 -07:00
Matt Williams
25c63c91d8 Update README.md
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-10-19 13:52:40 -07:00
Jeffrey Morgan
cbfff4f868 update dependencies in app/ 2023-10-19 15:52:41 -04:00
Jeffrey Morgan
7ed5a39bc7 simpler check for model loading compatibility errors 2023-10-19 14:50:49 -04:00
Michael Yang
cc1d03f4ec Merge pull request #841 from jmorganca/mxyng/cleanup-cmd-args 2023-10-19 11:22:40 -07:00
Michael Yang
846f593dbf Merge pull request #828 from jmorganca/mxyng/template-parameters
image: show parameters
2023-10-19 09:31:31 -07:00
Michael Yang
0a53da03fd Merge pull request #843 from jmorganca/mxyng/request-validation
basic request validation
2023-10-19 09:30:45 -07:00
Michael Yang
2ce1793a1d go fmt 2023-10-19 09:21:51 -07:00
Michael Yang
e1c5be24e7 check json eof 2023-10-19 09:21:51 -07:00
Michael Yang
2ad8a074ac generate: set created_at
move the empty response so it's more visible
2023-10-19 09:21:51 -07:00
Michael Yang
7e547c6833 s/message/error/ 2023-10-19 09:21:04 -07:00
Michael Yang
689842b9ff request: bad request when model missing fields 2023-10-19 09:21:04 -07:00
Michael Yang
a19d47642e models: rm workDir from CreateModel
unused after removing EMBED
2023-10-19 09:21:04 -07:00
Jeffrey Morgan
a7dad24d92 add error for falcon and starcoder vocab compatibility (#844)
add error for falcon and starcoder vocab compatibility
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
2023-10-19 12:18:31 -04:00
Jeffrey Morgan
6b213216d5 Update import.md 2023-10-19 12:17:36 -04:00
Bruce MacDonald
fe6f3b48f7 do not reload the running llm when runtime params change (#840)
- only reload the running llm if the model has changed, or the options for loading the running model have changed
- rename loaded llm to runner to differentiate from loaded model image
- remove logic which keeps the first system prompt in the generation context
2023-10-19 10:39:58 -04:00
Michael Yang
36c88cb9db cmd: set ExactArgs 2023-10-18 14:40:48 -07:00
Michael Yang
235e43d7f6 Merge pull request #833 from discovertomorrow/leadingspace
Fix Issue with Leading Whitespaces in Decoded Context
2023-10-18 13:52:48 -07:00
Arne Müller
730996e530 use TrimPrefix instead of TrimLeft 2023-10-18 22:51:30 +02:00
Arne Müller
ce6197a8e0 removed redundant strings.CutPrefix from Decode 2023-10-18 22:47:20 +02:00
Arne Müller
46b9953f32 use strings.TrimLeft to remove spaces 2023-10-18 22:41:19 +02:00
Michael Yang
4dcceeffb7 let the template do the work 2023-10-18 13:12:00 -07:00
Michael Yang
019e4a4558 image: show parameters 2023-10-18 13:12:00 -07:00
Michael Yang
627d04d927 Merge pull request #827 from jmorganca/mxyng/template-adapters
model: native gotemplate adapter template
2023-10-18 13:11:25 -07:00
Michael Yang
940e8ebec3 Merge pull request #826 from jmorganca/mxyng/template-system
show: no template system if empty
2023-10-18 13:11:09 -07:00
Bruce MacDonald
565648f3f7 relay CUDA errors to the client (#825) 2023-10-18 15:36:56 -04:00
Arne Müller
90c49bed57 moved removal of leading space into Predict 2023-10-18 20:08:26 +02:00
Michael Yang
3a2477174f Merge pull request #822 from ggozad/fix-tags-api
Fix /api/tags for no models.
2023-10-18 09:34:00 -07:00
Yiorgis Gozadinos
8c6c2cbc8c When the .ollama folder is broken or there are no models return an empty list on /api/tags 2023-10-18 08:23:20 +02:00
Arne Müller
5dc0cff459 fix whitespace removal 2023-10-18 08:15:27 +02:00
Matt Williams
c5c8b4b16a added python rag news summary
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-17 16:41:28 -07:00
Michael Yang
8299bf76ed model: native gotemplate adapter template 2023-10-17 15:28:38 -07:00
Michael Yang
ee4979e510 show: no template system if empty 2023-10-17 15:25:43 -07:00
Michael Yang
08b0e04f40 Merge pull request #813 from jmorganca/mxyng/llama
refactor llm/llama.go
2023-10-17 14:05:58 -07:00
Michael Yang
b36b0b71f8 use cut prefix 2023-10-17 14:01:39 -07:00
Michael Yang
094df37563 remove unused struct 2023-10-17 14:01:38 -07:00
Bruce MacDonald
f3648fd206 Update llama.cpp gguf to latest (#710) 2023-10-17 16:55:16 -04:00
Bruce MacDonald
bd93a94abd fix MB VRAM log output (#824) 2023-10-17 15:35:16 -04:00
Michael Yang
f55bdb6f10 Merge pull request #799 from deichbewohner/jsonmarshaling
Fix JSON Marshal Escaping for Special Characters
2023-10-17 08:46:02 -07:00
Michael Yang
2870a9bfc8 Merge pull request #812 from jmorganca/mxyng/fix-format-string
fix: wrong format string type
2023-10-17 08:40:49 -07:00
Michael Yang
c031c211d1 Merge pull request #809 from jmorganca/mxyng/fix-gpu
fix: regression unsupported metal types
2023-10-17 08:40:40 -07:00
Andreas Wäscher
68391b0055 Add OllamaSharp for .NET (#811) 2023-10-17 11:31:48 -04:00
Alexander F. Rødseth
b7e137323a Fix a typo (#818) 2023-10-17 09:00:15 -04:00
Arne Müller
8fa3f366ad Removed newline trimming and used buffer directly in POST request. 2023-10-17 08:17:35 +02:00
Michael Yang
fddb303f23 fix: format string wrong type 2023-10-16 16:14:28 -07:00
Michael Yang
ad5ee20c7b Merge pull request #794 from ggozad/add_oterm
Add oterm to community integrations
2023-10-16 15:51:55 -07:00
Michael Yang
785b4eb5bf Merge branch 'main' into add_oterm 2023-10-16 15:51:44 -07:00
Michael Yang
16ede1b30b Merge pull request #801 from s-kostyaev/add-ellama-community-integration
Add ellama community integration
2023-10-16 15:51:25 -07:00
Michael Yang
17d6bbbb2a Merge pull request #810 from vieux/patch-1
Update install.sh
2023-10-16 15:50:57 -07:00
Victor Vieux
6481b7f34c Update install.sh, avoid ARCH: unbound variable 2023-10-16 14:40:24 -07:00
Michael Yang
cb4a80b693 fix: regression unsupported metal types
omitting `--n-gpu-layers` means use metal on macos which isn't correct
since ollama uses `num_gpu=0` to explicitly disable gpu for file types
that are not implemented in metal
2023-10-16 14:37:20 -07:00
Bruce MacDonald
68d7255bd3 show request to server rather than local check (#778) 2023-10-16 17:27:25 -04:00
Michael Yang
9ef2fce33a Merge pull request #768 from jmorganca/mxyng/bytes
fix memory check
2023-10-16 12:42:41 -07:00
Michael Yang
43eaba3d60 Merge pull request #787 from jmorganca/mxyng/server-version2
server: print version on start
2023-10-16 09:59:30 -07:00
Michael Yang
1af493c5a0 server: print version on start 2023-10-16 09:59:14 -07:00
Bruce MacDonald
a0c3e989de deprecate modelfile embed command (#759) 2023-10-16 11:07:37 -04:00
Sergey Kostyaev
7af0fdce48 add ellama community integration 2023-10-16 16:39:10 +07:00
Arne Müller
ee94693b1a handling unescaped json marshaling 2023-10-16 11:15:55 +02:00
Yiorgis Gozadinos
731dbdc1a5 Add oterm to community integrations 2023-10-15 23:21:17 +02:00
Jeffrey Morgan
06bcfbd629 cleanup docker section in readme 2023-10-15 02:33:25 -04:00
Jeffrey Morgan
7d7c2510f8 add docker exec command to readme 2023-10-15 02:31:15 -04:00
Jeffrey Morgan
f9b2f999ac update readme with docker setup and link to import.md 2023-10-15 02:23:03 -04:00
Jeffrey Morgan
c416087339 import.md: formatting and spelling 2023-10-15 01:39:46 -04:00
Jeffrey Morgan
6002cebd2c import.md: convert and quantize docs 2023-10-15 00:11:51 -04:00
Jeffrey Morgan
212bdc541c import.md: model architectures spelling 2023-10-15 00:07:58 -04:00
Jeffrey Morgan
dca6686273 add steps for creating a Modelfile and more example commands to import.md 2023-10-15 00:05:50 -04:00
Jeffrey Morgan
598621afab add push script for docker images 2023-10-14 14:24:39 -04:00
Matt Williams
6479f49c09 Merge pull request #773 from jmorganca/mattw/howtoquant
add how to quantize doc
2023-10-14 08:29:39 -07:00
Matt Williams
b2974a7095 applied mikes comments
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-14 08:29:24 -07:00
Jeffrey Morgan
832b4db9d4 Use correct url for auto updates 2023-10-13 19:04:42 -04:00
Bruce MacDonald
c43873f33b check update response (#785) 2023-10-13 18:05:46 -04:00
Michael Yang
11d82d7b9b update checkvram 2023-10-13 14:47:29 -07:00
Michael Yang
36fe2deebf only check system memory on macos 2023-10-13 14:47:29 -07:00
Michael Yang
4a8931f634 check total (system + video) memory 2023-10-13 14:47:29 -07:00
Michael Yang
bd6e38fb1a refactor memory check 2023-10-13 14:47:29 -07:00
Michael Yang
92189a5855 fix memory check 2023-10-13 14:47:29 -07:00
Michael Yang
d790bf9916 Merge pull request #783 from jmorganca/mxyng/fix-gpu-offloading
fix: offloading on low end GPUs
2023-10-13 14:36:44 -07:00
Michael Yang
35afac099a do not use gpu binary when num_gpu == 0 2023-10-13 14:32:12 -07:00
Michael Yang
811c3d1900 no gpu if vram < 2GB 2023-10-13 14:32:12 -07:00
Bruce MacDonald
3553d10769 check for newer updates (#784)
Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
2023-10-13 17:29:46 -04:00
Bruce MacDonald
6fe178134d improve api error handling (#781)
- remove new lines from llama.cpp error messages relayed to client
- check api option types and return error on wrong type
- change num layers from 95% VRAM to 92% VRAM
2023-10-13 16:57:10 -04:00
Matt Williams
3c975f898f update doc to refer to docker image
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-12 15:57:50 -07:00
Matt Williams
9245c8a1df add how to quantize doc
Signed-off-by: Matt Williams <m@technovangelist.com>
2023-10-12 15:34:57 -07:00
105 changed files with 6920 additions and 5459 deletions

View File

@@ -6,3 +6,4 @@ scripts
llm/llama.cpp/ggml
llm/llama.cpp/gguf
.env
.cache

3
.gitignore vendored
View File

@@ -6,3 +6,6 @@
dist
ollama
ggml-metal.metal
.cache
*.exe
.idea

100
README.md
View File

@@ -15,6 +15,10 @@ Get up and running with large language models locally.
[Download](https://ollama.ai/download/Ollama-darwin.zip)
### Windows
Coming soon!
### Linux & WSL2
```
@@ -23,9 +27,9 @@ curl https://ollama.ai/install.sh | sh
[Manual install instructions](https://github.com/jmorganca/ollama/blob/main/docs/linux.md)
### Windows
### Docker
coming soon
The official [Ollama Docker image](https://hub.docker.com/r/ollama/ollama) `ollama/ollama` is available on Docker Hub.
## Quickstart
@@ -43,6 +47,8 @@ Here are some example open-source models that can be downloaded:
| Model | Parameters | Size | Download |
| ------------------ | ---------- | ----- | ------------------------------ |
| Neural Chat | 7B | 4.1GB | `ollama run neural-chat` |
| Starling | 7B | 4.1GB | `ollama run starling-lm` |
| Mistral | 7B | 4.1GB | `ollama run mistral` |
| Llama 2 | 7B | 3.8GB | `ollama run llama2` |
| Code Llama | 7B | 3.8GB | `ollama run codellama` |
@@ -56,11 +62,11 @@ Here are some example open-source models that can be downloaded:
## Customize your own model
### Import from GGUF or GGML
### Import from GGUF
Ollama supports importing GGUF and GGML file formats in the Modelfile. This means if you have a model that is not in the Ollama library, you can create it, iterate on it, and upload it to the Ollama library to share with others when you are ready.
Ollama supports importing GGUF models in the Modelfile:
1. Create a file named Modelfile, and add a `FROM` instruction with the local filepath to the model you want to import.
1. Create a file named `Modelfile`, with a `FROM` instruction with the local filepath to the model you want to import.
```
FROM ./vicuna-33b.Q4_0.gguf
@@ -69,18 +75,22 @@ Ollama supports importing GGUF and GGML file formats in the Modelfile. This mean
2. Create the model in Ollama
```
ollama create name -f path_to_modelfile
ollama create example -f Modelfile
```
3. Run the model
```
ollama run name
ollama run example
```
### Import from PyTorch or Safetensors
See the [guide](docs/import.md) on importing models for more information.
### Customize a prompt
Models from the Ollama library can be customized with a prompt. The example
Models from the Ollama library can be customized with a prompt. For example, to customize the `llama2` model:
```
ollama pull llama2
@@ -151,7 +161,7 @@ I'm a basic program that prints the famous "Hello, world!" message to the consol
### Pass in prompt as arguments
```
$ ollama run llama2 "summarize this file:" "$(cat README.md)"
$ ollama run llama2 "Summarize this file: $(cat README.md)"
Ollama is a lightweight, extensible framework for building and running language models on the local machine. It provides a simple API for creating, running, and managing models, as well as a library of pre-built models that can be easily used in a variety of applications.
```
@@ -170,8 +180,7 @@ ollama list
Install `cmake` and `go`:
```
brew install cmake
brew install go
brew install cmake go
```
Then generate dependencies and build:
@@ -195,30 +204,79 @@ Finally, in a separate shell, run a model:
## REST API
> See the [API documentation](docs/api.md) for all endpoints.
Ollama has an API for running and managing models. For example to generate text from a model:
Ollama has a REST API for running and managing models.
For example, to generate text from a model:
```
curl -X POST http://localhost:11434/api/generate -d '{
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt":"Why is the sky blue?"
}'
```
See the [API documentation](./docs/api.md) for all endpoints.
## Community Integrations
### Mobile
- [Mobile Artificial Intelligence Distribution](https://github.com/MaidFoundation/Maid) (Maid)
### Web & Desktop
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Minimalistic React UI for Ollama Models](https://github.com/richawo/minimal-llm-ui)
- [Web UI](https://github.com/ollama-webui/ollama-webui)
- [Ollamac](https://github.com/kevinhermawan/Ollamac)
- [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)
- [Amica](https://github.com/semperai/amica)
### Terminal
- [oterm](https://github.com/ggozad/oterm)
- [Ellama Emacs client](https://github.com/s-kostyaev/ellama)
- [Emacs client](https://github.com/zweifisch/ollama)
- [gen.nvim](https://github.com/David-Kunz/gen.nvim)
- [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
- [gptel Emacs client](https://github.com/karthink/gptel)
- [Oatmeal](https://github.com/dustinblackman/oatmeal)
### Package managers
- [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
### Libraries
- [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
- [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
- [Ollama-rs for Rust](https://github.com/pepperoni21/ollama-rs)
- [Ollama4j for Java](https://github.com/amithkoujalgi/ollama4j)
- [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
- [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
- [Ollama for Dart](https://github.com/breitburg/dart-ollama)
- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
### Mobile
- [Maid](https://github.com/danemadsen/Maid) (Mobile Artificial Intelligence Distribution)
### Extensions & Plugins
- [Raycast extension](https://github.com/MassimilianoPasquini97/raycast_ollama)
- [Discollama](https://github.com/mxyng/discollama) (Discord bot inside the Ollama discord channel)
- [Continue](https://github.com/continuedev/continue)
- [Obsidian Ollama plugin](https://github.com/hinterdupfinger/obsidian-ollama)
- [Logseq Ollama plugin](https://github.com/omagdy7/ollama-logseq)
- [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
- [LiteLLM](https://github.com/BerriAI/litellm)
- [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
- [Chatbot UI](https://github.com/ivanfioravanti/chatbot-ollama)
- [HTML UI](https://github.com/rtcfirefly/ollama-ui)
- [Typescript UI](https://github.com/ollama-interface/Ollama-Gui?tab=readme-ov-file)
- [Dumbar](https://github.com/JerrySievert/Dumbar)
- [Emacs client](https://github.com/zweifisch/ollama)
- [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
- [Llama Coder](https://github.com/ex3ndr/llama-coder) (Copilot alternative using Ollama)
- [Obsidian BMO Chatbot plugin](https://github.com/longy2k/obsidian-bmo-chatbot)

View File

@@ -5,6 +5,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net"
@@ -14,13 +15,10 @@ import (
"runtime"
"strings"
"github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/version"
)
const DefaultHost = "127.0.0.1:11434"
var envHost = os.Getenv("OLLAMA_HOST")
type Client struct {
base *url.URL
http http.Client
@@ -43,16 +41,28 @@ func checkError(resp *http.Response, body []byte) error {
}
func ClientFromEnvironment() (*Client, error) {
defaultPort := "11434"
scheme, hostport, ok := strings.Cut(os.Getenv("OLLAMA_HOST"), "://")
if !ok {
switch {
case !ok:
scheme, hostport = "http", os.Getenv("OLLAMA_HOST")
case scheme == "http":
defaultPort = "80"
case scheme == "https":
defaultPort = "443"
}
// trim trailing slashes
hostport = strings.TrimRight(hostport, "/")
host, port, err := net.SplitHostPort(hostport)
if err != nil {
host, port = "127.0.0.1", "11434"
if ip := net.ParseIP(strings.Trim(os.Getenv("OLLAMA_HOST"), "[]")); ip != nil {
host, port = "127.0.0.1", defaultPort
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
host = ip.String()
} else if hostport != "" {
host = hostport
}
}
@@ -63,7 +73,7 @@ func ClientFromEnvironment() (*Client, error) {
},
}
mockRequest, err := http.NewRequest("HEAD", client.base.String(), nil)
mockRequest, err := http.NewRequest(http.MethodHead, client.base.String(), nil)
if err != nil {
return nil, err
}
@@ -86,11 +96,19 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
var reqBody io.Reader
var data []byte
var err error
if reqData != nil {
switch reqData := reqData.(type) {
case io.Reader:
// reqData is already an io.Reader
reqBody = reqData
case nil:
// noop
default:
data, err = json.Marshal(reqData)
if err != nil {
return err
}
reqBody = bytes.NewReader(data)
}
@@ -127,7 +145,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
return nil
}
const maxBufferSize = 512 * 1000 // 512KB
const maxBufferSize = 512 * format.KiloByte
func (c *Client) stream(ctx context.Context, method, path string, data any, fn func([]byte) error) error {
var buf *bytes.Buffer
@@ -278,3 +296,18 @@ func (c *Client) Heartbeat(ctx context.Context) error {
}
return nil
}
func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
var statusError StatusError
if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound {
return err
}
if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil {
return err
}
}
return nil
}

View File

@@ -1,13 +1,17 @@
import os
import json
import requests
import os
import hashlib
import json
from pathlib import Path
BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
# Generate a response for a given prompt with a provided model. This is a streaming endpoint, so will be a series of responses.
# The final response object will include statistics and additional data from the request. Use the callback function to override
# the default handler.
def generate(model_name, prompt, system=None, template=None, context=None, options=None, callback=None):
def generate(model_name, prompt, system=None, template=None, format="", context=None, options=None, callback=None):
try:
url = f"{BASE_URL}/api/generate"
payload = {
@@ -16,7 +20,8 @@ def generate(model_name, prompt, system=None, template=None, context=None, optio
"system": system,
"template": template,
"context": context,
"options": options
"options": options,
"format": format,
}
# Remove keys with None values
@@ -56,30 +61,86 @@ def generate(model_name, prompt, system=None, template=None, context=None, optio
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
return None, None
# Create a blob file on the server if it doesn't exist.
def create_blob(digest, file_path):
url = f"{BASE_URL}/api/blobs/{digest}"
# Check if the blob exists
response = requests.head(url)
if response.status_code != 404:
return # Blob already exists, no need to upload
response.raise_for_status()
# Upload the blob
with open(file_path, 'rb') as file_data:
requests.post(url, data=file_data)
# Create a model from a Modelfile. Use the callback function to override the default handler.
def create(model_name, model_path, callback=None):
def create(model_name, filename, callback=None):
try:
file_path = Path(filename).expanduser().resolve()
processed_lines = []
# Read and process the modelfile
with open(file_path, 'r') as f:
for line in f:
# Skip empty or whitespace-only lines
if not line.strip():
continue
command, args = line.split(maxsplit=1)
if command.upper() in ["FROM", "ADAPTER"]:
path = Path(args.strip()).expanduser()
# Check if path is relative and resolve it
if not path.is_absolute():
path = (file_path.parent / path)
# Skip if file does not exist for "model", this is handled by the server
if not path.exists():
processed_lines.append(line)
continue
# Calculate SHA-256 hash
with open(path, 'rb') as bin_file:
hash = hashlib.sha256()
hash.update(bin_file.read())
blob = f"sha256:{hash.hexdigest()}"
# Add the file to the remote server
create_blob(blob, path)
# Replace path with digest in the line
line = f"{command} @{blob}\n"
processed_lines.append(line)
# Combine processed lines back into a single string
modelfile_content = '\n'.join(processed_lines)
url = f"{BASE_URL}/api/create"
payload = {"name": model_name, "path": model_path}
payload = {"name": model_name, "modelfile": modelfile_content}
# Making a POST request with the stream parameter set to True to handle streaming responses
with requests.post(url, json=payload, stream=True) as response:
response.raise_for_status()
# Iterating over the response line by line and displaying the status
for line in response.iter_lines():
if line:
# Parsing each line (JSON chunk) and extracting the status
chunk = json.loads(line)
if callback:
callback(chunk)
else:
print(f"Status: {chunk.get('status')}")
except requests.exceptions.RequestException as e:
except Exception as e:
print(f"An error occurred: {e}")
# Pull a model from a the model registry. Cancelled pulls are resumed from where they left off, and multiple
# calls to will share the same download progress. Use the callback function to override the default handler.
def pull(model_name, insecure=False, callback=None):
@@ -221,5 +282,3 @@ def heartbeat():
except requests.exceptions.RequestException as e:
print(f"An error occurred: {e}")
return "Ollama is not running"

43
api/client_test.go Normal file
View File

@@ -0,0 +1,43 @@
package api
import "testing"
func TestClientFromEnvironment(t *testing.T) {
type testCase struct {
value string
expect string
err error
}
testCases := map[string]*testCase{
"empty": {value: "", expect: "http://127.0.0.1:11434"},
"only address": {value: "1.2.3.4", expect: "http://1.2.3.4:11434"},
"only port": {value: ":1234", expect: "http://:1234"},
"address and port": {value: "1.2.3.4:1234", expect: "http://1.2.3.4:1234"},
"scheme http and address": {value: "http://1.2.3.4", expect: "http://1.2.3.4:80"},
"scheme https and address": {value: "https://1.2.3.4", expect: "https://1.2.3.4:443"},
"scheme, address, and port": {value: "https://1.2.3.4:1234", expect: "https://1.2.3.4:1234"},
"hostname": {value: "example.com", expect: "http://example.com:11434"},
"hostname and port": {value: "example.com:1234", expect: "http://example.com:1234"},
"scheme http and hostname": {value: "http://example.com", expect: "http://example.com:80"},
"scheme https and hostname": {value: "https://example.com", expect: "https://example.com:443"},
"scheme, hostname, and port": {value: "https://example.com:1234", expect: "https://example.com:1234"},
"trailing slash": {value: "example.com/", expect: "http://example.com:11434"},
"trailing slash port": {value: "example.com:1234/", expect: "http://example.com:1234"},
}
for k, v := range testCases {
t.Run(k, func(t *testing.T) {
t.Setenv("OLLAMA_HOST", v.value)
client, err := ClientFromEnvironment()
if err != v.err {
t.Fatalf("expected %s, got %s", v.err, err)
}
if client.base.String() != v.expect {
t.Fatalf("expected %s, got %s", v.expect, client.base.String())
}
})
}
}

View File

@@ -3,7 +3,6 @@ package api
import (
"encoding/json"
"fmt"
"log"
"math"
"os"
"reflect"
@@ -38,10 +37,56 @@ type GenerateRequest struct {
Template string `json:"template"`
Context []int `json:"context,omitempty"`
Stream *bool `json:"stream,omitempty"`
Raw bool `json:"raw,omitempty"`
Format string `json:"format"`
Options map[string]interface{} `json:"options"`
}
// Options specfied in GenerateRequest, if you add a new option here add it to the API docs also
type Options struct {
Runner
// Predict options used at runtime
NumKeep int `json:"num_keep,omitempty"`
Seed int `json:"seed,omitempty"`
NumPredict int `json:"num_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
TFSZ float32 `json:"tfs_z,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatTau float32 `json:"mirostat_tau,omitempty"`
MirostatEta float32 `json:"mirostat_eta,omitempty"`
PenalizeNewline bool `json:"penalize_newline,omitempty"`
Stop []string `json:"stop,omitempty"`
}
// Runner options which must be set when the model is loaded into memory
type Runner struct {
UseNUMA bool `json:"numa,omitempty"`
NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"`
F16KV bool `json:"f16_kv,omitempty"`
LogitsAll bool `json:"logits_all,omitempty"`
VocabOnly bool `json:"vocab_only,omitempty"`
UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
EmbeddingOnly bool `json:"embedding_only,omitempty"`
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
NumThread int `json:"num_thread,omitempty"`
}
type EmbeddingRequest struct {
Model string `json:"model"`
Prompt string `json:"prompt"`
@@ -54,9 +99,10 @@ type EmbeddingResponse struct {
}
type CreateRequest struct {
Name string `json:"name"`
Path string `json:"path"`
Stream *bool `json:"stream,omitempty"`
Name string `json:"name"`
Path string `json:"path"`
Modelfile string `json:"modelfile"`
Stream *bool `json:"stream,omitempty"`
}
type DeleteRequest struct {
@@ -162,49 +208,6 @@ func (r *GenerateResponse) Summary() {
}
}
type Options struct {
Seed int `json:"seed,omitempty"`
// Backend options
UseNUMA bool `json:"numa,omitempty"`
// Model options
NumCtx int `json:"num_ctx,omitempty"`
NumKeep int `json:"num_keep,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGQA int `json:"num_gqa,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
LowVRAM bool `json:"low_vram,omitempty"`
F16KV bool `json:"f16_kv,omitempty"`
LogitsAll bool `json:"logits_all,omitempty"`
VocabOnly bool `json:"vocab_only,omitempty"`
UseMMap bool `json:"use_mmap,omitempty"`
UseMLock bool `json:"use_mlock,omitempty"`
EmbeddingOnly bool `json:"embedding_only,omitempty"`
RopeFrequencyBase float32 `json:"rope_frequency_base,omitempty"`
RopeFrequencyScale float32 `json:"rope_frequency_scale,omitempty"`
// Predict options
NumPredict int `json:"num_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
TFSZ float32 `json:"tfs_z,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
Mirostat int `json:"mirostat,omitempty"`
MirostatTau float32 `json:"mirostat_tau,omitempty"`
MirostatEta float32 `json:"mirostat_eta,omitempty"`
PenalizeNewline bool `json:"penalize_newline,omitempty"`
Stop []string `json:"stop,omitempty"`
NumThread int `json:"num_thread,omitempty"`
}
var ErrInvalidOpts = fmt.Errorf("invalid options")
func (opts *Options) FromMap(m map[string]interface{}) error {
@@ -238,44 +241,39 @@ func (opts *Options) FromMap(m map[string]interface{}) error {
// when JSON unmarshals numbers, it uses float64, not int
field.SetInt(int64(t))
default:
log.Printf("could not convert model parameter %v of type %T to int, skipped", key, val)
return fmt.Errorf("option %q must be of type integer", key)
}
case reflect.Bool:
val, ok := val.(bool)
if !ok {
log.Printf("could not convert model parameter %v of type %T to bool, skipped", key, val)
continue
return fmt.Errorf("option %q must be of type boolean", key)
}
field.SetBool(val)
case reflect.Float32:
// JSON unmarshals to float64
val, ok := val.(float64)
if !ok {
log.Printf("could not convert model parameter %v of type %T to float32, skipped", key, val)
continue
return fmt.Errorf("option %q must be of type float32", key)
}
field.SetFloat(val)
case reflect.String:
val, ok := val.(string)
if !ok {
log.Printf("could not convert model parameter %v of type %T to string, skipped", key, val)
continue
return fmt.Errorf("option %q must be of type string", key)
}
field.SetString(val)
case reflect.Slice:
// JSON unmarshals to []interface{}, not []string
val, ok := val.([]interface{})
if !ok {
log.Printf("could not convert model parameter %v of type %T to slice, skipped", key, val)
continue
return fmt.Errorf("option %q must be of type array", key)
}
// convert []interface{} to []string
slice := make([]string, len(val))
for i, item := range val {
str, ok := item.(string)
if !ok {
log.Printf("could not convert model parameter %v of type %T to slice of strings, skipped", key, item)
continue
return fmt.Errorf("option %q must be of an array of strings", key)
}
slice[i] = str
}
@@ -299,7 +297,7 @@ func DefaultOptions() Options {
return Options{
// options set on request to runner
NumPredict: -1,
NumKeep: -1,
NumKeep: 0,
Temperature: 0.8,
TopK: 40,
TopP: 0.9,
@@ -315,20 +313,22 @@ func DefaultOptions() Options {
PenalizeNewline: true,
Seed: -1,
// options set when the model is loaded
NumCtx: 2048,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
UseMLock: false,
UseMMap: true,
UseNUMA: false,
EmbeddingOnly: true,
Runner: Runner{
// options set when the model is loaded
NumCtx: 2048,
RopeFrequencyBase: 10000.0,
RopeFrequencyScale: 1.0,
NumBatch: 512,
NumGPU: -1, // -1 here indicates that NumGPU should be set dynamically
NumGQA: 1,
NumThread: 0, // let the runtime decide
LowVRAM: false,
F16KV: true,
UseMLock: false,
UseMMap: true,
UseNUMA: false,
EmbeddingOnly: true,
},
}
}

View File

@@ -47,16 +47,6 @@ const config: ForgeConfig = {
},
rebuildConfig: {},
makers: [new MakerSquirrel({}), new MakerZIP({}, ['darwin'])],
publishers: [
new PublisherGithub({
repository: {
name: 'ollama',
owner: 'jmorganca',
},
draft: false,
prerelease: true,
}),
],
hooks: {
readPackageJson: async (_, packageJson) => {
return { ...packageJson, version: process.env.VERSION || packageJson.version }

992
app/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -46,7 +46,7 @@
"chmodr": "^1.2.0",
"copy-webpack-plugin": "^11.0.0",
"css-loader": "^6.8.1",
"electron": "25.2.0",
"electron": "25.9.2",
"eslint": "^8.43.0",
"eslint-plugin-import": "^2.27.5",
"fork-ts-checker-webpack-plugin": "^7.3.0",

View File

@@ -162,13 +162,56 @@ app.on('before-quit', () => {
}
})
const updateURL = `https://ollama.ai/api/update?os=${process.platform}&arch=${
process.arch
}&version=${app.getVersion()}&id=${id()}`
let latest = ''
async function isNewReleaseAvailable() {
try {
const response = await fetch(updateURL)
if (!response.ok) {
return false
}
if (response.status === 204) {
return false
}
const data = await response.json()
const url = data?.url
if (!url) {
return false
}
if (latest === url) {
return false
}
latest = url
return true
} catch (error) {
logger.error(`update check failed - ${error}`)
return false
}
}
async function checkUpdate() {
const available = await isNewReleaseAvailable()
if (available) {
logger.info('checking for update')
autoUpdater.checkForUpdates()
}
}
function init() {
if (app.isPackaged) {
autoUpdater.checkForUpdates()
checkUpdate()
setInterval(() => {
if (!updateAvailable) {
autoUpdater.checkForUpdates()
}
checkUpdate()
}, 60 * 60 * 1000)
}
@@ -246,11 +289,7 @@ function id(): string {
return uuid
}
autoUpdater.setFeedURL({
url: `https://ollama.ai/api/update?os=${process.platform}&arch=${
process.arch
}&version=${app.getVersion()}&id=${id()}`,
})
autoUpdater.setFeedURL({ url: updateURL })
autoUpdater.on('error', e => {
logger.error(`update check failed - ${e.message}`)

View File

@@ -1,16 +1,18 @@
package cmd
import (
"bufio"
"bytes"
"context"
"crypto/ed25519"
"crypto/rand"
"crypto/sha256"
"encoding/pem"
"errors"
"fmt"
"io"
"log"
"net"
"net/http"
"os"
"os/exec"
"os/signal"
@@ -20,40 +22,20 @@ import (
"syscall"
"time"
"github.com/dustin/go-humanize"
"github.com/olekukonko/tablewriter"
"github.com/pdevine/readline"
"github.com/spf13/cobra"
"golang.org/x/crypto/ssh"
"golang.org/x/term"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/progressbar"
"github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/progress"
"github.com/jmorganca/ollama/readline"
"github.com/jmorganca/ollama/server"
"github.com/jmorganca/ollama/version"
)
type Painter struct {
IsMultiLine bool
}
func (p Painter) Paint(line []rune, _ int) []rune {
termType := os.Getenv("TERM")
if termType == "xterm-256color" && len(line) == 0 {
var prompt string
if p.IsMultiLine {
prompt = "Use \"\"\" to end multi-line input"
} else {
prompt = "Send a message (/? for help)"
}
return []rune(fmt.Sprintf("\033[38;5;245m%s\033[%dD\033[0m", prompt, len(prompt)))
}
// add a space and a backspace to prevent the cursor from walking up the screen
line = append(line, []rune(" \b")...)
return line
}
func CreateHandler(cmd *cobra.Command, args []string) error {
filename, _ := cmd.Flags().GetString("file")
filename, err := filepath.Abs(filename)
@@ -66,55 +48,95 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
return err
}
var spinner *Spinner
p := progress.NewProgress(os.Stderr)
defer p.Stop()
var currentDigest string
var bar *progressbar.ProgressBar
bars := make(map[string]*progress.Bar)
modelfile, err := os.ReadFile(filename)
if err != nil {
return err
}
commands, err := parser.Parse(bytes.NewReader(modelfile))
if err != nil {
return err
}
home, err := os.UserHomeDir()
if err != nil {
return err
}
status := "transferring model data"
spinner := progress.NewSpinner(status)
p.Add(status, spinner)
for _, c := range commands {
switch c.Name {
case "model", "adapter":
path := c.Args
if path == "~" {
path = home
} else if strings.HasPrefix(path, "~/") {
path = filepath.Join(home, path[2:])
}
if !filepath.IsAbs(path) {
path = filepath.Join(filepath.Dir(filename), path)
}
bin, err := os.Open(path)
if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
continue
} else if err != nil {
return err
}
defer bin.Close()
hash := sha256.New()
if _, err := io.Copy(hash, bin); err != nil {
return err
}
bin.Seek(0, io.SeekStart)
digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
return err
}
modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
}
}
request := api.CreateRequest{Name: args[0], Path: filename}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
if spinner != nil {
spinner.Stop()
if resp.Digest != "" {
spinner.Stop()
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
currentDigest = resp.Digest
switch {
case strings.Contains(resp.Status, "embeddings"):
bar = progressbar.Default(resp.Total, resp.Status)
bar.Set64(resp.Completed)
default:
// pulling
bar = progressbar.DefaultBytes(
resp.Total,
resp.Status,
)
bar.Set64(resp.Completed)
}
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
if spinner != nil {
spinner.Stop()
}
spinner = NewSpinner(resp.Status)
go spinner.Spin(100 * time.Millisecond)
bar.Set(resp.Completed)
} else if status != resp.Status {
spinner.Stop()
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
if err := client.Create(context.Background(), &request, fn); err != nil {
return err
}
if spinner != nil {
spinner.Stop()
if spinner.description != "success" {
return errors.New("unexpected end to create model")
}
}
return nil
}
@@ -124,19 +146,16 @@ func RunHandler(cmd *cobra.Command, args []string) error {
return err
}
models, err := client.List(context.Background())
if err != nil {
return err
}
canonicalModelPath := server.ParseModelPath(args[0])
for _, model := range models.Models {
if model.Name == canonicalModelPath.GetShortTagname() {
return RunGenerate(cmd, args)
name := args[0]
// check if the model exists on the server
_, err = client.Show(context.Background(), &api.ShowRequest{Name: name})
var statusError api.StatusError
switch {
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
if err := PullHandler(cmd, args); err != nil {
return err
}
}
if err := PullHandler(cmd, args); err != nil {
case err != nil:
return err
}
@@ -154,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error {
return err
}
var currentDigest string
var bar *progressbar.ProgressBar
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PushRequest{Name: args[0], Insecure: insecure}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
resp.Total,
fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
)
if resp.Digest != "" {
if spinner != nil {
spinner.Stop()
}
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PushRequest{Name: args[0], Insecure: insecure}
if err := client.Push(context.Background(), &request, fn); err != nil {
return err
}
if bar != nil && !bar.IsFinished() {
return errors.New("unexpected end to push model")
}
spinner.Stop()
return nil
}
@@ -202,7 +231,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {
for _, m := range models.Models {
if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
data = append(data, []string{m.Name, m.Digest[:12], humanize.Bytes(uint64(m.Size)), format.HumanTime(m.ModifiedAt, "Never")})
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), format.HumanTime(m.ModifiedAt, "Never")})
}
}
@@ -334,85 +363,108 @@ func PullHandler(cmd *cobra.Command, args []string) error {
return err
}
return pull(args[0], insecure)
}
func pull(model string, insecure bool) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
var currentDigest string
var bar *progressbar.ProgressBar
p := progress.NewProgress(os.Stderr)
defer p.Stop()
bars := make(map[string]*progress.Bar)
var status string
var spinner *progress.Spinner
request := api.PullRequest{Name: model, Insecure: insecure}
fn := func(resp api.ProgressResponse) error {
if resp.Digest != currentDigest && resp.Digest != "" {
currentDigest = resp.Digest
bar = progressbar.DefaultBytes(
resp.Total,
fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
)
if resp.Digest != "" {
if spinner != nil {
spinner.Stop()
}
bar.Set64(resp.Completed)
} else if resp.Digest == currentDigest && resp.Digest != "" {
bar.Set64(resp.Completed)
} else {
currentDigest = ""
fmt.Println(resp.Status)
bar, ok := bars[resp.Digest]
if !ok {
bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
bars[resp.Digest] = bar
p.Add(resp.Digest, bar)
}
bar.Set(resp.Completed)
} else if status != resp.Status {
if spinner != nil {
spinner.Stop()
}
status = resp.Status
spinner = progress.NewSpinner(status)
p.Add(status, spinner)
}
return nil
}
request := api.PullRequest{Name: args[0], Insecure: insecure}
if err := client.Pull(context.Background(), &request, fn); err != nil {
return err
}
if bar != nil && !bar.IsFinished() {
return errors.New("unexpected end to pull model")
}
return nil
}
func RunGenerate(cmd *cobra.Command, args []string) error {
if len(args) > 1 {
// join all args into a single prompt
wordWrap := false
if term.IsTerminal(int(os.Stdout.Fd())) {
wordWrap = true
}
format, err := cmd.Flags().GetString("format")
if err != nil {
return err
}
nowrap, err := cmd.Flags().GetBool("nowordwrap")
prompts := args[1:]
// prepend stdin to the prompt if provided
if !term.IsTerminal(int(os.Stdin.Fd())) {
in, err := io.ReadAll(os.Stdin)
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
return generate(cmd, args[0], strings.Join(args[1:], " "), wordWrap)
prompts = append([]string{string(in)}, prompts...)
}
if readline.IsTerminal(int(os.Stdin.Fd())) {
return generateInteractive(cmd, args[0])
// output is being piped
if !term.IsTerminal(int(os.Stdout.Fd())) {
return generate(cmd, args[0], strings.Join(prompts, " "), false, format)
}
return generateBatch(cmd, args[0])
wordWrap := os.Getenv("TERM") == "xterm-256color"
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
// prompts are provided via stdin or args so don't enter interactive mode
if len(prompts) > 0 {
return generate(cmd, args[0], strings.Join(prompts, " "), wordWrap, format)
}
return generateInteractive(cmd, args[0], wordWrap, format)
}
type generateContextKey string
func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format string) error {
client, err := api.ClientFromEnvironment()
if err != nil {
return err
}
spinner := NewSpinner("")
go spinner.Spin(60 * time.Millisecond)
p := progress.NewProgress(os.Stderr)
defer p.StopAndClear()
spinner := progress.NewSpinner("")
p.Add("", spinner)
var latest api.GenerateResponse
@@ -421,7 +473,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
generateContext = []int{}
}
termWidth, _, err := term.GetSize(int(0))
termWidth, _, err := term.GetSize(int(os.Stdout.Fd()))
if err != nil {
wordWrap = false
}
@@ -442,11 +494,9 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
var currentLineLength int
var wordBuffer string
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext}
request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
fn := func(response api.GenerateResponse) error {
if !spinner.IsFinished() {
spinner.Finish()
}
p.StopAndClear()
latest = response
@@ -480,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
if err := client.Generate(cancelCtx, &request, fn); err != nil {
if strings.Contains(err.Error(), "context canceled") && abort {
spinner.Finish()
return nil
}
return err
@@ -513,39 +562,12 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool) error {
return nil
}
func generateInteractive(cmd *cobra.Command, model string) error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
func generateInteractive(cmd *cobra.Command, model string, wordWrap bool, format string) error {
// load the model
if err := generate(cmd, model, "", false); err != nil {
if err := generate(cmd, model, "", false, ""); err != nil {
return err
}
completer := readline.NewPrefixCompleter(
readline.PcItem("/help"),
readline.PcItem("/list"),
readline.PcItem("/set",
readline.PcItem("history"),
readline.PcItem("nohistory"),
readline.PcItem("wordwrap"),
readline.PcItem("nowordwrap"),
readline.PcItem("verbose"),
readline.PcItem("quiet"),
),
readline.PcItem("/show",
readline.PcItem("license"),
readline.PcItem("modelfile"),
readline.PcItem("parameters"),
readline.PcItem("system"),
readline.PcItem("template"),
),
readline.PcItem("/exit"),
readline.PcItem("/bye"),
)
usage := func() {
fmt.Fprintln(os.Stderr, "Available Commands:")
fmt.Fprintln(os.Stderr, " /set Set session variables")
@@ -563,6 +585,8 @@ func generateInteractive(cmd *cobra.Command, model string) error {
fmt.Fprintln(os.Stderr, " /set nohistory Disable history")
fmt.Fprintln(os.Stderr, " /set wordwrap Enable wordwrap")
fmt.Fprintln(os.Stderr, " /set nowordwrap Disable wordwrap")
fmt.Fprintln(os.Stderr, " /set format json Enable JSON mode")
fmt.Fprintln(os.Stderr, " /set noformat Disable formatting")
fmt.Fprintln(os.Stderr, " /set verbose Show LLM stats")
fmt.Fprintln(os.Stderr, " /set quiet Disable LLM stats")
fmt.Fprintln(os.Stderr, "")
@@ -578,74 +602,59 @@ func generateInteractive(cmd *cobra.Command, model string) error {
fmt.Fprintln(os.Stderr, "")
}
var painter Painter
config := readline.Config{
Painter: &painter,
Prompt: ">>> ",
HistoryFile: filepath.Join(home, ".ollama", "history"),
AutoComplete: completer,
}
scanner, err := readline.NewEx(&config)
scanner, err := readline.New(readline.Prompt{
Prompt: ">>> ",
AltPrompt: "... ",
Placeholder: "Send a message (/? for help)",
AltPlaceholder: `Use """ to end multi-line input`,
})
if err != nil {
return err
}
defer scanner.Close()
var wordWrap bool
termType := os.Getenv("TERM")
if termType == "xterm-256color" {
wordWrap = true
}
fmt.Print(readline.StartBracketedPaste)
defer fmt.Printf(readline.EndBracketedPaste)
// override wrapping if the user turned it off
nowrap, err := cmd.Flags().GetBool("nowordwrap")
if err != nil {
return err
}
if nowrap {
wordWrap = false
}
var multiLineBuffer string
var isMultiLine bool
var prompt string
for {
line, err := scanner.Readline()
switch {
case errors.Is(err, io.EOF):
fmt.Println()
return nil
case errors.Is(err, readline.ErrInterrupt):
if line == "" {
fmt.Println("Use Ctrl-D or /bye to exit.")
fmt.Println("\nUse Ctrl-D or /bye to exit.")
}
scanner.Prompt.UseAlt = false
prompt = ""
continue
case err != nil:
return err
}
line = strings.TrimSpace(line)
switch {
case isMultiLine:
if strings.HasSuffix(line, `"""`) {
isMultiLine = false
painter.IsMultiLine = isMultiLine
multiLineBuffer += strings.TrimSuffix(line, `"""`)
line = multiLineBuffer
multiLineBuffer = ""
scanner.SetPrompt(">>> ")
} else {
multiLineBuffer += line + " "
case strings.HasPrefix(prompt, `"""`):
// if the prompt so far starts with """ then we're in multiline mode
// and we need to keep reading until we find a line that ends with """
cut, found := strings.CutSuffix(line, `"""`)
prompt += cut + "\n"
if !found {
continue
}
case strings.HasPrefix(line, `"""`):
isMultiLine = true
painter.IsMultiLine = isMultiLine
multiLineBuffer = strings.TrimPrefix(line, `"""`) + " "
scanner.SetPrompt("... ")
prompt = strings.TrimPrefix(prompt, `"""`)
scanner.Prompt.UseAlt = false
case strings.HasPrefix(line, `"""`) && len(prompt) == 0:
scanner.Prompt.UseAlt = true
prompt += line + "\n"
continue
case scanner.Pasting:
prompt += line + "\n"
continue
case strings.HasPrefix(line, "/list"):
args := strings.Fields(line)
@@ -672,19 +681,16 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case "quiet":
cmd.Flags().Set("verbose", "false")
fmt.Println("Set 'quiet' mode.")
case "mode":
if len(args) > 2 {
switch args[2] {
case "vim":
scanner.SetVimMode(true)
case "emacs", "default":
scanner.SetVimMode(false)
default:
usage()
}
case "format":
if len(args) < 3 || args[2] != "json" {
fmt.Println("Invalid or missing format. For 'json' mode use '/set format json'")
} else {
usage()
format = args[2]
fmt.Printf("Set format to '%s' mode.\n", args[2])
}
case "noformat":
format = ""
fmt.Println("Disabled format.")
default:
fmt.Printf("Unknown command '/set %s'. Type /? for help\n", args[1])
}
@@ -694,7 +700,12 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case strings.HasPrefix(line, "/show"):
args := strings.Fields(line)
if len(args) > 1 {
resp, err := server.GetModelInfo(model)
client, err := api.ClientFromEnvironment()
if err != nil {
fmt.Println("error: couldn't connect to ollama server")
return err
}
resp, err := client.Show(cmd.Context(), &api.ShowRequest{Name: model})
if err != nil {
fmt.Println("error: couldn't get model")
return err
@@ -750,29 +761,21 @@ func generateInteractive(cmd *cobra.Command, model string) error {
case strings.HasPrefix(line, "/"):
args := strings.Fields(line)
fmt.Printf("Unknown command '%s'. Type /? for help\n", args[0])
continue
default:
prompt += line
}
if len(line) > 0 && line[0] != '/' {
if err := generate(cmd, model, line, wordWrap); err != nil {
if len(prompt) > 0 && prompt[0] != '/' {
if err := generate(cmd, model, prompt, wordWrap, format); err != nil {
return err
}
prompt = ""
}
}
}
func generateBatch(cmd *cobra.Command, model string) error {
scanner := bufio.NewScanner(os.Stdin)
for scanner.Scan() {
prompt := scanner.Text()
fmt.Printf(">>> %s\n", prompt)
if err := generate(cmd, model, prompt, false); err != nil {
return err
}
}
return nil
}
func RunServer(cmd *cobra.Command, _ []string) error {
host, port, err := net.SplitHostPort(os.Getenv("OLLAMA_HOST"))
if err != nil {
@@ -796,21 +799,6 @@ func RunServer(cmd *cobra.Command, _ []string) error {
origins = strings.Split(o, ",")
}
if noprune := os.Getenv("OLLAMA_NOPRUNE"); noprune == "" {
if err := server.PruneLayers(); err != nil {
return err
}
manifestsPath, err := server.GetManifestPath()
if err != nil {
return err
}
if err := server.PruneDirectory(manifestsPath); err != nil {
return err
}
}
return server.Serve(ln, origins)
}
@@ -933,7 +921,7 @@ func NewCLI() *cobra.Command {
createCmd := &cobra.Command{
Use: "create MODEL",
Short: "Create a model from a Modelfile",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: CreateHandler,
}
@@ -943,7 +931,7 @@ func NewCLI() *cobra.Command {
showCmd := &cobra.Command{
Use: "show MODEL",
Short: "Show information for a model",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: ShowHandler,
}
@@ -965,18 +953,20 @@ func NewCLI() *cobra.Command {
runCmd.Flags().Bool("verbose", false, "Show timings for response")
runCmd.Flags().Bool("insecure", false, "Use an insecure registry")
runCmd.Flags().Bool("nowordwrap", false, "Don't wrap words to the next line automatically")
runCmd.Flags().String("format", "", "Response format (e.g. json)")
serveCmd := &cobra.Command{
Use: "serve",
Aliases: []string{"start"},
Short: "Start ollama",
Args: cobra.ExactArgs(0),
RunE: RunServer,
}
pullCmd := &cobra.Command{
Use: "pull MODEL",
Short: "Pull a model from a registry",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: PullHandler,
}
@@ -986,7 +976,7 @@ func NewCLI() *cobra.Command {
pushCmd := &cobra.Command{
Use: "push MODEL",
Short: "Push a model to a registry",
Args: cobra.MinimumNArgs(1),
Args: cobra.ExactArgs(1),
PreRunE: checkServerHeartbeat,
RunE: PushHandler,
}
@@ -1002,15 +992,15 @@ func NewCLI() *cobra.Command {
}
copyCmd := &cobra.Command{
Use: "cp",
Use: "cp SOURCE TARGET",
Short: "Copy a model",
Args: cobra.MinimumNArgs(2),
Args: cobra.ExactArgs(2),
PreRunE: checkServerHeartbeat,
RunE: CopyHandler,
}
deleteCmd := &cobra.Command{
Use: "rm",
Use: "rm MODEL [MODEL...]",
Short: "Remove a model",
Args: cobra.MinimumNArgs(1),
PreRunE: checkServerHeartbeat,

View File

@@ -1,44 +0,0 @@
package cmd
import (
"fmt"
"os"
"time"
"github.com/jmorganca/ollama/progressbar"
)
type Spinner struct {
description string
*progressbar.ProgressBar
}
func NewSpinner(description string) *Spinner {
return &Spinner{
description: description,
ProgressBar: progressbar.NewOptions(-1,
progressbar.OptionSetWriter(os.Stderr),
progressbar.OptionThrottle(60*time.Millisecond),
progressbar.OptionSpinnerType(14),
progressbar.OptionSetRenderBlankState(true),
progressbar.OptionSetElapsedTime(false),
progressbar.OptionClearOnFinish(),
progressbar.OptionSetDescription(description),
),
}
}
func (s *Spinner) Spin(tick time.Duration) {
for range time.Tick(tick) {
if s.IsFinished() {
break
}
s.Add(1)
}
}
func (s *Spinner) Stop() {
s.Finish()
fmt.Println(s.description)
}

View File

@@ -41,28 +41,38 @@ Generate a response for a given prompt with a provided model. This is a streamin
Advanced parameters (optional):
- `format`: the format to return a response in. Currently the only accepted value is `json`
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
- `system`: system prompt to (overrides what is defined in the `Modelfile`)
- `template`: the full prompt or prompt template (overrides what is defined in the `Modelfile`)
- `context`: the context parameter returned from a previous request to `/generate`, this can be used to keep a short conversational memory
- `stream`: if `false` the response will be be returned as a single response object, rather than a stream of objects
- `stream`: if `false` the response will be returned as a single response object, rather than a stream of objects
- `raw`: if `true` no formatting will be applied to the prompt and no context will be returned. You may choose to use the `raw` parameter if you are specifying a full templated prompt in your request to the API, and are managing history yourself.
### Request
### JSON mode
Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts whitespace.
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/generate -d '{
"model": "llama2:7b",
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?"
}'
```
### Response
#### Response
A stream of JSON objects:
A stream of JSON objects is returned:
```json
{
"model": "llama2:7b",
"model": "llama2",
"created_at": "2023-08-04T08:52:19.385406455-07:00",
"response": "The",
"done": false
@@ -86,7 +96,7 @@ To calculate how fast the response is generated in tokens per second (token/s),
```json
{
"model": "llama2:7b",
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "",
"context": [1, 2, 3],
@@ -102,30 +112,209 @@ To calculate how fast the response is generated in tokens per second (token/s),
}
```
#### Request (No streaming)
```shell
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?",
"stream": false
}'
```
#### Response
If `stream` is set to `false`, the response will be a single JSON object:
```json
{
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
"done": true,
"total_duration": 5589157167,
"load_duration": 3013701500,
"sample_count": 114,
"sample_duration": 81442000,
"prompt_eval_count": 46,
"prompt_eval_duration": 1160282000,
"eval_count": 13,
"eval_duration": 1325948000
}
```
#### Request (Raw mode)
In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.
```shell
curl http://localhost:11434/api/generate -d '{
"model": "mistral",
"prompt": "[INST] why is the sky blue? [/INST]",
"raw": true,
"stream": false
}'
```
#### Response
```json
{
"model": "mistral",
"created_at": "2023-11-03T15:36:02.583064Z",
"response": " The sky appears blue because of a phenomenon called Rayleigh scattering.",
"done": true,
"total_duration": 14648695333,
"load_duration": 3302671417,
"prompt_eval_count": 14,
"prompt_eval_duration": 286243000,
"eval_count": 129,
"eval_duration": 10931424000
}
```
#### Request (JSON mode)
```shell
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "What color is the sky at different times of the day? Respond using JSON",
"format": "json",
"stream": false
}'
```
#### Response
```json
{
"model": "llama2",
"created_at": "2023-11-09T21:07:55.186497Z",
"response": "{\n\"morning\": {\n\"color\": \"blue\"\n},\n\"noon\": {\n\"color\": \"blue-gray\"\n},\n\"afternoon\": {\n\"color\": \"warm gray\"\n},\n\"evening\": {\n\"color\": \"orange\"\n}\n}\n",
"done": true,
"total_duration": 4661289125,
"load_duration": 1714434500,
"prompt_eval_count": 36,
"prompt_eval_duration": 264132000,
"eval_count": 75,
"eval_duration": 2112149000
}
```
The value of `response` will be a string containing JSON similar to:
```json
{
"morning": {
"color": "blue"
},
"noon": {
"color": "blue-gray"
},
"afternoon": {
"color": "warm gray"
},
"evening": {
"color": "orange"
}
}
```
#### Request (With options)
If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.
```shell
curl http://localhost:11434/api/generate -d '{
"model": "llama2",
"prompt": "Why is the sky blue?",
"stream": false,
"options": {
"num_keep": 5,
"seed": 42,
"num_predict": 100,
"top_k": 20,
"top_p": 0.9,
"tfs_z": 0.5,
"typical_p": 0.7,
"repeat_last_n": 33,
"temperature": 0.8,
"repeat_penalty": 1.2,
"presence_penalty": 1.5,
"frequency_penalty": 1.0,
"mirostat": 1,
"mirostat_tau": 0.8,
"mirostat_eta": 0.6,
"penalize_newline": true,
"stop": ["\n", "user:"],
"numa": false,
"num_ctx": 4,
"num_batch": 2,
"num_gqa": 1,
"num_gpu": 1,
"main_gpu": 0,
"low_vram": false,
"f16_kv": true,
"logits_all": false,
"vocab_only": false,
"use_mmap": true,
"use_mlock": false,
"embedding_only": false,
"rope_frequency_base": 1.1,
"rope_frequency_scale": 0.8,
"num_thread": 8
}
}'
```
#### Response
```json
{
"model": "llama2",
"created_at": "2023-08-04T19:22:45.499127Z",
"response": "The sky is blue because it is the color of the sky.",
"context": [1, 2, 3],
"done": true,
"total_duration": 5589157167,
"load_duration": 3013701500,
"sample_count": 114,
"sample_duration": 81442000,
"prompt_eval_count": 46,
"prompt_eval_duration": 1160282000,
"eval_count": 13,
"eval_duration": 1325948000
}
```
## Create a Model
```shell
POST /api/create
```
Create a model from a [`Modelfile`](./modelfile.md)
Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `modelfile` to the content of the Modelfile rather than just set `path`. This is a requirement for remote create. Remote model creation should also create any file blobs, fields such as `FROM` and `ADAPTER`, explicitly with the server using [Create a Blob](#create-a-blob) and the value to the path indicated in the response.
### Parameters
- `name`: name of the model to create
- `path`: path to the Modelfile
- `stream`: (optional) if `false` the response will be be returned as a single response object, rather than a stream of objects
- `modelfile` (optional): contents of the Modelfile
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
- `path` (optional): path to the Modelfile
### Request
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/create -d '{
curl http://localhost:11434/api/create -d '{
"name": "mario",
"path": "~/Modelfile"
"modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
}'
```
### Response
#### Response
A stream of JSON objects. When finished, `status` is `success`.
@@ -135,6 +324,54 @@ A stream of JSON objects. When finished, `status` is `success`.
}
```
### Check if a Blob Exists
```shell
HEAD /api/blobs/:digest
```
Check if a blob is known to the server.
#### Query Parameters
- `digest`: the SHA256 digest of the blob
#### Examples
##### Request
```shell
curl -I http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
```
##### Response
Return 200 OK if the blob exists, 404 Not Found if it does not.
### Create a Blob
```shell
POST /api/blobs/:digest
```
Create a blob from a file. Returns the server file path.
#### Query Parameters
- `digest`: the expected SHA256 digest of the file
#### Examples
##### Request
```shell
curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
```
##### Response
Return 201 Created if the blob was successfully created.
## List Local Models
```shell
@@ -143,19 +380,23 @@ GET /api/tags
List models that are available locally.
### Request
### Examples
#### Request
```shell
curl http://localhost:11434/api/tags
```
### Response
#### Response
A single JSON object will be returned.
```json
{
"models": [
{
"name": "llama2:7b",
"name": "llama2",
"modified_at": "2023-08-02T17:02:23.713454393-07:00",
"size": 3791730596
},
@@ -180,15 +421,17 @@ Show details about a model including modelfile, template, parameters, license, a
- `name`: name of the model to show
### Request
### Examples
#### Request
```shell
curl http://localhost:11434/api/show -d '{
"name": "llama2:7b"
"name": "llama2"
}'
```
### Response
#### Response
```json
{
@@ -207,15 +450,21 @@ POST /api/copy
Copy a model. Creates a model with another name from an existing model.
### Request
### Examples
#### Request
```shell
curl http://localhost:11434/api/copy -d '{
"source": "llama2:7b",
"source": "llama2",
"destination": "llama2-backup"
}'
```
#### Response
The only response is a 200 OK if successful.
## Delete a Model
```shell
@@ -226,9 +475,11 @@ Delete a model and its data.
### Parameters
- `model`: model name to delete
- `name`: model name to delete
### Request
### Examples
#### Request
```shell
curl -X DELETE http://localhost:11434/api/delete -d '{
@@ -236,6 +487,10 @@ curl -X DELETE http://localhost:11434/api/delete -d '{
}'
```
#### Response
If successful, the only response is a 200 OK.
## Pull a Model
```shell
@@ -248,23 +503,63 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
- `name`: name of the model to pull
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pulling from your own library during development.
- `stream`: (optional) if `false` the response will be be returned as a single response object, rather than a stream of objects
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
### Request
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/pull -d '{
"name": "llama2:7b"
curl http://localhost:11434/api/pull -d '{
"name": "llama2"
}'
```
### Response
#### Response
If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:
The first object is the manifest:
```json
{
"status": "pulling manifest"
}
```
Then there is a series of downloading responses. Until any of the download is completed, the `completed` key may not be included. The number of files to be downloaded depends on the number of layers specified in the manifest.
```json
{
"status": "downloading digestname",
"digest": "digestname",
"total": 2142590208
"total": 2142590208,
"completed": 241970
}
```
After all the files are downloaded, the final responses are:
```json
{
"status": "verifying sha256 digest"
}
{
"status": "writing manifest"
}
{
"status": "removing any unused layers"
}
{
"status": "success"
}
```
if `stream` is set to false, then the response is a single JSON object:
```json
{
"status": "success"
}
```
@@ -280,19 +575,21 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
- `name`: name of the model to push in the form of `<namespace>/<model>:<tag>`
- `insecure`: (optional) allow insecure connections to the library. Only use this if you are pushing to your library during development.
- `stream`: (optional) if `false` the response will be be returned as a single response object, rather than a stream of objects
- `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
### Request
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/push -d '{
curl http://localhost:11434/api/push -d '{
"name": "mattw/pygmalion:latest"
}'
```
### Response
#### Response
Streaming response that starts with:
If `stream` is not specified, or set to `true`, a stream of JSON objects is returned:
```json
{ "status": "retrieving manifest" }
@@ -325,6 +622,12 @@ Finally, when the upload is complete:
{"status":"success"}
```
If `stream` is set to `false`, then the response is a single JSON object:
```json
{ "status": "success" }
```
## Generate Embeddings
```shell
@@ -342,20 +645,22 @@ Advanced parameters:
- `options`: additional model parameters listed in the documentation for the [Modelfile](./modelfile.md#valid-parameters-and-values) such as `temperature`
### Request
### Examples
#### Request
```shell
curl -X POST http://localhost:11434/api/embeddings -d '{
"model": "llama2:7b",
curl http://localhost:11434/api/embeddings -d '{
"model": "llama2",
"prompt": "Here is an article about llamas..."
}'
```
### Response
#### Response
```json
{
"embeddings": [
"embedding": [
0.5670403838157654, 0.009260174818336964, 0.23178744316101074, -0.2916173040866852, -0.8924556970596313,
0.8785552978515625, -0.34576427936553955, 0.5742510557174683, -0.04222835972905159, -0.137906014919281
]

View File

@@ -1,18 +1,156 @@
# FAQ
## How can I expose the Ollama server?
## How can I view the logs?
On macOS:
```
cat ~/.ollama/logs/server.log
```
On Linux:
```
journalctl -u ollama
```
If you're running `ollama serve` directly, the logs will be printed to the console.
## How can I expose Ollama on my network?
Ollama binds to 127.0.0.1 port 11434 by default. Change the bind address with the `OLLAMA_HOST` environment variable.
On macOS:
```bash
OLLAMA_HOST=0.0.0.0:11435 ollama serve
```
By default, Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0`. To support more origins, you can use the `OLLAMA_ORIGINS` environment variable:
On Linux:
Create a `systemd` drop-in directory and set `Environment=OLLAMA_HOST`
```bash
mkdir -p /etc/systemd/system/ollama.service.d
echo '[Service]' >>/etc/systemd/system/ollama.service.d/environment.conf
```
```bash
echo 'Environment="OLLAMA_HOST=0.0.0.0:11434"' >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
```bash
systemctl daemon-reload
systemctl restart ollama
```
## How can I allow additional web origins to access Ollama?
Ollama allows cross origin requests from `127.0.0.1` and `0.0.0.0` by default. Add additional origins with the `OLLAMA_ORIGINS` environment variable:
On macOS:
```bash
OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
```
On Linux:
```bash
echo 'Environment="OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
```bash
systemctl daemon-reload
systemctl restart ollama
```
## Where are models stored?
* macOS: Raw model data is stored under `~/.ollama/models`.
* Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`
- macOS: Raw model data is stored under `~/.ollama/models`.
- Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`
Below the models directory you will find a structure similar to the following:
```shell
.
├── blobs
└── manifests
└── registry.ollama.ai
├── f0rodo
├── library
├── mattw
└── saikatkumardey
```
There is a `manifests/registry.ollama.ai/namespace` path. In example above, the user has downloaded models from the official `library`, `f0rodo`, `mattw`, and `saikatkumardey` namespaces. Within each of those directories, you will find directories for each of the models downloaded. And in there you will find a file name representing each tag. Each tag file is the manifest for the model.
The manifest lists all the layers used in this model. You will see a `media type` for each layer, along with a digest. That digest corresponds with a file in the `models/blobs directory`.
### How can I change where Ollama stores models?
To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service.
## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
No. Anything you do with Ollama, such as generate a response from the model, stays with you. We don't collect any data about how you use the model. You are always in control of your own data.
## How can I use Ollama in Visual Studio Code?
There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. You can see the list of [extensions & plugins](https://github.com/jmorganca/ollama#extensions--plugins) at the bottom of the main repository readme.
## How do I use Ollama behind a proxy?
Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` are configured. When using either variables, ensure it is set where `ollama serve` can access the values.
When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate.
On macOS:
```bash
HTTPS_PROXY=http://proxy.example.com ollama serve
```
On Linux:
```bash
echo 'Environment="HTTPS_PROXY=https://proxy.example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
```
Reload `systemd` and restart Ollama:
```bash
systemctl daemon-reload
systemctl restart ollama
```
### How do I use Ollama behind a proxy in Docker?
The Ollama Docker container image can be configured to use a proxy by passing `-e HTTPS_PROXY=https://proxy.example.com` when starting the container.
Alternatively, Docker daemon can be configured to use a proxy. Instructions are available for Docker Desktop on [macOS](https://docs.docker.com/desktop/settings/mac/#proxies), [Windows](https://docs.docker.com/desktop/settings/windows/#proxies), and [Linux](https://docs.docker.com/desktop/settings/linux/#proxies), and Docker [daemon with systemd](https://docs.docker.com/config/daemon/systemd/#httphttps-proxy).
Ensure the certificate is installed as a system certificate when using HTTPS. This may require a new Docker image when using a self-signed certificate.
```dockerfile
FROM ollama/ollama
COPY my-ca.pem /usr/local/share/ca-certificates/my-ca.crt
RUN update-ca-certificates
```
Build and run this image:
```shell
docker build -t ollama-with-ca .
docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
```
## How do I use Ollama with GPU acceleration in Docker?
The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation.

198
docs/import.md Normal file
View File

@@ -0,0 +1,198 @@
# Import a model
This guide walks through importing a GGUF, PyTorch or Safetensors model.
## Importing (GGUF)
### Step 1: Write a `Modelfile`
Start by creating a `Modelfile`. This file is the blueprint for your model, specifying weights, parameters, prompt templates and more.
```
FROM ./mistral-7b-v0.1.Q4_0.gguf
```
(Optional) many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
```
FROM ./q4_0.bin
TEMPLATE "[INST] {{ .Prompt }} [/INST]"
```
### Step 2: Create the Ollama model
Finally, create a model from your `Modelfile`:
```
ollama create example -f Modelfile
```
### Step 3: Run your model
Next, test the model with `ollama run`:
```
ollama run example "What is your favourite condiment?"
```
## Importing (PyTorch & Safetensors)
### Supported models
Ollama supports a set of model architectures, with support for more coming soon:
- Llama & Mistral
- Falcon & RW
- GPT-NeoX
- BigCode
To view a model's architecture, check the `config.json` file in its HuggingFace repo. You should see an entry under `architectures` (e.g. `LlamaForCausalLM`).
### Step 1: Clone the HuggingFace repository (optional)
If the model is currently hosted in a HuggingFace repository, first clone that repository to download the raw model.
```
git lfs install
git clone https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1
cd Mistral-7B-Instruct-v0.1
```
### Step 2: Convert and quantize to a `.bin` file (optional, for PyTorch and Safetensors)
If the model is in PyTorch or Safetensors format, a [Docker image](https://hub.docker.com/r/ollama/quantize) with the tooling required to convert and quantize models is available.
First, Install [Docker](https://www.docker.com/get-started/).
Next, to convert and quantize your model, run:
```
docker run --rm -v .:/model ollama/quantize -q q4_0 /model
```
This will output two files into the directory:
- `f16.bin`: the model converted to GGUF
- `q4_0.bin` the model quantized to a 4-bit quantization (we will use this file to create the Ollama model)
### Step 3: Write a `Modelfile`
Next, create a `Modelfile` for your model:
```
FROM ./q4_0.bin
```
(Optional) many chat models require a prompt template in order to answer correctly. A default prompt template can be specified with the `TEMPLATE` instruction in the `Modelfile`:
```
FROM ./q4_0.bin
TEMPLATE "[INST] {{ .Prompt }} [/INST]"
```
### Step 4: Create the Ollama model
Finally, create a model from your `Modelfile`:
```
ollama create example -f Modelfile
```
### Step 5: Run your model
Next, test the model with `ollama run`:
```
ollama run example "What is your favourite condiment?"
```
## Publishing your model (optional early alpha)
Publishing models is in early alpha. If you'd like to publish your model to share with others, follow these steps:
1. Create [an account](https://ollama.ai/signup)
2. Run `cat ~/.ollama/id_ed25519.pub` to view your Ollama public key. Copy this to the clipboard.
3. Add your public key to your [Ollama account](https://ollama.ai/settings/keys)
Next, copy your model to your username's namespace:
```
ollama cp example <your username>/example
```
Then push the model:
```
ollama push <your username>/example
```
After publishing, your model will be available at `https://ollama.ai/<your username>/example`.
## Quantization reference
The quantization options are as follow (from highest highest to lowest levels of quantization). Note: some architectures such as Falcon do not support K quants.
- `q2_K`
- `q3_K`
- `q3_K_S`
- `q3_K_M`
- `q3_K_L`
- `q4_0` (recommended)
- `q4_1`
- `q4_K`
- `q4_K_S`
- `q4_K_M`
- `q5_0`
- `q5_1`
- `q5_K`
- `q5_K_S`
- `q5_K_M`
- `q6_K`
- `q8_0`
## Manually converting & quantizing models
### Prerequisites
Start by cloning the `llama.cpp` repo to your machine in another directory:
```
git clone https://github.com/ggerganov/llama.cpp.git
cd llama.cpp
```
Next, install the Python dependencies:
```
pip install -r requirements.txt
```
Finally, build the `quantize` tool:
```
make quantize
```
### Convert the model
Run the correct conversion script for your model architecture:
```shell
# LlamaForCausalLM or MistralForCausalLM
python convert.py <path to model directory>
# FalconForCausalLM
python convert-falcon-hf-to-gguf.py <path to model directory>
# GPTNeoXForCausalLM
python convert-gptneox-hf-to-gguf.py <path to model directory>
# GPTBigCodeForCausalLM
python convert-starcoder-hf-to-gguf.py <path to model directory>
```
### Quantize the model
```
quantize <path to model dir>/ggml-model-f32.bin <path to model dir>/q4_0.bin q4_0
```

View File

@@ -1,12 +1,16 @@
# Installing Ollama on Linux
# Ollama on Linux
> Note: A one line installer for Ollama is available by running:
## Install
Install Ollama running this one-liner:
>
> ```bash
> curl https://ollama.ai/install.sh | sh
> ```
```bash
curl https://ollama.ai/install.sh | sh
```
## Download the `ollama` binary
## Manual install
### Download the `ollama` binary
Ollama is distributed as a self-contained binary. Download it to a directory in your PATH:
@@ -15,31 +19,7 @@ sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
```
## Start Ollama
Start Ollama by running `ollama serve`:
```bash
ollama serve
```
Once Ollama is running, run a model in another terminal session:
```bash
ollama run llama2
```
## Install CUDA drivers (optional for Nvidia GPUs)
[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.
Verify that the drivers are installed by running the following command, which should print details about your GPU:
```bash
nvidia-smi
```
## Adding Ollama as a startup service (optional)
### Adding Ollama as a startup service (recommended)
Create a user for Ollama:
@@ -60,7 +40,6 @@ User=ollama
Group=ollama
Restart=always
RestartSec=3
Environment="HOME=/usr/share/ollama"
[Install]
WantedBy=default.target
@@ -73,7 +52,40 @@ sudo systemctl daemon-reload
sudo systemctl enable ollama
```
### Viewing logs
### Install CUDA drivers (optional for Nvidia GPUs)
[Download and install](https://developer.nvidia.com/cuda-downloads) CUDA.
Verify that the drivers are installed by running the following command, which should print details about your GPU:
```bash
nvidia-smi
```
### Start Ollama
Start Ollama using `systemd`:
```bash
sudo systemctl start ollama
```
## Update
Update ollama by running the install script again:
```bash
curl https://ollama.ai/install.sh | sh
```
Or by downloading the ollama binary:
```bash
sudo curl -L https://ollama.ai/download/ollama-linux-amd64 -o /usr/bin/ollama
sudo chmod +x /usr/bin/ollama
```
## Viewing logs
To view logs of Ollama running as a startup service, run:
@@ -81,3 +93,24 @@ To view logs of Ollama running as a startup service, run:
journalctl -u ollama
```
## Uninstall
Remove the ollama service:
```bash
sudo systemctl stop ollama
sudo systemctl disable ollama
sudo rm /etc/systemd/system/ollama.service
```
Remove the ollama binary from your bin directory (either `/usr/local/bin`, `/usr/bin`, or `/bin`):
```bash
sudo rm $(which ollama)
```
Remove the downloaded models and Ollama service user:
```bash
sudo rm -r /usr/share/ollama
sudo userdel ollama
```

View File

@@ -12,7 +12,6 @@ A model file is the blueprint to create and share models with Ollama.
- [FROM (Required)](#from-required)
- [Build from llama2](#build-from-llama2)
- [Build from a bin file](#build-from-a-bin-file)
- [EMBED](#embed)
- [PARAMETER](#parameter)
- [Valid Parameters and Values](#valid-parameters-and-values)
- [TEMPLATE](#template)
@@ -42,6 +41,8 @@ INSTRUCTION arguments
## Examples
### Basic `Modelfile`
An example of a `Modelfile` creating a mario blueprint:
```modelfile
@@ -64,6 +65,35 @@ To use this:
More examples are available in the [examples directory](../examples).
### `Modelfile`s in [ollama.ai/library][1]
There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
- Option 1: view a details page from a model's tags page:
1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
3. Scroll down to "Layers"
- Note: if the [`FROM` instruction](#from-required) is not present,
it means the model was created from a local file
- Option 2: use `ollama show` to print the `Modelfile` like so:
```bash
> ollama show --modelfile llama2:13b
# Modelfile generated by "ollama show"
# To build a new Modelfile based on this one, replace the FROM line with:
# FROM llama2:13b
FROM /root/.ollama/models/blobs/sha256:123abc
TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>
{{ end }}{{ .Prompt }} [/INST] """
SYSTEM """"""
PARAMETER stop [INST]
PARAMETER stop [/INST]
PARAMETER stop <<SYS>>
PARAMETER stop <</SYS>>
```
## Instructions
### FROM (Required)
@@ -91,17 +121,6 @@ FROM ./ollama-model.bin
This bin file location should be specified as an absolute path or relative to the `Modelfile` location.
### EMBED
The `EMBED` instruction is used to add embeddings of files to a model. This is useful for adding custom data that the model can reference when generating an answer. Note that currently only text files are supported, formatted with each line as one embedding.
```modelfile
FROM <model name>:<tag>
EMBED <file path>.txt
EMBED <different file path>.txt
EMBED <path to directory>/*.txt
```
### PARAMETER
The `PARAMETER` instruction defines a parameter that can be set when the model is run.
@@ -124,7 +143,8 @@ PARAMETER <parameter> <parametervalue>
| repeat_last_n | Sets how far back for the model to look back to prevent repetition. (Default: 64, 0 = disabled, -1 = num_ctx) | int | repeat_last_n 64 |
| repeat_penalty | Sets how strongly to penalize repetitions. A higher value (e.g., 1.5) will penalize repetitions more strongly, while a lower value (e.g., 0.9) will be more lenient. (Default: 1.1) | float | repeat_penalty 1.1 |
| temperature | The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8) | float | temperature 0.7 |
| stop | Sets the stop sequences to use. | string | stop "AI assistant:" |
| seed | Sets the random number seed to use for generation. Setting this to a specific number will make the model generate the same text for the same prompt. (Default: 0) | int | seed 42 |
| stop | Sets the stop sequences to use. When this pattern is encountered the LLM will stop generating text and return. Multiple stop patterns may be set by specifying multiple separate `stop` parameters in a modelfile. | string | stop "AI assistant:" |
| tfs_z | Tail free sampling is used to reduce the impact of less probable tokens from the output. A higher value (e.g., 2.0) will reduce the impact more, while a value of 1.0 disables this setting. (default: 1) | float | tfs_z 1 |
| num_predict | Maximum number of tokens to predict when generating text. (Default: 128, -1 = infinite generation, -2 = fill context) | int | num_predict 42 |
| top_k | Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40) | int | top_k 40 |
@@ -132,7 +152,7 @@ PARAMETER <parameter> <parametervalue>
### TEMPLATE
`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific.
`TEMPLATE` of the full prompt template to be passed into the model. It may include (optionally) a system prompt and a user's prompt. This is used to create a full custom prompt, and syntax may be model specific. You can usually find the template for a given model in the readme for that model.
#### Template Variables
@@ -188,3 +208,5 @@ LICENSE """
- the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
- Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable.
[1]: https://ollama.ai/library

View File

@@ -4,5 +4,6 @@ Here is a list of ways you can use Ollama with other tools to build interesting
- [Using LangChain with Ollama in JavaScript](./tutorials/langchainjs.md)
- [Using LangChain with Ollama in Python](./tutorials/langchainpy.md)
- [Running Ollama on NVIDIA Jetson Devices](./tutorials/nvidia-jetson.md)
Also be sure to check out the [examples](../examples) directory for more ways to use Ollama.
Also be sure to check out the [examples](../examples) directory for more ways to use Ollama.

View File

@@ -23,13 +23,17 @@ const answer = await ollama.call(`why is the sky blue?`);
console.log(answer);
```
That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's build that part of the app.
That will get us the same thing as if we ran `ollama run llama2 "why is the sky blue"` in the terminal. But we want to load a document from the web to ask a question against. **Cheerio** is a great library for ingesting a webpage, and **LangChain** uses it in their **CheerioWebBaseLoader**. So let's install **Cheerio** and build that part of the app.
```bash
npm install cheerio
```
```javascript
import { CheerioWebBaseLoader } from "langchain/document_loaders/web/cheerio";
const loader = new CheerioWebBaseLoader("https://en.wikipedia.org/wiki/2023_Hawaii_wildfires");
const data = loader.load();
const data = await loader.load();
```
That will load the document. Although this page is smaller than the Odyssey, it is certainly bigger than the context size for most LLMs. So we are going to need to split into smaller pieces, and then select just the pieces relevant to our question. This is a great use for a vector datastore. In this example, we will use the **MemoryVectorStore** that is part of **LangChain**. But there is one more thing we need to get the content into the datastore. We have to run an embeddings process that converts the tokens in the text into a series of vectors. And for that, we are going to use **Tensorflow**. There is a lot of stuff going on in this one. First, install the **Tensorflow** components that we need.

View File

@@ -0,0 +1,38 @@
# Running Ollama on NVIDIA Jetson Devices
With some minor configuration, Ollama runs well on [NVIDIA Jetson Devices](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/). The following has been tested on [JetPack 5.1.2](https://developer.nvidia.com/embedded/jetpack).
NVIDIA Jetson devices are Linux-based embedded AI computers that are purpose-built for AI applications.
Jetsons have an integrated GPU that is wired directly to the memory controller of the machine. For this reason, the `nvidia-smi` command is unrecognized, and Ollama proceeds to operate in "CPU only"
mode. This can be verified by using a monitoring tool like jtop.
In order to address this, we simply pass the path to the Jetson's pre-installed CUDA libraries into `ollama serve` (while in a tmux session). We then hardcode the num_gpu parameters into a cloned
version of our target model.
Prerequisites:
- curl
- tmux
Here are the steps:
- Install Ollama via standard Linux command (ignore the 404 error): `curl https://ollama.ai/install.sh | sh`
- Stop the Ollama service: `sudo systemctl stop ollama`
- Start Ollama serve in a tmux session called ollama_jetson and reference the CUDA libraries path: `tmux has-session -t ollama_jetson 2>/dev/null || tmux new-session -d -s ollama_jetson
'LD_LIBRARY_PATH=/usr/local/cuda/lib64 ollama serve'`
- Pull the model you want to use (e.g. mistral): `ollama pull mistral`
- Create a new Modelfile specifically for enabling GPU support on the Jetson: `touch ModelfileMistralJetson`
- In the ModelfileMistralJetson file, specify the FROM model and the num_gpu PARAMETER as shown below:
```
FROM mistral
PARAMETER num_gpu 999
```
- Create a new model from your Modelfile: `ollama create mistral-jetson -f ./ModelfileMistralJetson`
- Run the new model: `ollama run mistral-jetson`
If you run a monitoring tool like jtop you should now see that Ollama is using the Jetson's integrated GPU.
And that's it!

View File

@@ -0,0 +1,10 @@
# Bash Shell examples
When calling `ollama`, you can pass it a file to run all the prompts in the file, one after the other:
`ollama run llama2 < sourcequestions.txt`
This concept is used in the following example.
## Compare Models
`comparemodels.sh` is a script that runs all the questions in `sourcequestions.txt` using any 4 models you choose that you have already pulled from the Ollama library or have created locally.

View File

@@ -0,0 +1,64 @@
#! /usr/bin/env bash
# Compare multiple models by running them with the same questions
NUMBEROFCHOICES=4
SELECTIONS=()
declare -a SUMS=()
# Get the list of models
CHOICES=$(ollama list | awk '{print $1}')
# Select which models to run as a comparison
echo "Select $NUMBEROFCHOICES models to compare:"
select ITEM in $CHOICES; do
if [[ -n $ITEM ]]; then
echo "You have selected $ITEM"
SELECTIONS+=("$ITEM")
((COUNT++))
if [[ $COUNT -eq $NUMBEROFCHOICES ]]; then
break
fi
else
echo "Invalid selection"
fi
done
# Loop through each of the selected models
for ITEM in "${SELECTIONS[@]}"; do
echo "--------------------------------------------------------------"
echo "Loading the model $ITEM into memory"
ollama run "$ITEM" ""
echo "--------------------------------------------------------------"
echo "Running the questions through the model $ITEM"
COMMAND_OUTPUT=$(ollama run "$ITEM" --verbose < sourcequestions.txt 2>&1| tee /dev/stderr)
# eval duration is sometimes listed in seconds and sometimes in milliseconds.
# Add up the values for each model
SUM=$(echo "$COMMAND_OUTPUT" | awk '
/eval duration:/ {
value = $3
if (index(value, "ms") > 0) {
gsub("ms", "", value)
value /= 1000
} else {
gsub("s", "", value)
}
sum += value
}
END { print sum }')
SUMS+=("All questions for $ITEM completed in $SUM seconds")
done
echo ""
echo "--------------------------------------------------------------"
echo -e "Sums of eval durations for each run:"
for val in "${SUMS[@]}"; do
echo "$val"
done
echo "--------------------------------------------------------------"
echo "Comparison complete. Now you can decide"
echo "which model is best."
echo "--------------------------------------------------------------"

View File

@@ -0,0 +1,7 @@
Why is the sky blue
What is a black hole
Explain the big bang theory like I am 5?
What is the quickest way to win a game of Monopoly with 3 others?
Why does a vacuum bottle keep my coffee hot and my milkshake cold?
What is the difference between a meteor, a meteorite, and a meteoroid?
Create an array with 5 items and print to the console. Do this in Python, C#, Typescript, and Rust.

View File

@@ -3,10 +3,10 @@ package main
import (
"bytes"
"fmt"
"net/http"
"os"
"io"
"log"
"net/http"
"os"
)
func main() {
@@ -16,7 +16,7 @@ func main() {
if err != nil {
fmt.Print(err.Error())
os.Exit(1)
}
}
responseData, err := io.ReadAll(resp.Body)
if err != nil {

View File

@@ -0,0 +1,5 @@
# Ollama Jupyter Notebook
This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely.
For best results, use an instance with GPU accelerator.

View File

@@ -0,0 +1,102 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "93f59dcb-c588-41b8-a792-55d88ade739c",
"metadata": {},
"outputs": [],
"source": [
"# Download and run the Ollama Linux install script\n",
"!curl https://ollama.ai/install.sh | sh\n",
"!command -v systemctl >/dev/null && sudo systemctl stop ollama"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "658c147e-c7f8-490e-910e-62b80f577dda",
"metadata": {},
"outputs": [],
"source": [
"!pip install aiohttp pyngrok\n",
"\n",
"import os\n",
"import asyncio\n",
"from aiohttp import ClientSession\n",
"\n",
"# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n",
"# over the built-in library. This is particularly important for \n",
"# Google Colab which installs older drivers\n",
"os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n",
"\n",
"async def run(cmd):\n",
" '''\n",
" run is a helper function to run subcommands asynchronously.\n",
" '''\n",
" print('>>> starting', *cmd)\n",
" p = await asyncio.subprocess.create_subprocess_exec(\n",
" *cmd,\n",
" stdout=asyncio.subprocess.PIPE,\n",
" stderr=asyncio.subprocess.PIPE,\n",
" )\n",
"\n",
" async def pipe(lines):\n",
" async for line in lines:\n",
" print(line.strip().decode('utf-8'))\n",
"\n",
" await asyncio.gather(\n",
" pipe(p.stdout),\n",
" pipe(p.stderr),\n",
" )\n",
"\n",
"\n",
"await asyncio.gather(\n",
" run(['ollama', 'serve']),\n",
" run(['ngrok', 'http', '--log', 'stderr', '11434']),\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e7735a55-9aad-4caf-8683-52e2163ba53b",
"metadata": {},
"source": [
"The previous cell starts two processes, `ollama` and `ngrok`. The log output will show a line like the following which describes the external address.\n",
"\n",
"```\n",
"t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n",
"```\n",
"\n",
"The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n",
"\n",
"```bash\n",
"export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n",
"ollama list\n",
"ollama run mistral\n",
"```"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,36 @@
# Deploy Ollama to Kubernetes
## Prerequisites
- Ollama: https://ollama.ai/download
- Kubernetes cluster. This example will use Google Kubernetes Engine.
## Steps
1. Create the Ollama namespace, daemon set, and service
```bash
kubectl apply -f cpu.yaml
```
1. Port forward the Ollama service to connect and use it locally
```bash
kubectl -n ollama port-forward service/ollama 11434:80
```
1. Pull and run a model, for example `orca-mini:3b`
```bash
ollama run orca-mini:3b
```
## (Optional) Hardware Acceleration
Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin). Follow the link for more details.
Once configured, create a GPU enabled Ollama deployment.
```bash
kubectl apply -f gpu.yaml
```

View File

@@ -0,0 +1,42 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: ollama
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ollama
namespace: ollama
spec:
selector:
matchLabels:
name: ollama
template:
metadata:
labels:
name: ollama
spec:
containers:
- name: ollama
image: ollama/ollama:latest
ports:
- name: http
containerPort: 11434
protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
name: ollama
namespace: ollama
spec:
type: ClusterIP
selector:
name: ollama
ports:
- port: 80
name: http
targetPort: http
protocol: TCP

View File

@@ -0,0 +1,56 @@
---
apiVersion: v1
kind: Namespace
metadata:
name: ollama
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: ollama
namespace: ollama
spec:
strategy:
type: Recreate
selector:
matchLabels:
name: ollama
template:
metadata:
labels:
name: ollama
spec:
containers:
- name: ollama
image: ollama/ollama:latest
env:
- name: PATH
value: /usr/local/nvidia/bin:/usr/local/nvidia/lib64:/usr/bin:/usr/sbin:/bin:/sbin
- name: LD_LIBRARY_PATH
value: /usr/local/nvidia/lib64
ports:
- name: http
containerPort: 11434
protocol: TCP
resources:
limits:
nvidia.com/gpu: 1
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
---
apiVersion: v1
kind: Service
metadata:
name: ollama
namespace: ollama
spec:
type: ClusterIP
selector:
name: ollama
ports:
- port: 80
name: http
targetPort: http
protocol: TCP

View File

@@ -6,7 +6,6 @@ PERSIST_DIRECTORY = os.environ.get('PERSIST_DIRECTORY', 'db')
# Define the Chroma settings
CHROMA_SETTINGS = Settings(
chroma_db_impl='duckdb+parquet',
persist_directory=PERSIST_DIRECTORY,
anonymized_telemetry=False
)

View File

@@ -150,7 +150,7 @@ def main():
print("Creating new vectorstore")
texts = process_documents()
print(f"Creating embeddings. May take some minutes...")
db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory, client_settings=CHROMA_SETTINGS)
db = Chroma.from_documents(texts, embeddings, persist_directory=persist_directory)
db.persist()
db = None

View File

@@ -4,6 +4,7 @@ from langchain.embeddings import HuggingFaceEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.vectorstores import Chroma
from langchain.llms import Ollama
import chromadb
import os
import argparse
import time
@@ -22,7 +23,9 @@ def main():
# Parse the command line arguments
args = parse_arguments()
embeddings = HuggingFaceEmbeddings(model_name=embeddings_model_name)
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings, client_settings=CHROMA_SETTINGS)
db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
retriever = db.as_retriever(search_kwargs={"k": target_source_chunks})
# activate/deactivate the streaming StdOut callback for LLMs
callbacks = [] if args.mute_stream else [StreamingStdOutCallbackHandler()]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,17 @@
---
Hi matt,
thanks for letting me know that you are going to come today, November 16, for my tea party. My address is 123 Falk St on Bainbridge Island. I live in the house with the red door. I will be home all day so just come by whenever you want.
Fred
---
Great, send the check to our office at 1917 1st St, Seattle, WA 98101. I will let you know when we receive it.
Mark Richardson
Big Corp
---
We are looking forward to seeing you at our Local AI Meetup. It will be held on December 3. It will be at the offices of Enormous Co. Our address is 344 1st Ave, Seattle, WA 98101. We will be meeting in the conference room on the 3rd floor.
Barbara Reilly
Enormous Co.

View File

@@ -0,0 +1,108 @@
import requests
import json
model = "openchat"
def reportEvents(name, date, location):
nameString = name if name else "an event"
dateString = f" on {date}" if date else ""
locationString = f" at {location}" if location else ""
print(f"You have an event: {nameString}{dateString}{locationString}")
def reportAddresses(address):
for field in address:
if field == "city":
city = address["city"]
state = f", {address['state']}" if address["state"] else ""
zip = f" {address['zip']}"
print(f"{city}{state}{zip}\n")
break
else:
print(address[field])
systemPrompt = "You will be given a text along with a prompt and a schema. You will have to extract the information requested in the prompt from the text and generate output in JSON observing the schema provided. If the schema shows a type of integer or number, you must only show a integer for that field. A string should always be a valid string. If a value is unknown, leave it empty. Output the JSON with extra spaces to ensure that it pretty prints."
schema = {
"eventsQuantity": {
"type": "integer",
"description": "The number of events in the source text",
},
"addressesQuantity": {
"type": "integer",
"description": "The number of addresses in the source text",
},
"events": [
{
"name": {"type": "string", "description": "Name of the event"},
"date": {"type": "string", "description": "Date of the event"},
"location": {"type": "string", "description": "Location of the event"},
"extraInfo": {
"type": "string",
"description": "Any extra information that is provided about the event.",
},
}
],
"people": [
{
"name": {"type": "string", "description": "Name of the person"},
"company": {
"type": "string",
"description": "Name of the company where they work",
},
"street": {
"type": "string",
"description": "Street address of the person or company. This is only the street name and the numerical address. Do not include city, state, or zip of the address in this field.",
},
"city": {
"type": "string",
"description": "City portion of the address of the person or company",
},
"state": {
"type": "string",
"description": "State portion of the address of the person or company",
},
"zip": {
"type": "string",
"description": "Zip code of the person or company",
},
"extraInfo": {
"type": "string",
"description": "Any extra information that is provided about the location.",
},
}
],
}
with open("emails.txt") as f:
content=f.read()
prompt = f"The source text is a series of emails that have been put into a single file. They are separated by three dashes. Review the source text and determine the full address of the person sending each of the emails as well as any events that we need to track. If they provide a company address use that. If any extra info is provided, such as a description of the place, or a floor, add it to extraInfo. The first field in the address JSON is quantity of events and should be set to the number of events tracked and the second field should be set to the number of addresses tracked in the file. Don't stuff an event into the output that isn't an event. Only add data to the mostly appropriate field. Don't make up fields that aren't in the schema. If there isn't a value for a field, use null. Output should be in JSON.\n\nSchema: \n{schema}\n\nSource Text:\n{content}"
r = requests.post(
"http://localhost:11434/api/generate",
json={
"model": model,
"system": systemPrompt,
"prompt": prompt,
"format": "json",
"stream": False,
},
)
j = json.loads(r.text)
output = json.loads(j["response"])
events = output["events"]
addresses = output["people"]
print(f"Here are your {output['eventsQuantity']} events:")
for event in events:
reportEvents(event["name"], event["date"], event["location"])
print(f"\n\nHere are your {output['addressesQuantity']} addresses")
for address in addresses:
reportAddresses(address)

View File

@@ -0,0 +1,52 @@
import requests
import json
model = "orca2"
systemprompt = "You will be given a text along with a prompt and a schema. You will have to extract the information requested in the prompt from the text and generate output in JSON observing the schema provided. If the schema shows a type of integer or number, you must only show a integer for that field. A string should always be a valid string. If a value is unknown, leave it empty. Output the JSON with extra spaces to ensure that it pretty prints."
schema = {
"people": [
{
"name": {"type": "string", "description": "Name of the person"},
"title": {"type": "string", "description": "Title of the person"},
}
],
}
# Read the content from the file
words = []
with open("wp.txt") as f:
maxwords = 2000
count = 0
lines = f.readlines()
for line in lines:
for word in line.split(" "):
count += 1
if count > maxwords:
break
words.append(word)
content = ' '.join(words)
# Use the text and schema to set the prompt
prompt = f"Review the source text and determine 10 the most important people to focus on. Then extract the name and title for those people. Output should be in JSON.\n\nSchema: {schema}\n\nSource Text:\n{content}"
# Make the actual request to the model
r = requests.post(
"http://localhost:11434/api/generate",
json={
"model": model,
"system": systemprompt,
"prompt": prompt,
"format": "json",
"stream": False
},
)
# Get the response as JSON.
j = json.loads(r.text)
# Return the result.
print(j["response"])

View File

@@ -0,0 +1,28 @@
# Function calling
![function calling 2023-11-16 16_12_58](https://github.com/jmorganca/ollama/assets/633681/a0acc247-9746-45ab-b325-b65dfbbee4fb)
Function calling in the context of LLM's simply means that the output of the model is formatted in JSON, using a preconfigured schema, and uses the expected types. Then your code can use the output of the model and call functions with it. Using the JSON format in Ollama, you can use any model for function calling.
The two examples provided can extract information out of the provided texts. The first example uses the first couple of chapters from War and Peace by Lev Nikolayevich Tolstoy, and extracts the names and titles of the characters introduced in the story. The second example uses a more complicated schema to pull out addresses and event information from a series of emails.
## Running the examples
1. Clone this repo and navigate to the `examples/python-functioncalling` directory.
2. Install the dependencies with `pip install -r requirements.txt`.
3. Review the `wp.txt` file.
4. Run `python extractwp.py`.
5. Review the `info.txt` file.
6. Run `python extractemail.py`.
## Review the Code
Both examples do roughly the same thing with different source material. They both use the same system prompt, which tells the model to expect some instructions and a schema. Then we inject the schema into the prompt and generate an answer.
The first example, `extractwp.py`, outputs the resulting JSON to the console, listing the characters introduced at the start of War and Peace. The second example, `extractemail.py`, is a bit more complicated, extracting two different types of information: addresses and events. It outputs the results to a JSON blob, then the addresses are handed off to one function called `reportAddresses` and the events are handed off to another function called `reportEvents`.
Notice that both examples are using the model from Intel called `openchat`. This is not a model tuned for function calling, yet it performs very well at this task.
## Next Steps
Try exporting some of your real emails to the input file and seeing how well the model does. Try pointing the first example at other books. You could even have it cycle through all the sections and maybe add up the number of times any character is seen throughout the book, determining the most important characters. You can also try out different models.

View File

@@ -0,0 +1,183 @@
"Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist - I really believe he is Antichrist - I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? I see I have frightened you - sit down and tell me all the news."
It was in July, 1805, and the speaker was the well-known Anna Pavlovna Scherer, maid of honor and favorite of the Empress Marya Fedorovna. With these words she greeted Prince Vasili Kuragin, a man of high rank and importance, who was the first to arrive at her reception. Anna Pavlovna had had a cough for some days. She was, as she said, suffering from la grippe; grippe being then a new word in St. Petersburg, used only by the elite.
All her invitations without exception, written in French, and delivered by a scarlet-liveried footman that morning, ran as follows:
"If you have nothing better to do, Count (or Prince), and if the prospect of spending an evening with a poor invalid is not too terrible, I shall be very charmed to see you tonight between 7 and 10 - Annette Scherer."
"Heavens! what a virulent attack!" replied the prince, not in the least disconcerted by this reception. He had just entered, wearing an embroidered court uniform, knee breeches, and shoes, and had stars on his breast and a serene expression on his flat face. He spoke in that refined French in which our grandfathers not only spoke but thought, and with the gentle, patronizing intonation natural to a man of importance who had grown old in society and at court. He went up to Anna Pavlovna, kissed her hand, presenting to her his bald, scented, and shining head, and complacently seated himself on the sofa.
"First of all, dear friend, tell me how you are. Set your friend's mind at rest," said he without altering his tone, beneath the politeness and affected sympathy of which indifference and even irony could be discerned.
"Can one be well while suffering morally? Can one be calm in times like these if one has any feeling?" said Anna Pavlovna. "You are staying the whole evening, I hope?"
"And the fete at the English ambassador's? Today is Wednesday. I must put in an appearance there," said the prince. "My daughter is coming for me to take me there."
"I thought today's fete had been canceled. I confess all these festivities and fireworks are becoming wearisome."
"If they had known that you wished it, the entertainment would have been put off," said the prince, who, like a wound-up clock, by force of habit said things he did not even wish to be believed.
"Don't tease! Well, and what has been decided about Novosiltsev's dispatch? You know everything."
"What can one say about it?" replied the prince in a cold, listless tone. "What has been decided? They have decided that Buonaparte has burnt his boats, and I believe that we are ready to burn ours."
Prince Vasili always spoke languidly, like an actor repeating a stale part. Anna Pavlovna Scherer on the contrary, despite her forty years, overflowed with animation and impulsiveness. To be an enthusiast had become her social vocation and, sometimes even when she did not feel like it, she became enthusiastic in order not to disappoint the expectations of those who knew her. The subdued smile which, though it did not suit her faded features, always played round her lips expressed, as in a spoiled child, a continual consciousness of her charming defect, which she neither wished, nor could, nor considered it necessary, to correct.
In the midst of a conversation on political matters Anna Pavlovna burst out:
"Oh, don't speak to me of Austria. Perhaps I don't understand things, but Austria never has wished, and does not wish, for war. She is betraying us! Russia alone must save Europe. Our gracious sovereign recognizes his high vocation and will be true to it. That is the one thing I have faith in! Our good and wonderful sovereign has to perform the noblest role on earth, and he is so virtuous and noble that God will not forsake him. He will fulfill his vocation and crush the hydra of revolution, which has become more terrible than ever in the person of this murderer and villain! We alone must avenge the blood of the just one.... Whom, I ask you, can we rely on?... England with her commercial spirit will not and cannot understand the Emperor Alexander's loftiness of soul. She has refused to evacuate Malta. She wanted to find, and still seeks, some secret motive in our actions. What answer did Novosiltsev get? None. The English have not understood and cannot understand the self-abnegation of our Emperor who wants nothing for himself, but only desires the good of mankind. And what have they promised? Nothing! And what little they have promised they will not perform! Prussia has always declared that Buonaparte is invincible, and that all Europe is powerless before him.... And I don't believe a word that Hardenburg says, or Haugwitz either. This famous Prussian neutrality is just a trap. I have faith only in God and the lofty destiny of our adored monarch. He will save Europe!"
She suddenly paused, smiling at her own impetuosity.
"I think," said the prince with a smile, "that if you had been sent instead of our dear Wintzingerode you would have captured the King of Prussia's consent by assault. You are so eloquent. Will you give me a cup of tea?"
"In a moment. A propos," she added, becoming calm again, "I am expecting two very interesting men tonight, le Vicomte de Mortemart, who is connected with the Montmorencys through the Rohans, one of the best French families. He is one of the genuine emigres, the good ones. And also the Abbe Morio. Do you know that profound thinker? He has been received by the Emperor. Had you heard?"
"I shall be delighted to meet them," said the prince. "But tell me," he added with studied carelessness as if it had only just occurred to him, though the question he was about to ask was the chief motive of his visit, "is it true that the Dowager Empress wants Baron Funke to be appointed first secretary at Vienna? The baron by all accounts is a poor creature."
Prince Vasili wished to obtain this post for his son, but others were trying through the Dowager Empress Marya Fedorovna to secure it for the baron.
Anna Pavlovna almost closed her eyes to indicate that neither she nor anyone else had a right to criticize what the Empress desired or was pleased with.
"Baron Funke has been recommended to the Dowager Empress by her sister," was all she said, in a dry and mournful tone.
As she named the Empress, Anna Pavlovna's face suddenly assumed an expression of profound and sincere devotion and respect mingled with sadness, and this occurred every time she mentioned her illustrious patroness. She added that Her Majesty had deigned to show Baron Funke beaucoup d'estime, and again her face clouded over with sadness.
The prince was silent and looked indifferent. But, with the womanly and courtierlike quickness and tact habitual to her, Anna Pavlovna wished both to rebuke him (for daring to speak as he had done of a man recommended to the Empress) and at the same time to console him, so she said:
"Now about your family. Do you know that since your daughter came out everyone has been enraptured by her? They say she is amazingly beautiful."
The prince bowed to signify his respect and gratitude.
"I often think," she continued after a short pause, drawing nearer to the prince and smiling amiably at him as if to show that political and social topics were ended and the time had come for intimate conversation - "I often think how unfairly sometimes the joys of life are distributed. Why has fate given you two such splendid children? I don't speak of Anatole, your youngest. I don't like him," she added in a tone admitting of no rejoinder and raising her eyebrows. "Two such charming children. And really you appreciate them less than anyone, and so you don't deserve to have them."
And she smiled her ecstatic smile.
"I can't help it," said the prince. "Lavater would have said I lack the bump of paternity."
"Don't joke; I mean to have a serious talk with you. Do you know I am dissatisfied with your younger son? Between ourselves" (and her face assumed its melancholy expression), "he was mentioned at Her Majesty's and you were pitied...."
The prince answered nothing, but she looked at him significantly, awaiting a reply. He frowned.
"What would you have me do?" he said at last. "You know I did all a father could for their education, and they have both turned out fools. Hippolyte is at least a quiet fool, but Anatole is an active one. That is the only difference between them." He said this smiling in a way more natural and animated than usual, so that the wrinkles round his mouth very clearly revealed something unexpectedly coarse and unpleasant.
"And why are children born to such men as you? If you were not a father there would be nothing I could reproach you with," said Anna Pavlovna, looking up pensively.
"I am your faithful slave and to you alone I can confess that my children are the bane of my life. It is the cross I have to bear. That is how I explain it to myself. It can't be helped!"
He said no more, but expressed his resignation to cruel fate by a gesture. Anna Pavlovna meditated.
"Have you never thought of marrying your prodigal son Anatole?" she asked. "They say old maids have a mania for matchmaking, and though I don't feel that weakness in myself as yet, I know a little person who is very unhappy with her father. She is a relation of yours, Princess Mary Bolkonskaya."
Prince Vasili did not reply, though, with the quickness of memory and perception befitting a man of the world, he indicated by a movement of the head that he was considering this information.
"Do you know," he said at last, evidently unable to check the sad current of his thoughts, "that Anatole is costing me forty thousand rubles a year? And," he went on after a pause, "what will it be in five years, if he goes on like this?" Presently he added: "That's what we fathers have to put up with.... Is this princess of yours rich?"
"Her father is very rich and stingy. He lives in the country. He is the well-known Prince Bolkonski who had to retire from the army under the late Emperor, and was nicknamed 'the King of Prussia.' He is very clever but eccentric, and a bore. The poor girl is very unhappy. She has a brother; I think you know him, he married Lise Meinen lately. He is an aide-de-camp of Kutuzov's and will be here tonight."
"Listen, dear Annette," said the prince, suddenly taking Anna Pavlovna's hand and for some reason drawing it downwards. "Arrange that affair for me and I shall always be your most devoted slave-slafe with an f, as a village elder of mine writes in his reports. She is rich and of good family and that's all I want."
And with the familiarity and easy grace peculiar to him, he raised the maid of honor's hand to his lips, kissed it, and swung it to and fro as he lay back in his armchair, looking in another direction.
"Attendez," said Anna Pavlovna, reflecting, "I'll speak to Lise, young Bolkonski's wife, this very evening, and perhaps the thing can be arranged. It shall be on your family's behalf that I'll start my apprenticeship as old maid."
Anna Pavlovna's drawing room was gradually filling. The highest Petersburg society was assembled there: people differing widely in age and character but alike in the social circle to which they belonged. Prince Vasili's daughter, the beautiful Helene, came to take her father to the ambassador's entertainment; she wore a ball dress and her badge as maid of honor. The youthful little Princess Bolkonskaya, known as la femme la plus seduisante de Petersbourg, * was also there. She had been married during the previous winter, and being pregnant did not go to any large gatherings, but only to small receptions. Prince Vasili's son, Hippolyte, had come with Mortemart, whom he introduced. The Abbe Morio and many others had also come.
* The most fascinating woman in Petersburg.
To each new arrival Anna Pavlovna said, "You have not yet seen my aunt," or "You do not know my aunt?" and very gravely conducted him or her to a little old lady, wearing large bows of ribbon in her cap, who had come sailing in from another room as soon as the guests began to arrive; and slowly turning her eyes from the visitor to her aunt, Anna Pavlovna mentioned each one's name and then left them.
Each visitor performed the ceremony of greeting this old aunt whom not one of them knew, not one of them wanted to know, and not one of them cared about; Anna Pavlovna observed these greetings with mournful and solemn interest and silent approval. The aunt spoke to each of them in the same words, about their health and her own, and the health of Her Majesty, "who, thank God, was better today." And each visitor, though politeness prevented his showing impatience, left the old woman with a sense of relief at having performed a vexatious duty and did not return to her the whole evening.
The young Princess Bolkonskaya had brought some work in a gold-embroidered velvet bag. Her pretty little upper lip, on which a delicate dark down was just perceptible, was too short for her teeth, but it lifted all the more sweetly, and was especially charming when she occasionally drew it down to meet the lower lip. As is always the case with a thoroughly attractive woman, her defect - the shortness of her upper lip and her half-open mouth - seemed to be her own special and peculiar form of beauty. Everyone brightened at the sight of this pretty young woman, so soon to become a mother, so full of life and health, and carrying her burden so lightly. Old men and dull dispirited young ones who looked at her, after being in her company and talking to her a little while, felt as if they too were becoming, like her, full of life and health. All who talked to her, and at each word saw her bright smile and the constant gleam of her white teeth, thought that they were in a specially amiable mood that day.
The little princess went round the table with quick, short, swaying steps, her workbag on her arm, and gaily spreading out her dress sat down on a sofa near the silver samovar, as if all she was doing was a pleasure to herself and to all around her. "I have brought my work," said she in French, displaying her bag and addressing all present. "Mind, Annette, I hope you have not played a wicked trick on me," she added, turning to her hostess. "You wrote that it was to be quite a small reception, and just see how badly I am dressed." And she spread out her arms to show her short-waisted, lace-trimmed, dainty gray dress, girdled with a broad ribbon just below the breast.
"Soyez tranquille, Lise, you will always be prettier than anyone else," replied Anna Pavlovna.
"You know," said the princess in the same tone of voice and still in French, turning to a general, "my husband is deserting me? He is going to get himself killed. Tell me what this wretched war is for?" she added, addressing Prince Vasili, and without waiting for an answer she turned to speak to his daughter, the beautiful Helene.
"What a delightful woman this little princess is!" said Prince Vasili to Anna Pavlovna.
One of the next arrivals was a stout, heavily built young man with close-cropped hair, spectacles, the light-colored breeches fashionable at that time, a very high ruffle, and a brown dress coat. This stout young man was an illegitimate son of Count Bezukhov, a well-known grandee of Catherine's time who now lay dying in Moscow. The young man had not yet entered either the military or civil service, as he had only just returned from abroad where he had been educated, and this was his first appearance in society. Anna Pavlovna greeted him with the nod she accorded to the lowest hierarchy in her drawing room. But in spite of this lowest-grade greeting, a look of anxiety and fear, as at the sight of something too large and unsuited to the place, came over her face when she saw Pierre enter. Though he was certainly rather bigger than the other men in the room, her anxiety could only have reference to the clever though shy, but observant and natural, expression which distinguished him from everyone else in that drawing room.
"It is very good of you, Monsieur Pierre, to come and visit a poor invalid," said Anna Pavlovna, exchanging an alarmed glance with her aunt as she conducted him to her.
Pierre murmured something unintelligible, and continued to look round as if in search of something. On his way to the aunt he bowed to the little princess with a pleased smile, as to an intimate acquaintance.
Anna Pavlovna's alarm was justified, for Pierre turned away from the aunt without waiting to hear her speech about Her Majesty's health. Anna Pavlovna in dismay detained him with the words: "Do you know the Abbe Morio? He is a most interesting man."
"Yes, I have heard of his scheme for perpetual peace, and it is very interesting but hardly feasible."
"You think so?" rejoined Anna Pavlovna in order to say something and get away to attend to her duties as hostess. But Pierre now committed a reverse act of impoliteness. First he had left a lady before she had finished speaking to him, and now he continued to speak to another who wished to get away. With his head bent, and his big feet spread apart, he began explaining his reasons for thinking the abbe's plan chimerical.
"We will talk of it later," said Anna Pavlovna with a smile.
And having got rid of this young man who did not know how to behave, she resumed her duties as hostess and continued to listen and watch, ready to help at any point where the conversation might happen to flag. As the foreman of a spinning mill, when he has set the hands to work, goes round and notices here a spindle that has stopped or there one that creaks or makes more noise than it should, and hastens to check the machine or set it in proper motion, so Anna Pavlovna moved about her drawing room, approaching now a silent, now a too-noisy group, and by a word or slight rearrangement kept the conversational machine in steady, proper, and regular motion. But amid these cares her anxiety about Pierre was evident. She kept an anxious watch on him when he approached the group round Mortemart to listen to what was being said there, and again when he passed to another group whose center was the abbe.
Pierre had been educated abroad, and this reception at Anna Pavlovna's was the first he had attended in Russia. He knew that all the intellectual lights of Petersburg were gathered there and, like a child in a toyshop, did not know which way to look, afraid of missing any clever conversation that was to be heard. Seeing the self-confident and refined expression on the faces of those present he was always expecting to hear something very profound. At last he came up to Morio. Here the conversation seemed interesting and he stood waiting for an opportunity to express his own views, as young people are fond of doing.
CHAPTER III
Anna Pavlovna's reception was in full swing. The spindles hummed steadily and ceaselessly on all sides. With the exception of the aunt, beside whom sat only one elderly lady, who with her thin careworn face was rather out of place in this brilliant society, the whole company had settled into three groups. One, chiefly masculine, had formed round the abbe. Another, of young people, was grouped round the beautiful Princess Helene, Prince Vasili's daughter, and the little Princess Bolkonskaya, very pretty and rosy, though rather too plump for her age. The third group was gathered round Mortemart and Anna Pavlovna.
The vicomte was a nice-looking young man with soft features and polished manners, who evidently considered himself a celebrity but out of politeness modestly placed himself at the disposal of the circle in which he found himself. Anna Pavlovna was obviously serving him up as a treat to her guests. As a clever maitre d'hotel serves up as a specially choice delicacy a piece of meat that no one who had seen it in the kitchen would have cared to eat, so Anna Pavlovna served up to her guests, first the vicomte and then the abbe, as peculiarly choice morsels. The group about Mortemart immediately began discussing the murder of the Duc d'Enghien. The vicomte said that the Duc d'Enghien had perished by his own magnanimity, and that there were particular reasons for Buonaparte's hatred of him.
"Ah, yes! Do tell us all about it, Vicomte," said Anna Pavlovna, with a pleasant feeling that there was something A la Louis XV in the sound of that sentence: "Contez nous cela, Vicomte."
The vicomte bowed and smiled courteously in token of his willingness to comply. Anna Pavlovna arranged a group round him, inviting everyone to listen to his tale.
"The vicomte knew the duc personally," whispered Anna Pavlovna to one of the guests. "The vicomte is a wonderful raconteur," said she to another. "How evidently he belongs to the best society," said she to a third; and the vicomte was served up to the company in the choicest and most advantageous style, like a well-garnished joint of roast beef on a hot dish.
The vicomte wished to begin his story and gave a subtle smile.
"Come over here, Helene, dear," said Anna Pavlovna to the beautiful young princess who was sitting some way off, the center of another group.
The princess smiled. She rose with the same unchanging smile with which she had first entered the room - the smile of a perfectly beautiful woman. With a slight rustle of her white dress trimmed with moss and ivy, with a gleam of white shoulders, glossy hair, and sparkling diamonds, she passed between the men who made way for her, not looking at any of them but smiling on all, as if graciously allowing each the privilege of admiring her beautiful figure and shapely shoulders, back, and bosom - which in the fashion of those days were very much exposed - and she seemed to bring the glamour of a ballroom with her as she moved toward Anna Pavlovna. Helene was so lovely that not only did she not show any trace of coquetry, but on the contrary she even appeared shy of her unquestionable and all too victorious beauty. She seemed to wish, but to be unable, to diminish its effect.
"How lovely!" said everyone who saw her; and the vicomte lifted his shoulders and dropped his eyes as if startled by something extraordinary when she took her seat opposite and beamed upon him also with her unchanging smile.
"Madame, I doubt my ability before such an audience," said he, smilingly inclining his head.
The princess rested her bare round arm on a little table and considered a reply unnecessary. She smilingly waited. All the time the story was being told she sat upright, glancing now at her beautiful round arm, altered in shape by its pressure on the table, now at her still more beautiful bosom, on which she readjusted a diamond necklace. From time to time she smoothed the folds of her dress, and whenever the story produced an effect she glanced at Anna Pavlovna, at once adopted just the expression she saw on the maid of honor's face, and again relapsed into her radiant smile.
The little princess had also left the tea table and followed Helene.
"Wait a moment, I'll get my work.... Now then, what are you thinking of?" she went on, turning to Prince Hippolyte. "Fetch me my workbag."
There was a general movement as the princess, smiling and talking merrily to everyone at once, sat down and gaily arranged herself in her seat.
"Now I am all right," she said, and asking the vicomte to begin, she took up her work.
Prince Hippolyte, having brought the workbag, joined the circle and moving a chair close to hers seated himself beside her.
Le charmant Hippolyte was surprising by his extraordinary resemblance to his beautiful sister, but yet more by the fact that in spite of this resemblance he was exceedingly ugly. His features were like his sister's, but while in her case everything was lit up by a joyous, self-satisfied, youthful, and constant smile of animation, and by the wonderful classic beauty of her figure, his face on the contrary was dulled by imbecility and a constant expression of sullen self-confidence, while his body was thin and weak. His eyes, nose, and mouth all seemed puckered into a vacant, wearied grimace, and his arms and legs always fell into unnatural positions.
"It's not going to be a ghost story?" said he, sitting down beside the princess and hastily adjusting his lorgnette, as if without this instrument he could not begin to speak.
"Why no, my dear fellow," said the astonished narrator, shrugging his shoulders.
"Because I hate ghost stories," said Prince Hippolyte in a tone which showed that he only understood the meaning of his words after he had uttered them.
He spoke with such self-confidence that his hearers could not be sure whether what he said was very witty or very stupid. He was dressed in a dark-green dress coat, knee breeches of the color of cuisse de nymphe effrayee, as he called it, shoes, and silk stockings.
The vicomte told his tale very neatly. It was an anecdote, then current, to the effect that the Duc d'Enghien had gone secretly to Paris to visit Mademoiselle George; that at her house he came upon Bonaparte, who also enjoyed the famous actress' favors, and that in his presence Napoleon happened to fall into one of the fainting fits to which he was subject, and was thus at the duc's mercy. The latter spared him, and this magnanimity Bonaparte subsequently repaid by death.
The story was very pretty and interesting, especially at the point where the rivals suddenly recognized one another; and the ladies looked agitated.
"Charming!" said Anna Pavlovna with an inquiring glance at the little princess.
"Charming!" whispered the little princess, sticking the needle into her work as if to testify that the interest and fascination of the story prevented her from going on with it.
The vicomte appreciated this silent praise and smiling gratefully prepared to continue, but just then Anna Pavlovna, who had kept a watchful eye on the young man who so alarmed her, noticed that he was talking too loudly and vehemently with the abbe, so she hurried to the rescue. Pierre had managed to start a conversation with the abbe about the balance of power, and the latter, evidently interested by the young man's simple-minded eagerness, was explaining his pet theory. Both were talking and listening too eagerly and too naturally, which was why Anna Pavlovna disapproved.
"The means are ... the balance of power in Europe and the rights of the people," the abbe was saying. "It is only necessary for one powerful nation like Russia - barbaric as she is said to be - to place herself disinterestedly at the head of an alliance having for its object the maintenance of the balance of power of Europe, and it would save the world!"
"But how are you to get that balance?" Pierre was beginning.
At that moment Anna Pavlovna came up and, looking severely at Pierre, asked the Italian how he stood Russian climate. The Italian's face instantly changed and assumed an offensively affected, sugary expression, evidently habitual to him when conversing with women.
"I am so enchanted by the brilliancy of the wit and culture of the society, more especially of the feminine society, in which I have had the honor of being received, that I have not yet had time to think of the climate," said he.
Not letting the abbe and Pierre escape, Anna Pavlovna, the more conveniently to keep them under observation, brought them into the larger circle.

View File

@@ -0,0 +1,31 @@
import requests
import json
import random
model = "llama2"
template = {
"firstName": "",
"lastName": "",
"address": {
"street": "",
"city": "",
"state": "",
"zipCode": ""
},
"phoneNumber": ""
}
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in the US, and phone number. \nUse the following template: {json.dumps(template)}."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
print(f"Generating a sample user")
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))

View File

@@ -0,0 +1,31 @@
import requests
import json
import random
countries = [
"United States",
"United Kingdom",
"the Netherlands",
"Germany",
"Mexico",
"Canada",
"France",
]
country = random.choice(countries)
model = "llama2"
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
print(f"Generating a sample user in {country}")
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))

View File

@@ -0,0 +1,34 @@
# JSON Output Example
![llmjson 2023-11-10 15_31_31](https://github.com/jmorganca/ollama/assets/633681/e599d986-9b4a-4118-81a4-4cfe7e22da25)
There are two python scripts in this example. `randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in.
## Review the Code
Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and specifying the `format` as `json` in the data body.
```python
prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should with no backslashes, values should use plain ascii with no special characters."
data = {
"prompt": prompt,
"model": model,
"format": "json",
"stream": False,
"options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
}
```
When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country.
In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with.
Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output easy to read.
```python
response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
json_data = json.loads(response.text)
print(json.dumps(json.loads(json_data["response"]), indent=2))
```

View File

@@ -0,0 +1 @@
Requests==2.31.0

View File

@@ -0,0 +1,8 @@
FROM codebooga:latest
SYSTEM """
You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer.
"""
PARAMETER TEMPERATURE 0.3

View File

@@ -0,0 +1,42 @@
import sys
import re
import requests
import json
# prelines and postlines represent the number of lines of context to include in the output around the error
prelines = 10
postlines = 10
def find_errors_in_log_file():
if len(sys.argv) < 2:
print("Usage: python loganalysis.py <filename>")
return
log_file_path = sys.argv[1]
with open(log_file_path, 'r') as log_file:
log_lines = log_file.readlines()
error_logs = []
for i, line in enumerate(log_lines):
if "error" in line.lower():
start_index = max(0, i - prelines)
end_index = min(len(log_lines), i + postlines + 1)
error_logs.extend(log_lines[start_index:end_index])
return error_logs
error_logs = find_errors_in_log_file()
data = {
"prompt": "\n".join(error_logs),
"model": "mattw/loganalyzer"
}
response = requests.post("http://localhost:11434/api/generate", json=data, stream=True)
for line in response.iter_lines():
if line:
json_data = json.loads(line)
if json_data['done'] == False:
print(json_data['response'], end='', flush=True)

View File

@@ -0,0 +1,32 @@
2023-11-10 07:17:40 /docker-entrypoint.sh: /docker-entrypoint.d/ is not empty, will attempt to perform configuration
2023-11-10 07:17:40 /docker-entrypoint.sh: Looking for shell scripts in /docker-entrypoint.d/
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/10-listen-on-ipv6-by-default.sh
2023-11-10 07:17:40 10-listen-on-ipv6-by-default.sh: info: Getting the checksum of /etc/nginx/conf.d/default.conf
2023-11-10 07:17:40 10-listen-on-ipv6-by-default.sh: info: Enabled listen on IPv6 in /etc/nginx/conf.d/default.conf
2023-11-10 07:17:40 /docker-entrypoint.sh: Sourcing /docker-entrypoint.d/15-local-resolvers.envsh
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/20-envsubst-on-templates.sh
2023-11-10 07:17:40 /docker-entrypoint.sh: Launching /docker-entrypoint.d/30-tune-worker-processes.sh
2023-11-10 07:17:40 /docker-entrypoint.sh: Configuration complete; ready for start up
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: using the "epoll" event method
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: nginx/1.25.3
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: built by gcc 12.2.0 (Debian 12.2.0-14)
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: OS: Linux 6.4.16-linuxkit
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: getrlimit(RLIMIT_NOFILE): 1048576:1048576
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker processes
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 29
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 30
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 31
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 32
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 33
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 34
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 35
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 36
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 37
2023-11-10 07:17:40 2023/11/10 13:17:40 [notice] 1#1: start worker process 38
2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:43 +0000] "GET / HTTP/1.1" 200 615 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:17:44 2023/11/10 13:17:44 [error] 29#29: *1 open() "/usr/share/nginx/html/favicon.ico" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /favicon.ico HTTP/1.1", host: "localhost:8080", referrer: "http://localhost:8080/"
2023-11-10 07:17:44 192.168.65.1 - - [10/Nov/2023:13:17:44 +0000] "GET /favicon.ico HTTP/1.1" 404 555 "http://localhost:8080/" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:17:50 2023/11/10 13:17:50 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080"
2023-11-10 07:17:50 192.168.65.1 - - [10/Nov/2023:13:17:50 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"
2023-11-10 07:18:53 2023/11/10 13:18:53 [error] 29#29: *1 open() "/usr/share/nginx/html/ahstat" failed (2: No such file or directory), client: 192.168.65.1, server: localhost, request: "GET /ahstat HTTP/1.1", host: "localhost:8080"
2023-11-10 07:18:53 192.168.65.1 - - [10/Nov/2023:13:18:53 +0000] "GET /ahstat HTTP/1.1" 404 555 "-" "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36" "-"

View File

@@ -0,0 +1,48 @@
# Log Analysis example
![loganalyzer 2023-11-10 08_53_29](https://github.com/jmorganca/ollama/assets/633681/ad30f1fc-321f-4953-8914-e30e24db9921)
This example shows one possible way to create a log file analyzer. To use it, run:
`python loganalysis.py <logfile>`
You can try this with the `logtest.logfile` file included in this directory.
## Review the code
The first part of this example is a Modelfile that takes `codebooga` and applies a new System Prompt:
```plaintext
SYSTEM """
You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer.
"""
```
This model is available at https://ollama.ai/mattw/loganalyzer. You can customize it and add to your own namespace using the command `ollama create <namespace/modelname> -f <path-to-modelfile>` then `ollama push <namespace/modelname>`.
Then loganalysis.py scans all the lines in the given log file and searches for the word 'error'. When the word is found, the 10 lines before and after are set as the prompt for a call to the Generate API.
```python
data = {
"prompt": "\n".join(error_logs),
"model": "mattw/loganalyzer"
}
```
Finally, the streamed output is parsed and the response field in the output is printed to the line.
```python
response = requests.post("http://localhost:11434/api/generate", json=data, stream=True)
for line in response.iter_lines():
if line:
json_data = json.loads(line)
if json_data['done'] == False:
print(json_data['response'], end='')
```
## Next Steps
There is a lot more that can be done here. This is a simple way to detect errors, looking for the word error. Perhaps it would be interesting to find anomalous activity in the logs. It could be interesting to create embeddings for each line and compare them, looking for similar lines. Or look into applying Levenshtein Distance algorithms to find similar lines to help identify the anomalous lines.
Also try different models and different prompts to analyze the data. You could consider adding retrieval augmented generation (RAG) to this to help understand newer log formats.

View File

@@ -0,0 +1 @@
Requests==2.31.0

View File

@@ -0,0 +1,22 @@
# News Summarizer
This example goes through a series of steps:
1. You choose a topic area (e.g., "news", "NVidia", "music", etc.).
2. Gets the most recent articles on that topic from various sources.
3. Uses Ollama to summarize each article.
4. Creates chunks of sentences from each article.
5. Uses Sentence Transformers to generate embeddings for each of those chunks.
6. You enter a question regarding the summaries shown.
7. Uses Sentence Transformers to generate an embedding for that question.
8. Uses the embedded question to find the most similar chunks.
9. Feeds all that to Ollama to generate a good answer to your question based on these news articles.
This example lets you pick from a few different topic areas, then summarize the most recent x articles for that topic. It then creates chunks of sentences from each article and then generates embeddings for each of those chunks.
You can run the example like this:
```bash
pip install -r requirements.txt
python summ.py
```

View File

@@ -0,0 +1,9 @@
beautifulsoup4==4.12.2
feedparser==6.0.10
mattsollamatools==0.0.8
newspaper3k==0.2.8
nltk==3.8.1
numpy==1.24.3
Requests==2.31.0
scikit_learn==1.3.0
sentence_transformers==2.2.2

View File

@@ -0,0 +1,86 @@
import curses
import json
from utils import get_url_for_topic, topic_urls, menu, getUrls, get_summary, getArticleText, knn_search
import requests
from sentence_transformers import SentenceTransformer
from mattsollamatools import chunker
if __name__ == "__main__":
chosen_topic = curses.wrapper(menu)
print("Here is your news summary:\n")
urls = getUrls(chosen_topic, n=5)
model = SentenceTransformer('all-MiniLM-L6-v2')
allEmbeddings = []
for url in urls:
article={}
article['embeddings'] = []
article['url'] = url
text = getArticleText(url)
summary = get_summary(text)
chunks = chunker(text) # Use the chunk_text function from web_utils
embeddings = model.encode(chunks)
for (chunk, embedding) in zip(chunks, embeddings):
item = {}
item['source'] = chunk
item['embedding'] = embedding.tolist() # Convert NumPy array to list
item['sourcelength'] = len(chunk)
article['embeddings'].append(item)
allEmbeddings.append(article)
print(f"{summary}\n")
while True:
context = []
# Input a question from the user
question = input("Enter your question about the news, or type quit: ")
if question.lower() == 'quit':
break
# Embed the user's question
question_embedding = model.encode([question])
# Perform KNN search to find the best matches (indices and source text)
best_matches = knn_search(question_embedding, allEmbeddings, k=10)
sourcetext=""
for i, (index, source_text) in enumerate(best_matches, start=1):
sourcetext += f"{i}. Index: {index}, Source Text: {source_text}"
systemPrompt = f"Only use the following information to answer the question. Do not use anything else: {sourcetext}"
url = "http://localhost:11434/api/generate"
payload = {
"model": "mistral-openorca",
"prompt": question,
"system": systemPrompt,
"stream": False,
"context": context
}
# Convert the payload to a JSON string
payload_json = json.dumps(payload)
# Set the headers to specify JSON content
headers = {
"Content-Type": "application/json"
}
# Send the POST request
response = requests.post(url, data=payload_json, headers=headers)
# Check the response
if response.status_code == 200:
output = json.loads(response.text)
context = output['context']
print(output['response']+ "\n")
else:
print(f"Request failed with status code {response.status_code}")

View File

@@ -0,0 +1,108 @@
import curses
import feedparser
import requests
import unicodedata
import json
from newspaper import Article
from bs4 import BeautifulSoup
from nltk.tokenize import sent_tokenize, word_tokenize
import numpy as np
from sklearn.neighbors import NearestNeighbors
from mattsollamatools import chunker
# Create a dictionary to store topics and their URLs
topic_urls = {
"Mac": "https://9to5mac.com/guides/mac/feed",
"News": "http://www.npr.org/rss/rss.php?id=1001",
"Nvidia": "https://nvidianews.nvidia.com/releases.xml",
"Raspberry Pi": "https://www.raspberrypi.com/news/feed/",
"Music": "https://www.billboard.com/c/music/music-news/feed/"
}
# Use curses to create a menu of topics
def menu(stdscr):
chosen_topic = get_url_for_topic(stdscr)
url = topic_urls[chosen_topic] if chosen_topic in topic_urls else "Topic not found"
stdscr.addstr(len(topic_urls) + 3, 0, f"Selected URL for {chosen_topic}: {url}")
stdscr.refresh()
return chosen_topic
# You have chosen a topic. Now return the url for that topic
def get_url_for_topic(stdscr):
curses.curs_set(0) # Hide the cursor
stdscr.clear()
stdscr.addstr(0, 0, "Choose a topic using the arrow keys (Press Enter to select):")
# Create a list of topics
topics = list(topic_urls.keys())
current_topic = 0
while True:
for i, topic in enumerate(topics):
if i == current_topic:
stdscr.addstr(i + 2, 2, f"> {topic}")
else:
stdscr.addstr(i + 2, 2, f" {topic}")
stdscr.refresh()
key = stdscr.getch()
if key == curses.KEY_DOWN and current_topic < len(topics) - 1:
current_topic += 1
elif key == curses.KEY_UP and current_topic > 0:
current_topic -= 1
elif key == 10: # Enter key
return topic_urls[topics[current_topic]]
# Get the last N URLs from an RSS feed
def getUrls(feed_url, n=20):
feed = feedparser.parse(feed_url)
entries = feed.entries[-n:]
urls = [entry.link for entry in entries]
return urls
# Often there are a bunch of ads and menus on pages for a news article. This uses newspaper3k to get just the text of just the article.
def getArticleText(url):
article = Article(url)
article.download()
article.parse()
return article.text
def get_summary(text):
systemPrompt = "Write a concise summary of the text, return your responses with 5 lines that cover the key points of the text given."
prompt = text
url = "http://localhost:11434/api/generate"
payload = {
"model": "mistral-openorca",
"prompt": prompt,
"system": systemPrompt,
"stream": False
}
payload_json = json.dumps(payload)
headers = {"Content-Type": "application/json"}
response = requests.post(url, data=payload_json, headers=headers)
return json.loads(response.text)["response"]
# Perform K-nearest neighbors (KNN) search
def knn_search(question_embedding, embeddings, k=5):
X = np.array([item['embedding'] for article in embeddings for item in article['embeddings']])
source_texts = [item['source'] for article in embeddings for item in article['embeddings']]
# Fit a KNN model on the embeddings
knn = NearestNeighbors(n_neighbors=k, metric='cosine')
knn.fit(X)
# Find the indices and distances of the k-nearest neighbors
distances, indices = knn.kneighbors(question_embedding, n_neighbors=k)
# Get the indices and source texts of the best matches
best_matches = [(indices[0][i], source_texts[indices[0][i]]) for i in range(k)]
return best_matches

View File

@@ -17,7 +17,7 @@ def generate(prompt, context):
for line in r.iter_lines():
body = json.loads(line)
response_part = body.get('response', '')
# the response streams one token at a time, print that as we recieve it
# the response streams one token at a time, print that as we receive it
print(response_part, end='', flush=True)
if 'error' in body:
@@ -35,4 +35,4 @@ def main():
print()
if __name__ == "__main__":
main()
main()

View File

@@ -0,0 +1,118 @@
import { Ollama } from "ollama-node";
import { readFile } from "fs/promises";
// function to be called on events
function reportEvents(name: string, date: string, location: string) {
const nameString = name ? `${name}` : `an event`;
const dateString = date ? ` on ${date}` : ``;
const locationString = location ? ` at ${location}` : ``;
console.log(`You have an event: ${nameString}${dateString}${locationString}`)
}
// function to be called on addresses
function reportAddresses(address) {
for (const field in address) {
if (address[field]) {
if (field === "city") {
const city = address.city;
const state = address.state ? `, ${address.state}` : '';
const zip = address.zip ? ` ${address.zip}` : '';
console.log(`${city}${state}${zip}`);
break;
} else {
console.log(`${address[field]}`);
}
}
}
console.log(``);
}
async function main() {
const ollama = new Ollama();
const systemprompt = `You will be given a text along with a prompt and a schema. You will have to extract the information requested in the prompt from the text and generate output in JSON observing the schema provided. If the schema shows a type of integer or number, you must only show a integer for that field. A string should always be a valid string. If a value is unknown, leave it empty. Output the JSON with extra spaces to ensure that it pretty prints.`
const schema = {
"eventsQuantity": {
"type": "integer",
"description": "The number of events in the source text"
},
"addressesQuantity": {
"type": "integer",
"description": "The number of addresses in the source text"
},
"events": [{
name: {
"type": "string",
description: "Name of the event"
},
"date": {
"type": "string",
"description": "Date of the event"
},
"location": {
"type": "string",
"description": "Location of the event"
},
"extraInfo": {
"type": "string",
"description": "Any extra information that is provided about the event."
}
}],
"people": [{
"name": {
"type": "string",
"description": "Name of the person"
},
"company": {
"type": "string",
"description": "Name of the company where they work"
},
"street": {
"type": "string",
"description": "Street address of the person or company. This is only the street name and the numerical address. Do not include city, state, or zip of the address in this field."
},
"city": {
"type": "string",
"description": "City portion of the address of the person or company"
},
"state": {
"type": "string",
"description": "State portion of the address of the person or company"
},
"zip": {
"type": "string",
"description": "Zip code of the person or company"
},
"extraInfo": {
"type": "string",
"description": "Any extra information that is provided about the location."
}
}]
}
const textcontent = await readFile("./info.txt", "utf-8").then((text) => text.split(" ").slice(0, 2000).join(" "));
const prompt = `The source text is a series of emails that have been put into a single file. They are separated by three dashes. Review the source text and determine the full address of the person sending each of the emails as well as any events that we need to track. If they provide a company address use that. If any extra info is provided, such as a description of the place, or a floor, add it to extraInfo. The first field in the address JSON is quantity of events and should be set to the number of events tracked and the second field should be set to the number of addresses tracked in the file. Don't stuff an event into the output that isn't an event. Only add data to the mostly appropriate field. Don't make up fields that aren't in the schema. If there isn't a value for a field, use null. Output should be in JSON.\n\nSchema: \n${JSON.stringify(schema, null, 2)}\n\nSource Text:\n${textcontent}`
await ollama.setModel("neural-chat");
ollama.setSystemPrompt(systemprompt);
ollama.setJSONFormat(true);
const data = await ollama.generate(prompt);
const output = JSON.parse(data.output);
const events = output.events;
const addresses = output.people;
console.log(`Here are your ${output.eventsQuantity} events:`);
for (const event of events) {
reportEvents(event.name, event.date, event.location);
}
console.log(`\n\nHere are your ${output.addressesQuantity} addresses:`);
for (const address of addresses) {
reportAddresses(address);
}
}
main();

View File

@@ -0,0 +1,38 @@
import { Ollama } from "ollama-node";
import { readFile } from "fs/promises";
async function main() {
const ollama = new Ollama();
// Set the system prompt to prepare the model to receive a prompt and a schema and set some rules for the output.
const systemprompt = `You will be given a text along with a prompt and a schema. You will have to extract the information requested in the prompt from the text and generate output in JSON observing the schema provided. If the schema shows a type of integer or number, you must only show a integer for that field. A string should always be a valid string. If a value is unknown, leave it empty. Output the JSON with extra spaces to ensure that it pretty prints.`
const schema = {
"people": [{
"name": {
"type": "string",
"description": "Name of the person"
},
"title": {
"type": "string",
"description": "Title of the person"
}
}],
}
// Depending on the model chosen, you may be limited by the size of the context window, so limit the context to 2000 words.
const textcontent = await readFile("./wp.txt", "utf-8").then((text) => text.split(" ").slice(0, 2000).join(" "));
// Specific instructions for this task
const prompt = `Review the source text and determine the 10 most important people to focus on. Then extract the name and title for those people. Output should be in JSON.\n\nSchema: \n${JSON.stringify(schema, null, 2)}\n\nSource Text:\n${textcontent}`
await ollama.setModel("neural-chat");
ollama.setSystemPrompt(systemprompt);
// setJSONFormat is the equivalent of setting 'format: json' in the API
ollama.setJSONFormat(true);
await ollama.streamingGenerate(prompt, (word) => { process.stdout.write(word) })
}
main();

View File

@@ -0,0 +1,17 @@
---
Hi matt,
thanks for letting me know that you are going to come today, November 16, for my tea party. My address is 123 Falk St on Bainbridge Island. I live in the house with the red door. I will be home all day so just come by whenever you want.
Fred
---
Great, send the check to our office at 1917 1st St, Seattle, WA 98101. I will let you know when we receive it.
Mark Richardson
Big Corp
---
We are looking forward to seeing you at our Local AI Meetup. It will be held on December 3. It will be at the offices of Enormous Co. Our address is 344 1st Ave, Seattle, WA 98101. We will be meeting in the conference room on the 3rd floor.
Barbara Reilly
Enormous Co.

View File

@@ -0,0 +1,519 @@
{
"name": "typescript-functioncalling",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"dependencies": {
"ollama-node": "^0.1.27"
},
"devDependencies": {
"tsx": "^4.1.2",
"typescript": "^5.2.2"
}
},
"node_modules/@esbuild/android-arm": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.18.20.tgz",
"integrity": "sha512-fyi7TDI/ijKKNZTUJAQqiG5T7YjJXgnzkURqmGj13C6dCqckZBLdl4h7bkhHt/t0WP+zO9/zwroDvANaOqO5Sw==",
"cpu": [
"arm"
],
"dev": true,
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/android-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.18.20.tgz",
"integrity": "sha512-Nz4rJcchGDtENV0eMKUNa6L12zz2zBDXuhj/Vjh18zGqB44Bi7MBMSXjgunJgjRhCmKOjnPuZp4Mb6OKqtMHLQ==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/android-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.18.20.tgz",
"integrity": "sha512-8GDdlePJA8D6zlZYJV/jnrRAi6rOiNaCC/JclcXpB+KIuvfBN4owLtgzY2bsxnx666XjJx2kDPUmnTtR8qKQUg==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"android"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/darwin-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.18.20.tgz",
"integrity": "sha512-bxRHW5kHU38zS2lPTPOyuyTm+S+eobPUnTNkdJEfAddYgEcll4xkT8DB9d2008DtTbl7uJag2HuE5NZAZgnNEA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/darwin-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.18.20.tgz",
"integrity": "sha512-pc5gxlMDxzm513qPGbCbDukOdsGtKhfxD1zJKXjCCcU7ju50O7MeAZ8c4krSJcOIJGFR+qx21yMMVYwiQvyTyQ==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/freebsd-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.18.20.tgz",
"integrity": "sha512-yqDQHy4QHevpMAaxhhIwYPMv1NECwOvIpGCZkECn8w2WFHXjEwrBn3CeNIYsibZ/iZEUemj++M26W3cNR5h+Tw==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/freebsd-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.18.20.tgz",
"integrity": "sha512-tgWRPPuQsd3RmBZwarGVHZQvtzfEBOreNuxEMKFcd5DaDn2PbBxfwLcj4+aenoh7ctXcbXmOQIn8HI6mCSw5MQ==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"freebsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-arm": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.18.20.tgz",
"integrity": "sha512-/5bHkMWnq1EgKr1V+Ybz3s1hWXok7mDFUMQ4cG10AfW3wL02PSZi5kFpYKrptDsgb2WAJIvRcDm+qIvXf/apvg==",
"cpu": [
"arm"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.18.20.tgz",
"integrity": "sha512-2YbscF+UL7SQAVIpnWvYwM+3LskyDmPhe31pE7/aoTMFKKzIc9lLbyGUpmmb8a8AixOL61sQ/mFh3jEjHYFvdA==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-ia32": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.18.20.tgz",
"integrity": "sha512-P4etWwq6IsReT0E1KHU40bOnzMHoH73aXp96Fs8TIT6z9Hu8G6+0SHSw9i2isWrD2nbx2qo5yUqACgdfVGx7TA==",
"cpu": [
"ia32"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-loong64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.18.20.tgz",
"integrity": "sha512-nXW8nqBTrOpDLPgPY9uV+/1DjxoQ7DoB2N8eocyq8I9XuqJ7BiAMDMf9n1xZM9TgW0J8zrquIb/A7s3BJv7rjg==",
"cpu": [
"loong64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-mips64el": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.18.20.tgz",
"integrity": "sha512-d5NeaXZcHp8PzYy5VnXV3VSd2D328Zb+9dEq5HE6bw6+N86JVPExrA6O68OPwobntbNJ0pzCpUFZTo3w0GyetQ==",
"cpu": [
"mips64el"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-ppc64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.18.20.tgz",
"integrity": "sha512-WHPyeScRNcmANnLQkq6AfyXRFr5D6N2sKgkFo2FqguP44Nw2eyDlbTdZwd9GYk98DZG9QItIiTlFLHJHjxP3FA==",
"cpu": [
"ppc64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-riscv64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.18.20.tgz",
"integrity": "sha512-WSxo6h5ecI5XH34KC7w5veNnKkju3zBRLEQNY7mv5mtBmrP/MjNBCAlsM2u5hDBlS3NGcTQpoBvRzqBcRtpq1A==",
"cpu": [
"riscv64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-s390x": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.18.20.tgz",
"integrity": "sha512-+8231GMs3mAEth6Ja1iK0a1sQ3ohfcpzpRLH8uuc5/KVDFneH6jtAJLFGafpzpMRO6DzJ6AvXKze9LfFMrIHVQ==",
"cpu": [
"s390x"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/linux-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.18.20.tgz",
"integrity": "sha512-UYqiqemphJcNsFEskc73jQ7B9jgwjWrSayxawS6UVFZGWrAAtkzjxSqnoclCXxWtfwLdzU+vTpcNYhpn43uP1w==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"linux"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/netbsd-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.18.20.tgz",
"integrity": "sha512-iO1c++VP6xUBUmltHZoMtCUdPlnPGdBom6IrO4gyKPFFVBKioIImVooR5I83nTew5UOYrk3gIJhbZh8X44y06A==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"netbsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/openbsd-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.18.20.tgz",
"integrity": "sha512-e5e4YSsuQfX4cxcygw/UCPIEP6wbIL+se3sxPdCiMbFLBWu0eiZOJ7WoD+ptCLrmjZBK1Wk7I6D/I3NglUGOxg==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"openbsd"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/sunos-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.18.20.tgz",
"integrity": "sha512-kDbFRFp0YpTQVVrqUd5FTYmWo45zGaXe0X8E1G/LKFC0v8x0vWrhOWSLITcCn63lmZIxfOMXtCfti/RxN/0wnQ==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"sunos"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/win32-arm64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.18.20.tgz",
"integrity": "sha512-ddYFR6ItYgoaq4v4JmQQaAI5s7npztfV4Ag6NrhiaW0RrnOXqBkgwZLofVTlq1daVTQNhtI5oieTvkRPfZrePg==",
"cpu": [
"arm64"
],
"dev": true,
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/win32-ia32": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.18.20.tgz",
"integrity": "sha512-Wv7QBi3ID/rROT08SABTS7eV4hX26sVduqDOTe1MvGMjNd3EjOz4b7zeexIR62GTIEKrfJXKL9LFxTYgkyeu7g==",
"cpu": [
"ia32"
],
"dev": true,
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@esbuild/win32-x64": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.18.20.tgz",
"integrity": "sha512-kTdfRcSiDfQca/y9QIkng02avJ+NCaQvrMejlsB3RRv5sE9rRoeBPISaZpKxHELzRxZyLvNts1P27W3wV+8geQ==",
"cpu": [
"x64"
],
"dev": true,
"optional": true,
"os": [
"win32"
],
"engines": {
"node": ">=12"
}
},
"node_modules/@types/node": {
"version": "20.9.0",
"resolved": "https://registry.npmjs.org/@types/node/-/node-20.9.0.tgz",
"integrity": "sha512-nekiGu2NDb1BcVofVcEKMIwzlx4NjHlcjhoxxKBNLtz15Y1z7MYf549DFvkHSId02Ax6kGwWntIBPC3l/JZcmw==",
"dependencies": {
"undici-types": "~5.26.4"
}
},
"node_modules/buffer-from": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz",
"integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==",
"dev": true
},
"node_modules/esbuild": {
"version": "0.18.20",
"resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.18.20.tgz",
"integrity": "sha512-ceqxoedUrcayh7Y7ZX6NdbbDzGROiyVBgC4PriJThBKSVPWnnFHZAkfI1lJT8QFkOwH4qOS2SJkS4wvpGl8BpA==",
"dev": true,
"hasInstallScript": true,
"bin": {
"esbuild": "bin/esbuild"
},
"engines": {
"node": ">=12"
},
"optionalDependencies": {
"@esbuild/android-arm": "0.18.20",
"@esbuild/android-arm64": "0.18.20",
"@esbuild/android-x64": "0.18.20",
"@esbuild/darwin-arm64": "0.18.20",
"@esbuild/darwin-x64": "0.18.20",
"@esbuild/freebsd-arm64": "0.18.20",
"@esbuild/freebsd-x64": "0.18.20",
"@esbuild/linux-arm": "0.18.20",
"@esbuild/linux-arm64": "0.18.20",
"@esbuild/linux-ia32": "0.18.20",
"@esbuild/linux-loong64": "0.18.20",
"@esbuild/linux-mips64el": "0.18.20",
"@esbuild/linux-ppc64": "0.18.20",
"@esbuild/linux-riscv64": "0.18.20",
"@esbuild/linux-s390x": "0.18.20",
"@esbuild/linux-x64": "0.18.20",
"@esbuild/netbsd-x64": "0.18.20",
"@esbuild/openbsd-x64": "0.18.20",
"@esbuild/sunos-x64": "0.18.20",
"@esbuild/win32-arm64": "0.18.20",
"@esbuild/win32-ia32": "0.18.20",
"@esbuild/win32-x64": "0.18.20"
}
},
"node_modules/fsevents": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
"integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
"dev": true,
"hasInstallScript": true,
"optional": true,
"os": [
"darwin"
],
"engines": {
"node": "^8.16.0 || ^10.6.0 || >=11.0.0"
}
},
"node_modules/get-tsconfig": {
"version": "4.7.2",
"resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.2.tgz",
"integrity": "sha512-wuMsz4leaj5hbGgg4IvDU0bqJagpftG5l5cXIAvo8uZrqn0NJqwtfupTN00VnkQJPcIRrxYrm1Ue24btpCha2A==",
"dev": true,
"dependencies": {
"resolve-pkg-maps": "^1.0.0"
},
"funding": {
"url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
}
},
"node_modules/ollama-node": {
"version": "0.1.27",
"resolved": "https://registry.npmjs.org/ollama-node/-/ollama-node-0.1.27.tgz",
"integrity": "sha512-tFABPf5P0sXCR5USA31E3tqbge5h/4uf/t5j8/rPvHDo0SDwXeN0kah2J7hIqqkYlO1vLRs0uLC1/Mprgv9t2g==",
"dependencies": {
"@types/node": "^20.8.4"
}
},
"node_modules/resolve-pkg-maps": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
"integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
"dev": true,
"funding": {
"url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
}
},
"node_modules/source-map": {
"version": "0.6.1",
"resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz",
"integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==",
"dev": true,
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/source-map-support": {
"version": "0.5.21",
"resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz",
"integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==",
"dev": true,
"dependencies": {
"buffer-from": "^1.0.0",
"source-map": "^0.6.0"
}
},
"node_modules/tsx": {
"version": "4.1.2",
"resolved": "https://registry.npmjs.org/tsx/-/tsx-4.1.2.tgz",
"integrity": "sha512-1spM1bFV6MP2s4tO4tDC7g52fsaFdtEWdO4GfGdqi20qUgPbnAJqixOyIAvCSx1DDj3YIUB4CD06owTWUsOAuQ==",
"dev": true,
"dependencies": {
"esbuild": "~0.18.20",
"get-tsconfig": "^4.7.2",
"source-map-support": "^0.5.21"
},
"bin": {
"tsx": "dist/cli.mjs"
},
"engines": {
"node": ">=18.0.0"
},
"optionalDependencies": {
"fsevents": "~2.3.3"
}
},
"node_modules/typescript": {
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.2.2.tgz",
"integrity": "sha512-mI4WrpHsbCIcwT9cF4FZvr80QUeKvsUsUvKDoR+X/7XHQH98xYD8YHZg7ANtz2GtZt/CBq2QJ0thkGJMHfqc1w==",
"dev": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
},
"node_modules/undici-types": {
"version": "5.26.5",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
"integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA=="
}
}
}

View File

@@ -0,0 +1,9 @@
{
"dependencies": {
"ollama-node": "^0.1.27"
},
"devDependencies": {
"tsx": "^4.1.2",
"typescript": "^5.2.2"
}
}

View File

@@ -0,0 +1,28 @@
# Function calling
![function calling 2023-11-16 16_12_58](https://github.com/jmorganca/ollama/assets/633681/a0acc247-9746-45ab-b325-b65dfbbee4fb)
One of the features added to some models is 'function calling'. It's a bit of a confusing name. It's understandable if you think that means the model can call functions, but that's not what it means. Function calling simply means that the output of the model is formatted in JSON, using a preconfigured schema, and uses the expected types. Then your code can use the output of the model and call functions with it. Using the JSON format in Ollama, you can use any model for function calling.
The two examples provided can extract information out of the provided texts. The first example uses the first couple of chapters from War and Peace by Lev Nikolayevich Tolstoy, and extracts the names and titles of the characters introduced in the story. The second example uses a more complicated schema to pull out addresses and event information from a series of emails.
## Running the examples
1. Clone this repo and navigate to the `examples/typescript-functioncalling` directory.
2. Install the dependencies with `npm install`.
3. Review the `wp.txt` file.
4. Run `tsx extractwp.ts`.
5. Review the `info.txt` file.
6. Run `tsx extractemail.ts`.
## Review the Code
Both examples do roughly the same thing with different source material. They both use the same system prompt, which tells the model to expect some instructions and a schema. Then we inject the schema into the prompt and generate an answer.
The first example, `extractwp.ts`, outputs the resulting JSON to the console, listing the characters introduced at the start of War and Peace. The second example, `extractemail.ts`, is a bit more complicated, extracting two different types of information: addresses and events. It outputs the results to a JSON blob, then the addresses are handed off to one function called `reportAddresses` and the events are handed off to another function called `reportEvents`.
Notice that both examples are using the model from Intel called `neural-chat`. This is not a model tuned for function calling, yet it performs very well at this task.
## Next Steps
Try exporting some of your real emails to the input file and seeing how well the model does. Try pointing the first example at other books. You could even have it cycle through all the sections and maybe add up the number of times any character is seen throughout the book, determining the most important characters. You can also try out different models.

View File

@@ -0,0 +1,183 @@
"Well, Prince, so Genoa and Lucca are now just family estates of the Buonapartes. But I warn you, if you don't tell me that this means war, if you still try to defend the infamies and horrors perpetrated by that Antichrist - I really believe he is Antichrist - I will have nothing more to do with you and you are no longer my friend, no longer my 'faithful slave,' as you call yourself! But how do you do? I see I have frightened you - sit down and tell me all the news."
It was in July, 1805, and the speaker was the well-known Anna Pavlovna Scherer, maid of honor and favorite of the Empress Marya Fedorovna. With these words she greeted Prince Vasili Kuragin, a man of high rank and importance, who was the first to arrive at her reception. Anna Pavlovna had had a cough for some days. She was, as she said, suffering from la grippe; grippe being then a new word in St. Petersburg, used only by the elite.
All her invitations without exception, written in French, and delivered by a scarlet-liveried footman that morning, ran as follows:
"If you have nothing better to do, Count (or Prince), and if the prospect of spending an evening with a poor invalid is not too terrible, I shall be very charmed to see you tonight between 7 and 10 - Annette Scherer."
"Heavens! what a virulent attack!" replied the prince, not in the least disconcerted by this reception. He had just entered, wearing an embroidered court uniform, knee breeches, and shoes, and had stars on his breast and a serene expression on his flat face. He spoke in that refined French in which our grandfathers not only spoke but thought, and with the gentle, patronizing intonation natural to a man of importance who had grown old in society and at court. He went up to Anna Pavlovna, kissed her hand, presenting to her his bald, scented, and shining head, and complacently seated himself on the sofa.
"First of all, dear friend, tell me how you are. Set your friend's mind at rest," said he without altering his tone, beneath the politeness and affected sympathy of which indifference and even irony could be discerned.
"Can one be well while suffering morally? Can one be calm in times like these if one has any feeling?" said Anna Pavlovna. "You are staying the whole evening, I hope?"
"And the fete at the English ambassador's? Today is Wednesday. I must put in an appearance there," said the prince. "My daughter is coming for me to take me there."
"I thought today's fete had been canceled. I confess all these festivities and fireworks are becoming wearisome."
"If they had known that you wished it, the entertainment would have been put off," said the prince, who, like a wound-up clock, by force of habit said things he did not even wish to be believed.
"Don't tease! Well, and what has been decided about Novosiltsev's dispatch? You know everything."
"What can one say about it?" replied the prince in a cold, listless tone. "What has been decided? They have decided that Buonaparte has burnt his boats, and I believe that we are ready to burn ours."
Prince Vasili always spoke languidly, like an actor repeating a stale part. Anna Pavlovna Scherer on the contrary, despite her forty years, overflowed with animation and impulsiveness. To be an enthusiast had become her social vocation and, sometimes even when she did not feel like it, she became enthusiastic in order not to disappoint the expectations of those who knew her. The subdued smile which, though it did not suit her faded features, always played round her lips expressed, as in a spoiled child, a continual consciousness of her charming defect, which she neither wished, nor could, nor considered it necessary, to correct.
In the midst of a conversation on political matters Anna Pavlovna burst out:
"Oh, don't speak to me of Austria. Perhaps I don't understand things, but Austria never has wished, and does not wish, for war. She is betraying us! Russia alone must save Europe. Our gracious sovereign recognizes his high vocation and will be true to it. That is the one thing I have faith in! Our good and wonderful sovereign has to perform the noblest role on earth, and he is so virtuous and noble that God will not forsake him. He will fulfill his vocation and crush the hydra of revolution, which has become more terrible than ever in the person of this murderer and villain! We alone must avenge the blood of the just one.... Whom, I ask you, can we rely on?... England with her commercial spirit will not and cannot understand the Emperor Alexander's loftiness of soul. She has refused to evacuate Malta. She wanted to find, and still seeks, some secret motive in our actions. What answer did Novosiltsev get? None. The English have not understood and cannot understand the self-abnegation of our Emperor who wants nothing for himself, but only desires the good of mankind. And what have they promised? Nothing! And what little they have promised they will not perform! Prussia has always declared that Buonaparte is invincible, and that all Europe is powerless before him.... And I don't believe a word that Hardenburg says, or Haugwitz either. This famous Prussian neutrality is just a trap. I have faith only in God and the lofty destiny of our adored monarch. He will save Europe!"
She suddenly paused, smiling at her own impetuosity.
"I think," said the prince with a smile, "that if you had been sent instead of our dear Wintzingerode you would have captured the King of Prussia's consent by assault. You are so eloquent. Will you give me a cup of tea?"
"In a moment. A propos," she added, becoming calm again, "I am expecting two very interesting men tonight, le Vicomte de Mortemart, who is connected with the Montmorencys through the Rohans, one of the best French families. He is one of the genuine emigres, the good ones. And also the Abbe Morio. Do you know that profound thinker? He has been received by the Emperor. Had you heard?"
"I shall be delighted to meet them," said the prince. "But tell me," he added with studied carelessness as if it had only just occurred to him, though the question he was about to ask was the chief motive of his visit, "is it true that the Dowager Empress wants Baron Funke to be appointed first secretary at Vienna? The baron by all accounts is a poor creature."
Prince Vasili wished to obtain this post for his son, but others were trying through the Dowager Empress Marya Fedorovna to secure it for the baron.
Anna Pavlovna almost closed her eyes to indicate that neither she nor anyone else had a right to criticize what the Empress desired or was pleased with.
"Baron Funke has been recommended to the Dowager Empress by her sister," was all she said, in a dry and mournful tone.
As she named the Empress, Anna Pavlovna's face suddenly assumed an expression of profound and sincere devotion and respect mingled with sadness, and this occurred every time she mentioned her illustrious patroness. She added that Her Majesty had deigned to show Baron Funke beaucoup d'estime, and again her face clouded over with sadness.
The prince was silent and looked indifferent. But, with the womanly and courtierlike quickness and tact habitual to her, Anna Pavlovna wished both to rebuke him (for daring to speak as he had done of a man recommended to the Empress) and at the same time to console him, so she said:
"Now about your family. Do you know that since your daughter came out everyone has been enraptured by her? They say she is amazingly beautiful."
The prince bowed to signify his respect and gratitude.
"I often think," she continued after a short pause, drawing nearer to the prince and smiling amiably at him as if to show that political and social topics were ended and the time had come for intimate conversation - "I often think how unfairly sometimes the joys of life are distributed. Why has fate given you two such splendid children? I don't speak of Anatole, your youngest. I don't like him," she added in a tone admitting of no rejoinder and raising her eyebrows. "Two such charming children. And really you appreciate them less than anyone, and so you don't deserve to have them."
And she smiled her ecstatic smile.
"I can't help it," said the prince. "Lavater would have said I lack the bump of paternity."
"Don't joke; I mean to have a serious talk with you. Do you know I am dissatisfied with your younger son? Between ourselves" (and her face assumed its melancholy expression), "he was mentioned at Her Majesty's and you were pitied...."
The prince answered nothing, but she looked at him significantly, awaiting a reply. He frowned.
"What would you have me do?" he said at last. "You know I did all a father could for their education, and they have both turned out fools. Hippolyte is at least a quiet fool, but Anatole is an active one. That is the only difference between them." He said this smiling in a way more natural and animated than usual, so that the wrinkles round his mouth very clearly revealed something unexpectedly coarse and unpleasant.
"And why are children born to such men as you? If you were not a father there would be nothing I could reproach you with," said Anna Pavlovna, looking up pensively.
"I am your faithful slave and to you alone I can confess that my children are the bane of my life. It is the cross I have to bear. That is how I explain it to myself. It can't be helped!"
He said no more, but expressed his resignation to cruel fate by a gesture. Anna Pavlovna meditated.
"Have you never thought of marrying your prodigal son Anatole?" she asked. "They say old maids have a mania for matchmaking, and though I don't feel that weakness in myself as yet, I know a little person who is very unhappy with her father. She is a relation of yours, Princess Mary Bolkonskaya."
Prince Vasili did not reply, though, with the quickness of memory and perception befitting a man of the world, he indicated by a movement of the head that he was considering this information.
"Do you know," he said at last, evidently unable to check the sad current of his thoughts, "that Anatole is costing me forty thousand rubles a year? And," he went on after a pause, "what will it be in five years, if he goes on like this?" Presently he added: "That's what we fathers have to put up with.... Is this princess of yours rich?"
"Her father is very rich and stingy. He lives in the country. He is the well-known Prince Bolkonski who had to retire from the army under the late Emperor, and was nicknamed 'the King of Prussia.' He is very clever but eccentric, and a bore. The poor girl is very unhappy. She has a brother; I think you know him, he married Lise Meinen lately. He is an aide-de-camp of Kutuzov's and will be here tonight."
"Listen, dear Annette," said the prince, suddenly taking Anna Pavlovna's hand and for some reason drawing it downwards. "Arrange that affair for me and I shall always be your most devoted slave-slafe with an f, as a village elder of mine writes in his reports. She is rich and of good family and that's all I want."
And with the familiarity and easy grace peculiar to him, he raised the maid of honor's hand to his lips, kissed it, and swung it to and fro as he lay back in his armchair, looking in another direction.
"Attendez," said Anna Pavlovna, reflecting, "I'll speak to Lise, young Bolkonski's wife, this very evening, and perhaps the thing can be arranged. It shall be on your family's behalf that I'll start my apprenticeship as old maid."
Anna Pavlovna's drawing room was gradually filling. The highest Petersburg society was assembled there: people differing widely in age and character but alike in the social circle to which they belonged. Prince Vasili's daughter, the beautiful Helene, came to take her father to the ambassador's entertainment; she wore a ball dress and her badge as maid of honor. The youthful little Princess Bolkonskaya, known as la femme la plus seduisante de Petersbourg, * was also there. She had been married during the previous winter, and being pregnant did not go to any large gatherings, but only to small receptions. Prince Vasili's son, Hippolyte, had come with Mortemart, whom he introduced. The Abbe Morio and many others had also come.
* The most fascinating woman in Petersburg.
To each new arrival Anna Pavlovna said, "You have not yet seen my aunt," or "You do not know my aunt?" and very gravely conducted him or her to a little old lady, wearing large bows of ribbon in her cap, who had come sailing in from another room as soon as the guests began to arrive; and slowly turning her eyes from the visitor to her aunt, Anna Pavlovna mentioned each one's name and then left them.
Each visitor performed the ceremony of greeting this old aunt whom not one of them knew, not one of them wanted to know, and not one of them cared about; Anna Pavlovna observed these greetings with mournful and solemn interest and silent approval. The aunt spoke to each of them in the same words, about their health and her own, and the health of Her Majesty, "who, thank God, was better today." And each visitor, though politeness prevented his showing impatience, left the old woman with a sense of relief at having performed a vexatious duty and did not return to her the whole evening.
The young Princess Bolkonskaya had brought some work in a gold-embroidered velvet bag. Her pretty little upper lip, on which a delicate dark down was just perceptible, was too short for her teeth, but it lifted all the more sweetly, and was especially charming when she occasionally drew it down to meet the lower lip. As is always the case with a thoroughly attractive woman, her defect - the shortness of her upper lip and her half-open mouth - seemed to be her own special and peculiar form of beauty. Everyone brightened at the sight of this pretty young woman, so soon to become a mother, so full of life and health, and carrying her burden so lightly. Old men and dull dispirited young ones who looked at her, after being in her company and talking to her a little while, felt as if they too were becoming, like her, full of life and health. All who talked to her, and at each word saw her bright smile and the constant gleam of her white teeth, thought that they were in a specially amiable mood that day.
The little princess went round the table with quick, short, swaying steps, her workbag on her arm, and gaily spreading out her dress sat down on a sofa near the silver samovar, as if all she was doing was a pleasure to herself and to all around her. "I have brought my work," said she in French, displaying her bag and addressing all present. "Mind, Annette, I hope you have not played a wicked trick on me," she added, turning to her hostess. "You wrote that it was to be quite a small reception, and just see how badly I am dressed." And she spread out her arms to show her short-waisted, lace-trimmed, dainty gray dress, girdled with a broad ribbon just below the breast.
"Soyez tranquille, Lise, you will always be prettier than anyone else," replied Anna Pavlovna.
"You know," said the princess in the same tone of voice and still in French, turning to a general, "my husband is deserting me? He is going to get himself killed. Tell me what this wretched war is for?" she added, addressing Prince Vasili, and without waiting for an answer she turned to speak to his daughter, the beautiful Helene.
"What a delightful woman this little princess is!" said Prince Vasili to Anna Pavlovna.
One of the next arrivals was a stout, heavily built young man with close-cropped hair, spectacles, the light-colored breeches fashionable at that time, a very high ruffle, and a brown dress coat. This stout young man was an illegitimate son of Count Bezukhov, a well-known grandee of Catherine's time who now lay dying in Moscow. The young man had not yet entered either the military or civil service, as he had only just returned from abroad where he had been educated, and this was his first appearance in society. Anna Pavlovna greeted him with the nod she accorded to the lowest hierarchy in her drawing room. But in spite of this lowest-grade greeting, a look of anxiety and fear, as at the sight of something too large and unsuited to the place, came over her face when she saw Pierre enter. Though he was certainly rather bigger than the other men in the room, her anxiety could only have reference to the clever though shy, but observant and natural, expression which distinguished him from everyone else in that drawing room.
"It is very good of you, Monsieur Pierre, to come and visit a poor invalid," said Anna Pavlovna, exchanging an alarmed glance with her aunt as she conducted him to her.
Pierre murmured something unintelligible, and continued to look round as if in search of something. On his way to the aunt he bowed to the little princess with a pleased smile, as to an intimate acquaintance.
Anna Pavlovna's alarm was justified, for Pierre turned away from the aunt without waiting to hear her speech about Her Majesty's health. Anna Pavlovna in dismay detained him with the words: "Do you know the Abbe Morio? He is a most interesting man."
"Yes, I have heard of his scheme for perpetual peace, and it is very interesting but hardly feasible."
"You think so?" rejoined Anna Pavlovna in order to say something and get away to attend to her duties as hostess. But Pierre now committed a reverse act of impoliteness. First he had left a lady before she had finished speaking to him, and now he continued to speak to another who wished to get away. With his head bent, and his big feet spread apart, he began explaining his reasons for thinking the abbe's plan chimerical.
"We will talk of it later," said Anna Pavlovna with a smile.
And having got rid of this young man who did not know how to behave, she resumed her duties as hostess and continued to listen and watch, ready to help at any point where the conversation might happen to flag. As the foreman of a spinning mill, when he has set the hands to work, goes round and notices here a spindle that has stopped or there one that creaks or makes more noise than it should, and hastens to check the machine or set it in proper motion, so Anna Pavlovna moved about her drawing room, approaching now a silent, now a too-noisy group, and by a word or slight rearrangement kept the conversational machine in steady, proper, and regular motion. But amid these cares her anxiety about Pierre was evident. She kept an anxious watch on him when he approached the group round Mortemart to listen to what was being said there, and again when he passed to another group whose center was the abbe.
Pierre had been educated abroad, and this reception at Anna Pavlovna's was the first he had attended in Russia. He knew that all the intellectual lights of Petersburg were gathered there and, like a child in a toyshop, did not know which way to look, afraid of missing any clever conversation that was to be heard. Seeing the self-confident and refined expression on the faces of those present he was always expecting to hear something very profound. At last he came up to Morio. Here the conversation seemed interesting and he stood waiting for an opportunity to express his own views, as young people are fond of doing.
CHAPTER III
Anna Pavlovna's reception was in full swing. The spindles hummed steadily and ceaselessly on all sides. With the exception of the aunt, beside whom sat only one elderly lady, who with her thin careworn face was rather out of place in this brilliant society, the whole company had settled into three groups. One, chiefly masculine, had formed round the abbe. Another, of young people, was grouped round the beautiful Princess Helene, Prince Vasili's daughter, and the little Princess Bolkonskaya, very pretty and rosy, though rather too plump for her age. The third group was gathered round Mortemart and Anna Pavlovna.
The vicomte was a nice-looking young man with soft features and polished manners, who evidently considered himself a celebrity but out of politeness modestly placed himself at the disposal of the circle in which he found himself. Anna Pavlovna was obviously serving him up as a treat to her guests. As a clever maitre d'hotel serves up as a specially choice delicacy a piece of meat that no one who had seen it in the kitchen would have cared to eat, so Anna Pavlovna served up to her guests, first the vicomte and then the abbe, as peculiarly choice morsels. The group about Mortemart immediately began discussing the murder of the Duc d'Enghien. The vicomte said that the Duc d'Enghien had perished by his own magnanimity, and that there were particular reasons for Buonaparte's hatred of him.
"Ah, yes! Do tell us all about it, Vicomte," said Anna Pavlovna, with a pleasant feeling that there was something A la Louis XV in the sound of that sentence: "Contez nous cela, Vicomte."
The vicomte bowed and smiled courteously in token of his willingness to comply. Anna Pavlovna arranged a group round him, inviting everyone to listen to his tale.
"The vicomte knew the duc personally," whispered Anna Pavlovna to one of the guests. "The vicomte is a wonderful raconteur," said she to another. "How evidently he belongs to the best society," said she to a third; and the vicomte was served up to the company in the choicest and most advantageous style, like a well-garnished joint of roast beef on a hot dish.
The vicomte wished to begin his story and gave a subtle smile.
"Come over here, Helene, dear," said Anna Pavlovna to the beautiful young princess who was sitting some way off, the center of another group.
The princess smiled. She rose with the same unchanging smile with which she had first entered the room - the smile of a perfectly beautiful woman. With a slight rustle of her white dress trimmed with moss and ivy, with a gleam of white shoulders, glossy hair, and sparkling diamonds, she passed between the men who made way for her, not looking at any of them but smiling on all, as if graciously allowing each the privilege of admiring her beautiful figure and shapely shoulders, back, and bosom - which in the fashion of those days were very much exposed - and she seemed to bring the glamour of a ballroom with her as she moved toward Anna Pavlovna. Helene was so lovely that not only did she not show any trace of coquetry, but on the contrary she even appeared shy of her unquestionable and all too victorious beauty. She seemed to wish, but to be unable, to diminish its effect.
"How lovely!" said everyone who saw her; and the vicomte lifted his shoulders and dropped his eyes as if startled by something extraordinary when she took her seat opposite and beamed upon him also with her unchanging smile.
"Madame, I doubt my ability before such an audience," said he, smilingly inclining his head.
The princess rested her bare round arm on a little table and considered a reply unnecessary. She smilingly waited. All the time the story was being told she sat upright, glancing now at her beautiful round arm, altered in shape by its pressure on the table, now at her still more beautiful bosom, on which she readjusted a diamond necklace. From time to time she smoothed the folds of her dress, and whenever the story produced an effect she glanced at Anna Pavlovna, at once adopted just the expression she saw on the maid of honor's face, and again relapsed into her radiant smile.
The little princess had also left the tea table and followed Helene.
"Wait a moment, I'll get my work.... Now then, what are you thinking of?" she went on, turning to Prince Hippolyte. "Fetch me my workbag."
There was a general movement as the princess, smiling and talking merrily to everyone at once, sat down and gaily arranged herself in her seat.
"Now I am all right," she said, and asking the vicomte to begin, she took up her work.
Prince Hippolyte, having brought the workbag, joined the circle and moving a chair close to hers seated himself beside her.
Le charmant Hippolyte was surprising by his extraordinary resemblance to his beautiful sister, but yet more by the fact that in spite of this resemblance he was exceedingly ugly. His features were like his sister's, but while in her case everything was lit up by a joyous, self-satisfied, youthful, and constant smile of animation, and by the wonderful classic beauty of her figure, his face on the contrary was dulled by imbecility and a constant expression of sullen self-confidence, while his body was thin and weak. His eyes, nose, and mouth all seemed puckered into a vacant, wearied grimace, and his arms and legs always fell into unnatural positions.
"It's not going to be a ghost story?" said he, sitting down beside the princess and hastily adjusting his lorgnette, as if without this instrument he could not begin to speak.
"Why no, my dear fellow," said the astonished narrator, shrugging his shoulders.
"Because I hate ghost stories," said Prince Hippolyte in a tone which showed that he only understood the meaning of his words after he had uttered them.
He spoke with such self-confidence that his hearers could not be sure whether what he said was very witty or very stupid. He was dressed in a dark-green dress coat, knee breeches of the color of cuisse de nymphe effrayee, as he called it, shoes, and silk stockings.
The vicomte told his tale very neatly. It was an anecdote, then current, to the effect that the Duc d'Enghien had gone secretly to Paris to visit Mademoiselle George; that at her house he came upon Bonaparte, who also enjoyed the famous actress' favors, and that in his presence Napoleon happened to fall into one of the fainting fits to which he was subject, and was thus at the duc's mercy. The latter spared him, and this magnanimity Bonaparte subsequently repaid by death.
The story was very pretty and interesting, especially at the point where the rivals suddenly recognized one another; and the ladies looked agitated.
"Charming!" said Anna Pavlovna with an inquiring glance at the little princess.
"Charming!" whispered the little princess, sticking the needle into her work as if to testify that the interest and fascination of the story prevented her from going on with it.
The vicomte appreciated this silent praise and smiling gratefully prepared to continue, but just then Anna Pavlovna, who had kept a watchful eye on the young man who so alarmed her, noticed that he was talking too loudly and vehemently with the abbe, so she hurried to the rescue. Pierre had managed to start a conversation with the abbe about the balance of power, and the latter, evidently interested by the young man's simple-minded eagerness, was explaining his pet theory. Both were talking and listening too eagerly and too naturally, which was why Anna Pavlovna disapproved.
"The means are ... the balance of power in Europe and the rights of the people," the abbe was saying. "It is only necessary for one powerful nation like Russia - barbaric as she is said to be - to place herself disinterestedly at the head of an alliance having for its object the maintenance of the balance of power of Europe, and it would save the world!"
"But how are you to get that balance?" Pierre was beginning.
At that moment Anna Pavlovna came up and, looking severely at Pierre, asked the Italian how he stood Russian climate. The Italian's face instantly changed and assumed an offensively affected, sugary expression, evidently habitual to him when conversing with women.
"I am so enchanted by the brilliancy of the wit and culture of the society, more especially of the feminine society, in which I have had the honor of being received, that I have not yet had time to think of the climate," said he.
Not letting the abbe and Pierre escape, Anna Pavlovna, the more conveniently to keep them under observation, brought them into the larger circle.

View File

@@ -1,16 +1,47 @@
package format
import "fmt"
import (
"fmt"
"math"
)
const (
Byte = 1
KiloByte = Byte * 1000
MegaByte = KiloByte * 1000
GigaByte = MegaByte * 1000
TeraByte = GigaByte * 1000
)
func HumanBytes(b int64) string {
var value float64
var unit string
switch {
case b > 1000*1000*1000:
return fmt.Sprintf("%d GB", b/1000/1000/1000)
case b > 1000*1000:
return fmt.Sprintf("%d MB", b/1000/1000)
case b > 1000:
return fmt.Sprintf("%d KB", b/1000)
case b >= TeraByte:
value = float64(b) / TeraByte
unit = "TB"
case b >= GigaByte:
value = float64(b) / GigaByte
unit = "GB"
case b >= MegaByte:
value = float64(b) / MegaByte
unit = "MB"
case b >= KiloByte:
value = float64(b) / KiloByte
unit = "KB"
default:
return fmt.Sprintf("%d B", b)
}
switch {
case value >= 100:
return fmt.Sprintf("%d %s", int(value), unit)
case value >= 10:
return fmt.Sprintf("%d %s", int(value), unit)
case value != math.Trunc(value):
return fmt.Sprintf("%.1f %s", value, unit)
default:
return fmt.Sprintf("%d %s", int(value), unit)
}
}

25
format/format.go Normal file
View File

@@ -0,0 +1,25 @@
package format
import (
"fmt"
"math"
)
const (
Thousand = 1000
Million = Thousand * 1000
Billion = Million * 1000
)
func HumanNumber(b uint64) string {
switch {
case b > Billion:
return fmt.Sprintf("%.0fB", math.Round(float64(b)/Billion))
case b > Million:
return fmt.Sprintf("%.0fM", math.Round(float64(b)/Million))
case b > Thousand:
return fmt.Sprintf("%.0fK", math.Round(float64(b)/Thousand))
default:
return fmt.Sprintf("%d", b)
}
}

View File

@@ -29,7 +29,7 @@ func TestHumanTime(t *testing.T) {
})
t.Run("soon", func(t *testing.T) {
v := now.Add(800*time.Millisecond)
v := now.Add(800 * time.Millisecond)
assertEqual(t, HumanTime(v, ""), "Less than a second from now")
})
}

14
go.mod
View File

@@ -3,12 +3,11 @@ module github.com/jmorganca/ollama
go 1.20
require (
github.com/dustin/go-humanize v1.0.1
github.com/emirpasic/gods v1.18.1
github.com/gin-gonic/gin v1.9.1
github.com/mattn/go-runewidth v0.0.14
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db
github.com/olekukonko/tablewriter v0.0.5
github.com/pdevine/readline v1.5.2
github.com/spf13/cobra v1.7.0
golang.org/x/sync v0.3.0
)
@@ -39,13 +38,12 @@ require (
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.11 // indirect
golang.org/x/arch v0.3.0 // indirect
golang.org/x/crypto v0.10.0
golang.org/x/crypto v0.14.0
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63
golang.org/x/net v0.10.0 // indirect
golang.org/x/sys v0.11.0 // indirect
golang.org/x/term v0.10.0
golang.org/x/text v0.10.0 // indirect
gonum.org/v1/gonum v0.13.0
golang.org/x/net v0.17.0 // indirect
golang.org/x/sys v0.13.0 // indirect
golang.org/x/term v0.13.0
golang.org/x/text v0.13.0 // indirect
google.golang.org/protobuf v1.30.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

33
go.sum
View File

@@ -4,17 +4,13 @@ github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZX
github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 h1:qSGYFH7+jGhDF8vLC+iwCD4WpbV1EBDSzWkJODFLams=
github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk=
github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM=
github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ=
github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04=
github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8=
github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
github.com/gin-contrib/cors v1.4.0 h1:oJ6gwtUl3lqV0WEIwM/LxPF1QZ5qe2lGWdY2+bz7y0g=
@@ -78,8 +74,6 @@ github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N
github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0=
github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y=
github.com/pdevine/readline v1.5.2 h1:oz6Y5GdTmhPG+08hhxcAvtHitSANWuA2100Sppb38xI=
github.com/pdevine/readline v1.5.2/go.mod h1:na/LbuE5PYwxI7GyopWdIs3U8HVe89lYlNTFTXH3wOw=
github.com/pelletier/go-toml/v2 v2.0.1/go.mod h1:r9LEWfGN8R5k0VXJ+0BkIe7MYkRdwZOjgMj2KwnJFUo=
github.com/pelletier/go-toml/v2 v2.0.8 h1:0ctb6s9mE31h0/lhu+J6OPmVeDxJn+kYnJc2jZR9tGQ=
github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4=
@@ -118,35 +112,32 @@ golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUu
golang.org/x/arch v0.3.0 h1:02VY4/ZcO/gBOH6PUaoiptASxtXU10jazRCP865E97k=
golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8=
golang.org/x/crypto v0.0.0-20210711020723-a769d52b0f97/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
golang.org/x/crypto v0.14.0 h1:wBqGXzWJW6m1XrIKlAH0Hs1JJ7+9KBwnIO8v66Q9cHc=
golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63 h1:m64FZMko/V45gv0bNmrNYoDEq8U5YUhetc9cBWKS1TQ=
golang.org/x/exp v0.0.0-20230817173708-d852ddb80c63/go.mod h1:0v4NqG35kSWCMzLaMeX+IQrlSnVE/bqGSyC2cz/9Le8=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.10.0 h1:X2//UzNDwYmtCLn7To6G58Wr6f5ahEAQgKNzv9Y951M=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM=
golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
golang.org/x/sync v0.3.0 h1:ftCYgMx6zT/asHUrPw8BLLscYtGznsLAnjq5RH9P66E=
golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210806184541-e5e7981a1069/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.10.0 h1:3R7pNqamzBraeqj/Tj8qt1aQ2HpmlC+Cx/qL/7hn4/c=
golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o=
golang.org/x/term v0.13.0 h1:bb+I9cTfFazGW51MZqBVmZy7+JEJMouUHTUSKVQLBek=
golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.10.0 h1:UpjohKhiEgNc0CSauXmwYftY1+LlaC75SJwh0SgCX58=
golang.org/x/text v0.10.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k=
golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
gonum.org/v1/gonum v0.13.0 h1:a0T3bh+7fhRyqeNbiC3qVHYmkiQgit3wnNan/2c0HMM=
gonum.org/v1/gonum v0.13.0/go.mod h1:/WPYRckkfWrhWefxyYTfrTtQR0KH4iyHNuzxqXAKyAU=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=

View File

@@ -175,7 +175,8 @@ const (
// Magic constant for `ggla` files (LoRA adapter).
FILE_MAGIC_GGLA = 0x67676C61
// Magic constant for `gguf` files (versioned, gguf)
FILE_MAGIC_GGUF = 0x46554747
FILE_MAGIC_GGUF_LE = 0x46554747
FILE_MAGIC_GGUF_BE = 0x47475546
)
func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
@@ -191,8 +192,10 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
ggml.container = &containerGGJT{}
case FILE_MAGIC_GGLA:
ggml.container = &containerLORA{}
case FILE_MAGIC_GGUF:
ggml.container = &containerGGUF{}
case FILE_MAGIC_GGUF_LE:
ggml.container = &containerGGUF{bo: binary.LittleEndian}
case FILE_MAGIC_GGUF_BE:
ggml.container = &containerGGUF{bo: binary.BigEndian}
default:
return nil, errors.New("invalid file magic")
}

View File

@@ -3,12 +3,15 @@ package llm
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"github.com/jmorganca/ollama/format"
)
type containerGGUF struct {
bo binary.ByteOrder
Version uint32
V1 struct {
@@ -20,6 +23,8 @@ type containerGGUF struct {
NumTensor uint64
NumKV uint64
}
parameters uint64
}
func (c *containerGGUF) Name() string {
@@ -27,15 +32,13 @@ func (c *containerGGUF) Name() string {
}
func (c *containerGGUF) Decode(r io.Reader) (model, error) {
binary.Read(r, binary.LittleEndian, &c.Version)
binary.Read(r, c.bo, &c.Version)
switch c.Version {
case 1:
binary.Read(r, binary.LittleEndian, &c.V1)
case 2:
binary.Read(r, binary.LittleEndian, &c.V2)
binary.Read(r, c.bo, &c.V1)
default:
return nil, errors.New("invalid version")
binary.Read(r, c.bo, &c.V2)
}
model := newGGUFModel(c)
@@ -76,6 +79,14 @@ func newGGUFModel(container *containerGGUF) *ggufModel {
}
}
func (llm *ggufModel) NumTensor() uint64 {
if llm.Version == 1 {
return uint64(llm.V1.NumTensor)
}
return llm.V2.NumTensor
}
func (llm *ggufModel) NumKV() uint64 {
if llm.Version == 1 {
return uint64(llm.V1.NumKV)
@@ -94,6 +105,10 @@ func (llm *ggufModel) ModelFamily() string {
}
func (llm *ggufModel) ModelType() string {
if llm.parameters > 0 {
return format.HumanNumber(llm.parameters)
}
switch llm.ModelFamily() {
case "llama":
if blocks, ok := llm.kv["llama.block_count"].(uint32); ok {
@@ -128,13 +143,9 @@ func (llm *ggufModel) FileType() string {
}
func (llm *ggufModel) Decode(r io.Reader) error {
read := llm.readString
if llm.Version == 1 {
read = llm.readStringV1
}
// decode key-values
for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := read(r)
k, err := llm.readString(r)
if err != nil {
return err
}
@@ -166,24 +177,14 @@ func (llm *ggufModel) Decode(r io.Reader) error {
case ggufTypeBool:
v = llm.readBool(r)
case ggufTypeString:
fn := llm.readString
if llm.Version == 1 {
fn = llm.readStringV1
}
s, err := fn(r)
s, err := llm.readString(r)
if err != nil {
return err
}
v = s
case ggufTypeArray:
fn := llm.readArray
if llm.Version == 1 {
fn = llm.readArrayV1
}
a, err := fn(r)
a, err := llm.readArray(r)
if err != nil {
return err
}
@@ -196,6 +197,25 @@ func (llm *ggufModel) Decode(r io.Reader) error {
llm.kv[k] = v
}
// decode tensors
for i := 0; uint64(i) < llm.NumTensor(); i++ {
if _, err := llm.readString(r); err != nil {
return err
}
dimensions := llm.readU32(r)
var elements uint64 = 1
for i := 0; uint32(i) < dimensions; i++ {
elements *= llm.readU64(r)
}
llm.readU32(r) // type
llm.readU64(r) // offset
llm.parameters += elements
}
return nil
}
@@ -209,75 +229,75 @@ func (llm *ggufModel) NumLayers() int64 {
return int64(v)
}
func (ggufModel) readU8(r io.Reader) uint8 {
func (llm ggufModel) readU8(r io.Reader) uint8 {
var u8 uint8
binary.Read(r, binary.LittleEndian, &u8)
binary.Read(r, llm.bo, &u8)
return u8
}
func (ggufModel) readI8(r io.Reader) int8 {
func (llm ggufModel) readI8(r io.Reader) int8 {
var i8 int8
binary.Read(r, binary.LittleEndian, &i8)
binary.Read(r, llm.bo, &i8)
return i8
}
func (ggufModel) readU16(r io.Reader) uint16 {
func (llm ggufModel) readU16(r io.Reader) uint16 {
var u16 uint16
binary.Read(r, binary.LittleEndian, &u16)
binary.Read(r, llm.bo, &u16)
return u16
}
func (ggufModel) readI16(r io.Reader) int16 {
func (llm ggufModel) readI16(r io.Reader) int16 {
var i16 int16
binary.Read(r, binary.LittleEndian, &i16)
binary.Read(r, llm.bo, &i16)
return i16
}
func (ggufModel) readU32(r io.Reader) uint32 {
func (llm ggufModel) readU32(r io.Reader) uint32 {
var u32 uint32
binary.Read(r, binary.LittleEndian, &u32)
binary.Read(r, llm.bo, &u32)
return u32
}
func (ggufModel) readI32(r io.Reader) int32 {
func (llm ggufModel) readI32(r io.Reader) int32 {
var i32 int32
binary.Read(r, binary.LittleEndian, &i32)
binary.Read(r, llm.bo, &i32)
return i32
}
func (ggufModel) readU64(r io.Reader) uint64 {
func (llm ggufModel) readU64(r io.Reader) uint64 {
var u64 uint64
binary.Read(r, binary.LittleEndian, &u64)
binary.Read(r, llm.bo, &u64)
return u64
}
func (ggufModel) readI64(r io.Reader) int64 {
func (llm ggufModel) readI64(r io.Reader) int64 {
var i64 int64
binary.Read(r, binary.LittleEndian, &i64)
binary.Read(r, llm.bo, &i64)
return i64
}
func (ggufModel) readF32(r io.Reader) float32 {
func (llm ggufModel) readF32(r io.Reader) float32 {
var f32 float32
binary.Read(r, binary.LittleEndian, &f32)
binary.Read(r, llm.bo, &f32)
return f32
}
func (ggufModel) readF64(r io.Reader) float64 {
func (llm ggufModel) readF64(r io.Reader) float64 {
var f64 float64
binary.Read(r, binary.LittleEndian, &f64)
binary.Read(r, llm.bo, &f64)
return f64
}
func (ggufModel) readBool(r io.Reader) bool {
func (llm ggufModel) readBool(r io.Reader) bool {
var b bool
binary.Read(r, binary.LittleEndian, &b)
binary.Read(r, llm.bo, &b)
return b
}
func (ggufModel) readStringV1(r io.Reader) (string, error) {
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
var nameLength uint32
binary.Read(r, binary.LittleEndian, &nameLength)
binary.Read(r, llm.bo, &nameLength)
var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -291,8 +311,12 @@ func (ggufModel) readStringV1(r io.Reader) (string, error) {
}
func (llm ggufModel) readString(r io.Reader) (string, error) {
if llm.Version == 1 {
return llm.readStringV1(r)
}
var nameLength uint64
binary.Read(r, binary.LittleEndian, &nameLength)
binary.Read(r, llm.bo, &nameLength)
var b bytes.Buffer
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
@@ -311,7 +335,7 @@ func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
case ggufTypeUint8:
arr = append(arr, llm.readU8(r))
case ggufTypeInt8:
arr = append(arr, llm.readU8(r))
arr = append(arr, llm.readI8(r))
case ggufTypeUint16:
arr = append(arr, llm.readU16(r))
case ggufTypeInt16:
@@ -340,6 +364,10 @@ func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
}
func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
if llm.Version == 1 {
return llm.readArrayV1(r)
}
atype := llm.readU32(r)
n := llm.readU64(r)
@@ -348,7 +376,7 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
case ggufTypeUint8:
arr = append(arr, llm.readU8(r))
case ggufTypeInt8:
arr = append(arr, llm.readU8(r))
arr = append(arr, llm.readI8(r))
case ggufTypeUint16:
arr = append(arr, llm.readU16(r))
case ggufTypeInt16:

View File

@@ -7,12 +7,12 @@ package llm
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=on
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View File

@@ -12,7 +12,7 @@ package llm
//go:generate mv ggml/build/metal/bin/server ggml/build/metal/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build gguf/build/metal --target server --config Release
//go:generate mv gguf/build/metal/bin/server gguf/build/metal/bin/ollama-runner

View File

@@ -13,14 +13,14 @@ package llm
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA_PEER_MAX_BATCH_SIZE=0
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner

View File

@@ -10,7 +10,15 @@ package llm
//go:generate cmd /c move ggml\build\cpu\bin\Release\server.exe ggml\build\cpu\bin\Release\ollama-runner.exe
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-remove-warm-up-logging.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate cmd /c move ggml\build\cuda\bin\Release\server.exe ggml\build\cuda\bin\Release\ollama-runner.exe
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate cmd /c move gguf\build\cuda\bin\Release\server.exe gguf\build\cuda\bin\Release\ollama-runner.exe

View File

@@ -1,25 +0,0 @@
From 07993bdc35345b67b27aa649a7c099ad42d80c4c Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Thu, 21 Sep 2023 14:43:21 -0700
Subject: [PATCH] remove warm up logging
---
common/common.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/common/common.cpp b/common/common.cpp
index 2597ba0..b56549b 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -780,8 +780,6 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
}
{
- LOG("warming up the model with an empty run\n");
-
const std::vector<llama_token> tmp = { llama_token_bos(lctx), llama_token_eos(lctx), };
llama_eval(lctx, tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, params.n_threads);
llama_reset_timings(lctx);
--
2.42.0

View File

@@ -0,0 +1,25 @@
From 6465fec6290f0a7f5d4d0fbe6bcf634e4810dde6 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 23 Oct 2023 10:39:34 -0700
Subject: [PATCH] default log stderr
---
common/log.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/log.h b/common/log.h
index b8953fd..25522cd 100644
--- a/common/log.h
+++ b/common/log.h
@@ -90,7 +90,7 @@
// }
//
#ifndef LOG_TARGET
- #define LOG_TARGET log_handler()
+ #define LOG_TARGET nullptr
#endif
#ifndef LOG_TEE_TARGET
--
2.42.0

View File

@@ -24,51 +24,83 @@ import (
"time"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format"
)
const jsonGrammar = `
root ::= object
value ::= object | array | string | number | ("true" | "false" | "null") ws
object ::=
"{" ws (
string ":" ws value
("," ws string ":" ws value)*
)? "}" ws
array ::=
"[" ws (
value
("," ws value)*
)? "]" ws
string ::=
"\"" (
[^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
)* "\"" ws
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
# Optional space: by convention, applied in this grammar after literal chars when allowed
ws ::= ([ \t\n] ws)?
`
//go:embed llama.cpp/*/build/*/bin/*
var llamaCppEmbed embed.FS
type ModelRunner struct {
Path string // path to the model runner executable
Path string // path to the model runner executable
Accelerated bool
}
func chooseRunners(workDir, runnerType string) []ModelRunner {
buildPath := path.Join("llama.cpp", runnerType, "build")
var runners []string
var runners []ModelRunner
// set the runners based on the OS
// IMPORTANT: the order of the runners in the array is the priority order
switch runtime.GOOS {
case "darwin":
runners = []string{
path.Join(buildPath, "metal", "bin", "ollama-runner"),
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
if runtime.GOARCH == "arm64" {
runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
} else {
runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
}
case "linux":
runners = []string{
path.Join(buildPath, "cuda", "bin", "ollama-runner"),
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
runners = []ModelRunner{
{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
}
case "windows":
// TODO: select windows GPU runner here when available
runners = []string{
path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe"),
runners = []ModelRunner{
{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
}
default:
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
runners = []string{
path.Join(buildPath, "cpu", "bin", "ollama-runner"),
runners = []ModelRunner{
{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
}
}
runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
for _, r := range runners {
// find all the files in the runner's bin directory
files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r), "*"))
files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r.Path), "*"))
if err != nil {
// this is expected, ollama may be compiled without all runners packed in
log.Printf("%s runner not found: %v", r, err)
log.Printf("%s runner not found: %v", r.Path, err)
continue
}
@@ -115,7 +147,10 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
localRunnersByPriority := []ModelRunner{}
for _, r := range runners {
// clean the ModelRunner paths so that they match the OS we are running on
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: filepath.Clean(path.Join(workDir, r))})
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
Path: filepath.Clean(path.Join(workDir, r.Path)),
Accelerated: r.Accelerated,
})
}
return localRunnersByPriority
@@ -178,12 +213,12 @@ type llamaHyperparameters struct {
}
type Running struct {
Port int
Cmd *exec.Cmd
Cancel context.CancelFunc
exitOnce sync.Once
exitCh chan error // channel to receive the exit status of the subprocess
exitErr error // error returned by the subprocess
Port int
Cmd *exec.Cmd
Cancel context.CancelFunc
exitOnce sync.Once
exitCh chan error // channel to receive the exit status of the subprocess
*StatusWriter // captures error messages from the llama runner process
}
type llama struct {
@@ -191,56 +226,70 @@ type llama struct {
Running
}
var errNoGPU = errors.New("nvidia-smi command failed")
var (
errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
)
// CheckVRAM returns the available VRAM in MiB on Linux machines with NVIDIA GPUs
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (int64, error) {
cmd := exec.Command("nvidia-smi", "--query-gpu=memory.free", "--format=csv,noheader,nounits")
var stdout bytes.Buffer
cmd.Stdout = &stdout
err := cmd.Run()
if err != nil {
return 0, errNoGPU
return 0, errNvidiaSMI
}
var free int64
var freeMiB int64
scanner := bufio.NewScanner(&stdout)
for scanner.Scan() {
line := scanner.Text()
if strings.Contains(line, "[Insufficient Permissions]") {
return 0, fmt.Errorf("GPU support may not enabled, check you have installed GPU drivers and have the necessary permissions to run nvidia-smi")
}
vram, err := strconv.ParseInt(strings.TrimSpace(line), 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse available VRAM: %v", err)
}
free += vram
freeMiB += vram
}
return free, nil
freeBytes := freeMiB * 1024 * 1024
if freeBytes < 2*format.GigaByte {
log.Printf("less than 2 GB VRAM available")
return 0, errAvailableVRAM
}
return freeBytes, nil
}
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
if runtime.GOOS == "linux" {
vramMib, err := CheckVRAM()
if runtime.GOOS == "linux" || runtime.GOOS == "windows" {
freeBytes, err := CheckVRAM()
if err != nil {
if err.Error() != "nvidia-smi command failed" {
if !errors.Is(err, errNvidiaSMI) {
log.Print(err.Error())
}
// nvidia driver not installed or no nvidia GPU found
return 0
}
freeVramBytes := int64(vramMib) * 1024 * 1024 // 1 MiB = 1024^2 bytes
// Calculate bytes per layer
// TODO: this is a rough heuristic, better would be to calculate this based on number of layers and context size
/*
Calculate bytes per layer, this will roughly be the size of the model file divided by the number of layers.
We can store the model weights and the kv cache in vram,
to enable kv chache vram storage add two additional layers to the number of layers retrieved from the model file.
*/
bytesPerLayer := fileSizeBytes / numLayer
// max number of layers we can fit in VRAM, subtract 5% to prevent consuming all available VRAM and running out of memory
layers := int(freeVramBytes/bytesPerLayer) * 95 / 100
log.Printf("%d MiB VRAM available, loading up to %d GPU layers", vramMib, layers)
// 75% of the absolute max number of layers we can fit in available VRAM, off-loading too many layers to the GPU can cause OOM errors
layers := int(freeBytes/bytesPerLayer) * 3 / 4
log.Printf("%d MB VRAM available, loading up to %d GPU layers", freeBytes/(1024*1024), layers)
return layers
}
@@ -250,7 +299,8 @@ func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
// StatusWriter is a writer that captures error messages from the llama runner process
type StatusWriter struct {
ErrCh chan error
ErrCh chan error
LastErrMsg string
}
func NewStatusWriter() *StatusWriter {
@@ -260,10 +310,18 @@ func NewStatusWriter() *StatusWriter {
}
func (w *StatusWriter) Write(b []byte) (int, error) {
var errMsg string
if _, after, ok := bytes.Cut(b, []byte("error:")); ok {
err := fmt.Errorf("llama runner: %s", after)
w.ErrCh <- err
errMsg = string(bytes.TrimSpace(after))
} else if _, after, ok := bytes.Cut(b, []byte("CUDA error")); ok {
errMsg = string(bytes.TrimSpace(after))
}
if errMsg != "" {
w.LastErrMsg = errMsg
w.ErrCh <- fmt.Errorf("llama runner: %s", errMsg)
}
return os.Stderr.Write(b)
}
@@ -277,16 +335,27 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
}
numGPU := NumGPU(numLayers, fileInfo.Size(), opts)
params := []string{
"--model", model,
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
"--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase),
"--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", NumGPU(numLayers, fileInfo.Size(), opts)),
"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
"--embedding",
}
if opts.MainGPU > 0 {
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
}
if opts.RopeFrequencyBase > 0 {
params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
}
if opts.RopeFrequencyScale > 0 {
params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
}
if opts.NumGQA > 0 {
params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
}
@@ -317,6 +386,11 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
// start the llama.cpp server with a retry in case the port is already in use
for _, runner := range runners {
if runner.Accelerated && numGPU == 0 {
log.Printf("skipping accelerated runner because num_gpu=0")
continue
}
if _, err := os.Stat(runner.Path); err != nil {
log.Printf("llama runner not found: %v", err)
continue
@@ -329,7 +403,15 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
runner.Path,
append(params, "--port", strconv.Itoa(port))...,
)
cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", filepath.Dir(runner.Path)))
var libraryPaths []string
if libraryPath, ok := os.LookupEnv("LD_LIBRARY_PATH"); ok {
libraryPaths = append(libraryPaths, libraryPath)
}
libraryPaths = append(libraryPaths, filepath.Dir(runner.Path))
cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", strings.Join(libraryPaths, ":")))
cmd.Stdout = os.Stderr
statusWriter := NewStatusWriter()
cmd.Stderr = statusWriter
@@ -345,7 +427,13 @@ func newLlama(model string, adapters []string, runners []ModelRunner, numLayers
// monitor the llama runner process and signal when it exits
go func() {
err := llm.Cmd.Wait()
llm.exitErr = err
// default to printing the exit message of the command process, it will probably just say 'exit staus 1'
errMsg := err.Error()
// try to set a better error message if llama runner logs captured an error
if statusWriter.LastErrMsg != "" {
errMsg = statusWriter.LastErrMsg
}
log.Println(errMsg)
// llm.Cmd.Wait() can only be called once, use this exit channel to signal that the process has exited
llm.exitOnce.Do(func() {
close(llm.exitCh)
@@ -415,10 +503,9 @@ func (llm *llama) Close() {
// wait for the command to exit to prevent race conditions with the next run
<-llm.exitCh
err := llm.exitErr
if err != nil {
log.Printf("llama runner stopped with error: %v", err)
if llm.StatusWriter != nil && llm.StatusWriter.LastErrMsg != "" {
log.Printf("llama runner stopped with error: %v", llm.StatusWriter.LastErrMsg)
} else {
log.Print("llama runner stopped successfully")
}
@@ -428,111 +515,73 @@ func (llm *llama) SetOptions(opts api.Options) {
llm.Options = opts
}
type GenerationSettings struct {
FrequencyPenalty float64 `json:"frequency_penalty"`
IgnoreEOS bool `json:"ignore_eos"`
LogitBias []interface{} `json:"logit_bias"`
Mirostat int `json:"mirostat"`
MirostatEta float64 `json:"mirostat_eta"`
MirostatTau float64 `json:"mirostat_tau"`
Model string `json:"model"`
NCtx int `json:"n_ctx"`
NKeep int `json:"n_keep"`
NPredict int `json:"n_predict"`
NProbs int `json:"n_probs"`
PenalizeNl bool `json:"penalize_nl"`
PresencePenalty float64 `json:"presence_penalty"`
RepeatLastN int `json:"repeat_last_n"`
RepeatPenalty float64 `json:"repeat_penalty"`
Seed uint32 `json:"seed"`
Stop []string `json:"stop"`
Stream bool `json:"stream"`
Temp float64 `json:"temp"`
TfsZ float64 `json:"tfs_z"`
TopK int `json:"top_k"`
TopP float64 `json:"top_p"`
TypicalP float64 `json:"typical_p"`
}
type Timings struct {
PredictedN int `json:"predicted_n"`
PredictedMS float64 `json:"predicted_ms"`
PromptN int `json:"prompt_n"`
PromptMS float64 `json:"prompt_ms"`
}
type Prediction struct {
type prediction struct {
Content string `json:"content"`
Model string `json:"model"`
Prompt string `json:"prompt"`
Stop bool `json:"stop"`
Timings `json:"timings"`
Timings struct {
PredictedN int `json:"predicted_n"`
PredictedMS float64 `json:"predicted_ms"`
PromptN int `json:"prompt_n"`
PromptMS float64 `json:"prompt_ms"`
}
}
type PredictRequest struct {
Prompt string `json:"prompt"`
Stream bool `json:"stream"`
NPredict int `json:"n_predict"`
NKeep int `json:"n_keep"`
Temperature float32 `json:"temperature"`
TopK int `json:"top_k"`
TopP float32 `json:"top_p"`
TfsZ float32 `json:"tfs_z"`
TypicalP float32 `json:"typical_p"`
RepeatLastN int `json:"repeat_last_n"`
RepeatPenalty float32 `json:"repeat_penalty"`
PresencePenalty float32 `json:"presence_penalty"`
FrequencyPenalty float32 `json:"frequency_penalty"`
Mirostat int `json:"mirostat"`
MirostatTau float32 `json:"mirostat_tau"`
MirostatEta float32 `json:"mirostat_eta"`
PenalizeNl bool `json:"penalize_nl"`
Seed int `json:"seed"`
Stop []string `json:"stop,omitempty"`
}
const maxBufferSize = 512 * format.KiloByte
const maxBufferSize = 512 * 1000 // 512KB
func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, fn func(api.GenerateResponse)) error {
func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string, format string, fn func(api.GenerateResponse)) error {
prevConvo, err := llm.Decode(ctx, prevContext)
if err != nil {
return err
}
// Remove leading spaces from prevConvo if present
prevConvo = strings.TrimPrefix(prevConvo, " ")
var nextContext strings.Builder
nextContext.WriteString(prevConvo)
nextContext.WriteString(prompt)
request := map[string]any{
"prompt": nextContext.String(),
"stream": true,
"n_predict": llm.NumPredict,
"n_keep": llm.NumKeep,
"main_gpu": llm.MainGPU,
"temperature": llm.Temperature,
"top_k": llm.TopK,
"top_p": llm.TopP,
"tfs_z": llm.TFSZ,
"typical_p": llm.TypicalP,
"repeat_last_n": llm.RepeatLastN,
"repeat_penalty": llm.RepeatPenalty,
"presence_penalty": llm.PresencePenalty,
"frequency_penalty": llm.FrequencyPenalty,
"mirostat": llm.Mirostat,
"mirostat_tau": llm.MirostatTau,
"mirostat_eta": llm.MirostatEta,
"penalize_nl": llm.PenalizeNewline,
"seed": llm.Seed,
"stop": llm.Stop,
}
if format == "json" {
request["grammar"] = jsonGrammar
}
// Handling JSON marshaling with special characters unescaped.
buffer := &bytes.Buffer{}
enc := json.NewEncoder(buffer)
enc.SetEscapeHTML(false)
if err := enc.Encode(request); err != nil {
return fmt.Errorf("failed to marshal data: %v", err)
}
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
predReq := PredictRequest{
Prompt: nextContext.String(),
Stream: true,
NPredict: llm.NumPredict,
NKeep: llm.NumKeep,
Temperature: llm.Temperature,
TopK: llm.TopK,
TopP: llm.TopP,
TfsZ: llm.TFSZ,
TypicalP: llm.TypicalP,
RepeatLastN: llm.RepeatLastN,
RepeatPenalty: llm.RepeatPenalty,
PresencePenalty: llm.PresencePenalty,
FrequencyPenalty: llm.FrequencyPenalty,
Mirostat: llm.Mirostat,
MirostatTau: llm.MirostatTau,
MirostatEta: llm.MirostatEta,
PenalizeNl: llm.PenalizeNewline,
Seed: llm.Seed,
Stop: llm.Stop,
}
data, err := json.Marshal(predReq)
if err != nil {
return fmt.Errorf("error marshaling data: %v", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, buffer)
if err != nil {
return fmt.Errorf("error creating POST request: %v", err)
}
@@ -563,16 +612,14 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
// This handles the request cancellation
return ctx.Err()
default:
line := scanner.Text()
if line == "" {
line := scanner.Bytes()
if len(line) == 0 {
continue
}
// Read data from the server-side event stream
if strings.HasPrefix(line, "data: ") {
evt := line[6:]
var p Prediction
if err := json.Unmarshal([]byte(evt), &p); err != nil {
if evt, ok := bytes.CutPrefix(line, []byte("data: ")); ok {
var p prediction
if err := json.Unmarshal(evt, &p); err != nil {
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
}
@@ -590,10 +637,10 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
fn(api.GenerateResponse{
Done: true,
Context: embd,
PromptEvalCount: p.PromptN,
PromptEvalDuration: parseDurationMs(p.PromptMS),
EvalCount: p.PredictedN,
EvalDuration: parseDurationMs(p.PredictedMS),
PromptEvalCount: p.Timings.PromptN,
PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
EvalCount: p.Timings.PredictedN,
EvalDuration: parseDurationMs(p.Timings.PredictedMS),
})
return nil
@@ -603,6 +650,14 @@ func (llm *llama) Predict(ctx context.Context, prevContext []int, prompt string,
}
if err := scanner.Err(); err != nil {
if strings.Contains(err.Error(), "unexpected EOF") {
// this means the llama runner subprocess crashed
llm.Close()
if llm.StatusWriter != nil && llm.StatusWriter.LastErrMsg != "" {
return fmt.Errorf("llama runner exited: %v", llm.StatusWriter.LastErrMsg)
}
return fmt.Errorf("llama runner exited, you may not have enough available memory to run this model")
}
return fmt.Errorf("error reading llm response: %v", err)
}
@@ -699,9 +754,6 @@ func (llm *llama) Decode(ctx context.Context, tokens []int) (string, error) {
return "", fmt.Errorf("unmarshal encode response: %w", err)
}
// decoded content contains a leading whitespace
decoded.Content, _ = strings.CutPrefix(decoded.Content, "")
return decoded.Content, nil
}

View File

@@ -10,10 +10,11 @@ import (
"github.com/pbnjay/memory"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format"
)
type LLM interface {
Predict(context.Context, []int, string, func(api.GenerateResponse)) error
Predict(context.Context, []int, string, string, func(api.GenerateResponse)) error
Embedding(context.Context, string) ([]float64, error)
Encode(context.Context, string) ([]int, error)
Decode(context.Context, []int) (string, error)
@@ -40,60 +41,47 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
if runtime.GOOS == "darwin" {
switch ggml.FileType() {
case "Q8_0":
case "F32", "Q5_0", "Q5_1", "Q8_0":
if ggml.Name() != "gguf" && opts.NumGPU != 0 {
// GGML Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
case "F32", "Q5_0", "Q5_1":
if opts.NumGPU != 0 {
// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
}
}
totalResidentMemory := memory.TotalMemory()
switch ggml.ModelType() {
case "3B", "7B":
if ggml.FileType() == "F16" && totalResidentMemory < 16*1000*1000 {
return nil, fmt.Errorf("F16 model requires at least 16 GB of memory")
} else if totalResidentMemory < 8*1000*1000 {
return nil, fmt.Errorf("model requires at least 8 GB of memory")
var requiredMemory int64
var f16Multiplier int64 = 2
switch ggml.ModelType() {
case "3B", "7B":
requiredMemory = 8 * format.GigaByte
case "13B":
requiredMemory = 16 * format.GigaByte
case "30B", "34B", "40B":
requiredMemory = 32 * format.GigaByte
case "65B", "70B":
requiredMemory = 64 * format.GigaByte
case "180B":
requiredMemory = 128 * format.GigaByte
f16Multiplier = 4
}
case "13B":
if ggml.FileType() == "F16" && totalResidentMemory < 32*1000*1000 {
return nil, fmt.Errorf("F16 model requires at least 32 GB of memory")
} else if totalResidentMemory < 16*1000*1000 {
return nil, fmt.Errorf("model requires at least 16 GB of memory")
}
case "30B", "34B", "40B":
if ggml.FileType() == "F16" && totalResidentMemory < 64*1000*1000 {
return nil, fmt.Errorf("F16 model requires at least 64 GB of memory")
} else if totalResidentMemory < 32*1000*1000 {
return nil, fmt.Errorf("model requires at least 32 GB of memory")
}
case "65B", "70B":
if ggml.FileType() == "F16" && totalResidentMemory < 128*1000*1000 {
return nil, fmt.Errorf("F16 model requires at least 128 GB of memory")
} else if totalResidentMemory < 64*1000*1000 {
return nil, fmt.Errorf("model requires at least 64 GB of memory")
}
case "180B":
if ggml.FileType() == "F16" && totalResidentMemory < 512*1000*1000 {
return nil, fmt.Errorf("F16 model requires at least 512GB of memory")
} else if totalResidentMemory < 128*1000*1000 {
return nil, fmt.Errorf("model requires at least 128GB of memory")
systemMemory := int64(memory.TotalMemory())
if ggml.FileType() == "F16" && requiredMemory*f16Multiplier > systemMemory {
return nil, fmt.Errorf("F16 model requires at least %s of total memory", format.HumanBytes(requiredMemory))
} else if requiredMemory > systemMemory {
return nil, fmt.Errorf("model requires at least %s of total memory", format.HumanBytes(requiredMemory))
}
}
switch ggml.Name() {
case "gguf":
opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
// TODO: gguf will load these options automatically from the model binary
opts.NumGQA = 0
opts.RopeFrequencyBase = 0.0
opts.RopeFrequencyScale = 0.0
return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
case "ggml", "ggmf", "ggjt", "ggla":
return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)

View File

@@ -40,7 +40,7 @@ func Parse(reader io.Reader) ([]Command, error) {
command.Args = string(fields[1])
// copy command for validation
modelCommand = command
case "LICENSE", "TEMPLATE", "SYSTEM", "PROMPT", "EMBED", "ADAPTER":
case "LICENSE", "TEMPLATE", "SYSTEM", "PROMPT", "ADAPTER":
command.Name = string(bytes.ToLower(fields[0]))
command.Args = string(fields[1])
case "PARAMETER":
@@ -51,6 +51,8 @@ func Parse(reader io.Reader) ([]Command, error) {
command.Name = string(fields[0])
command.Args = string(fields[1])
case "EMBED":
return nil, fmt.Errorf("deprecated command: EMBED is no longer supported, use the /embed API endpoint instead")
default:
if !bytes.HasPrefix(fields[0], []byte("#")) {
// log a warning for unknown commands

215
progress/bar.go Normal file
View File

@@ -0,0 +1,215 @@
package progress
import (
"fmt"
"os"
"strings"
"time"
"github.com/jmorganca/ollama/format"
"golang.org/x/term"
)
type Bar struct {
message string
messageWidth int
maxValue int64
initialValue int64
currentValue int64
started time.Time
stopped time.Time
maxBuckets int
buckets []bucket
}
type bucket struct {
updated time.Time
value int64
}
func NewBar(message string, maxValue, initialValue int64) *Bar {
b := Bar{
message: message,
messageWidth: -1,
maxValue: maxValue,
initialValue: initialValue,
currentValue: initialValue,
started: time.Now(),
maxBuckets: 10,
}
if initialValue >= maxValue {
b.stopped = time.Now()
}
return &b
}
// formatDuration limits the rendering of a time.Duration to 2 units
func formatDuration(d time.Duration) string {
switch {
case d >= 100*time.Hour:
return "99h+"
case d >= time.Hour:
return fmt.Sprintf("%dh%dm", int(d.Hours()), int(d.Minutes())%60)
default:
return d.Round(time.Second).String()
}
}
func (b *Bar) String() string {
termWidth, _, err := term.GetSize(int(os.Stderr.Fd()))
if err != nil {
termWidth = 80
}
var pre strings.Builder
if len(b.message) > 0 {
message := strings.TrimSpace(b.message)
if b.messageWidth > 0 && len(message) > b.messageWidth {
message = message[:b.messageWidth]
}
fmt.Fprintf(&pre, "%s", message)
if padding := b.messageWidth - pre.Len(); padding > 0 {
pre.WriteString(repeat(" ", padding))
}
pre.WriteString(" ")
}
fmt.Fprintf(&pre, "%3.0f%%", b.percent())
var suf strings.Builder
// max 13 characters: "999 MB/999 MB"
if b.stopped.IsZero() {
curValue := format.HumanBytes(b.currentValue)
suf.WriteString(repeat(" ", 6-len(curValue)))
suf.WriteString(curValue)
suf.WriteString("/")
maxValue := format.HumanBytes(b.maxValue)
suf.WriteString(repeat(" ", 6-len(maxValue)))
suf.WriteString(maxValue)
} else {
maxValue := format.HumanBytes(b.maxValue)
suf.WriteString(repeat(" ", 6-len(maxValue)))
suf.WriteString(maxValue)
suf.WriteString(repeat(" ", 7))
}
rate := b.rate()
// max 10 characters: " 999 MB/s"
if b.stopped.IsZero() && rate > 0 {
suf.WriteString(" ")
humanRate := format.HumanBytes(int64(rate))
suf.WriteString(repeat(" ", 6-len(humanRate)))
suf.WriteString(humanRate)
suf.WriteString("/s")
} else {
suf.WriteString(repeat(" ", 10))
}
// max 8 characters: " 59m59s"
if b.stopped.IsZero() && rate > 0 {
suf.WriteString(" ")
var remaining time.Duration
if rate > 0 {
remaining = time.Duration(int64(float64(b.maxValue-b.currentValue)/rate)) * time.Second
}
humanRemaining := formatDuration(remaining)
suf.WriteString(repeat(" ", 6-len(humanRemaining)))
suf.WriteString(humanRemaining)
} else {
suf.WriteString(repeat(" ", 8))
}
var mid strings.Builder
// add 5 extra spaces: 2 boundary characters and 1 space at each end
f := termWidth - pre.Len() - suf.Len() - 5
n := int(float64(f) * b.percent() / 100)
mid.WriteString(" ▕")
if n > 0 {
mid.WriteString(repeat("█", n))
}
if f-n > 0 {
mid.WriteString(repeat(" ", f-n))
}
mid.WriteString("▏ ")
return pre.String() + mid.String() + suf.String()
}
func (b *Bar) Set(value int64) {
if value >= b.maxValue {
value = b.maxValue
}
b.currentValue = value
if b.currentValue >= b.maxValue {
b.stopped = time.Now()
}
// throttle bucket updates to 1 per second
if len(b.buckets) == 0 || time.Since(b.buckets[len(b.buckets)-1].updated) > time.Second {
b.buckets = append(b.buckets, bucket{
updated: time.Now(),
value: value,
})
if len(b.buckets) > b.maxBuckets {
b.buckets = b.buckets[1:]
}
}
}
func (b *Bar) percent() float64 {
if b.maxValue > 0 {
return float64(b.currentValue) / float64(b.maxValue) * 100
}
return 0
}
func (b *Bar) rate() float64 {
var numerator, denominator float64
if !b.stopped.IsZero() {
numerator = float64(b.currentValue - b.initialValue)
denominator = b.stopped.Sub(b.started).Round(time.Second).Seconds()
} else {
switch len(b.buckets) {
case 0:
// noop
case 1:
numerator = float64(b.buckets[0].value - b.initialValue)
denominator = b.buckets[0].updated.Sub(b.started).Round(time.Second).Seconds()
default:
first, last := b.buckets[0], b.buckets[len(b.buckets)-1]
numerator = float64(last.value - first.value)
denominator = last.updated.Sub(first.updated).Round(time.Second).Seconds()
}
}
if denominator != 0 {
return numerator / denominator
}
return 0
}
func repeat(s string, n int) string {
if n > 0 {
return strings.Repeat(s, n)
}
return ""
}

113
progress/progress.go Normal file
View File

@@ -0,0 +1,113 @@
package progress
import (
"fmt"
"io"
"sync"
"time"
)
type State interface {
String() string
}
type Progress struct {
mu sync.Mutex
w io.Writer
pos int
ticker *time.Ticker
states []State
}
func NewProgress(w io.Writer) *Progress {
p := &Progress{w: w}
go p.start()
return p
}
func (p *Progress) stop() bool {
for _, state := range p.states {
if spinner, ok := state.(*Spinner); ok {
spinner.Stop()
}
}
if p.ticker != nil {
p.ticker.Stop()
p.ticker = nil
p.render()
return true
}
return false
}
func (p *Progress) Stop() bool {
stopped := p.stop()
if stopped {
fmt.Fprint(p.w, "\n")
}
return stopped
}
func (p *Progress) StopAndClear() bool {
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
stopped := p.stop()
if stopped {
// clear all progress lines
for i := 0; i < p.pos; i++ {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
}
}
return stopped
}
func (p *Progress) Add(key string, state State) {
p.mu.Lock()
defer p.mu.Unlock()
p.states = append(p.states, state)
}
func (p *Progress) render() error {
p.mu.Lock()
defer p.mu.Unlock()
fmt.Fprint(p.w, "\033[?25l")
defer fmt.Fprint(p.w, "\033[?25h")
// clear already rendered progress lines
for i := 0; i < p.pos; i++ {
if i > 0 {
fmt.Fprint(p.w, "\033[A")
}
fmt.Fprint(p.w, "\033[2K\033[1G")
}
// render progress lines
for i, state := range p.states {
fmt.Fprint(p.w, state.String())
if i < len(p.states)-1 {
fmt.Fprint(p.w, "\n")
}
}
p.pos = len(p.states)
return nil
}
func (p *Progress) start() {
p.ticker = time.NewTicker(100 * time.Millisecond)
for range p.ticker.C {
p.render()
}
}

73
progress/spinner.go Normal file
View File

@@ -0,0 +1,73 @@
package progress
import (
"fmt"
"strings"
"time"
)
type Spinner struct {
message string
messageWidth int
parts []string
value int
ticker *time.Ticker
started time.Time
stopped time.Time
}
func NewSpinner(message string) *Spinner {
s := &Spinner{
message: message,
parts: []string{
"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏",
},
started: time.Now(),
}
go s.start()
return s
}
func (s *Spinner) String() string {
var sb strings.Builder
if len(s.message) > 0 {
message := strings.TrimSpace(s.message)
if s.messageWidth > 0 && len(message) > s.messageWidth {
message = message[:s.messageWidth]
}
fmt.Fprintf(&sb, "%s", message)
if padding := s.messageWidth - sb.Len(); padding > 0 {
sb.WriteString(strings.Repeat(" ", padding))
}
sb.WriteString(" ")
}
if s.stopped.IsZero() {
spinner := s.parts[s.value]
sb.WriteString(spinner)
sb.WriteString(" ")
}
return sb.String()
}
func (s *Spinner) start() {
s.ticker = time.NewTicker(100 * time.Millisecond)
for range s.ticker.C {
s.value = (s.value + 1) % len(s.parts)
if !s.stopped.IsZero() {
return
}
}
}
func (s *Spinner) Stop() {
if s.stopped.IsZero() {
s.stopped = time.Now()
}
}

View File

@@ -1,21 +0,0 @@
MIT License
Copyright (c) 2017 Zack
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,121 +0,0 @@
# progressbar
[![CI](https://github.com/schollz/progressbar/actions/workflows/ci.yml/badge.svg?branch=main&event=push)](https://github.com/schollz/progressbar/actions/workflows/ci.yml)
[![go report card](https://goreportcard.com/badge/github.com/schollz/progressbar)](https://goreportcard.com/report/github.com/schollz/progressbar)
[![coverage](https://img.shields.io/badge/coverage-84%25-brightgreen.svg)](https://gocover.io/github.com/schollz/progressbar)
[![godocs](https://godoc.org/github.com/schollz/progressbar?status.svg)](https://godoc.org/github.com/schollz/progressbar/v3)
A very simple thread-safe progress bar which should work on every OS without problems. I needed a progressbar for [croc](https://github.com/schollz/croc) and everything I tried had problems, so I made another one. In order to be OS agnostic I do not plan to support [multi-line outputs](https://github.com/schollz/progressbar/issues/6).
## Install
```
go get -u github.com/schollz/progressbar/v3
```
## Usage
### Basic usage
```golang
bar := progressbar.Default(100)
for i := 0; i < 100; i++ {
bar.Add(1)
time.Sleep(40 * time.Millisecond)
}
```
which looks like:
![Example of basic bar](examples/basic/basic.gif)
### I/O operations
The `progressbar` implements an `io.Writer` so it can automatically detect the number of bytes written to a stream, so you can use it as a progressbar for an `io.Reader`.
```golang
req, _ := http.NewRequest("GET", "https://dl.google.com/go/go1.14.2.src.tar.gz", nil)
resp, _ := http.DefaultClient.Do(req)
defer resp.Body.Close()
f, _ := os.OpenFile("go1.14.2.src.tar.gz", os.O_CREATE|os.O_WRONLY, 0644)
defer f.Close()
bar := progressbar.DefaultBytes(
resp.ContentLength,
"downloading",
)
io.Copy(io.MultiWriter(f, bar), resp.Body)
```
which looks like:
![Example of download bar](examples/download/download.gif)
### Progress bar with unknown length
A progressbar with unknown length is a spinner. Any bar with -1 length will automatically convert it to a spinner with a customizable spinner type. For example, the above code can be run and set the `resp.ContentLength` to `-1`.
which looks like:
![Example of download bar with unknown length](examples/download-unknown/download-unknown.gif)
### Customization
There is a lot of customization that you can do - change the writer, the color, the width, description, theme, etc. See [all the options](https://pkg.go.dev/github.com/schollz/progressbar/v3?tab=doc#Option).
```golang
bar := progressbar.NewOptions(1000,
progressbar.OptionSetWriter(ansi.NewAnsiStdout()),
progressbar.OptionEnableColorCodes(true),
progressbar.OptionShowBytes(true),
progressbar.OptionSetWidth(15),
progressbar.OptionSetDescription("[cyan][1/3][reset] Writing moshable file..."),
progressbar.OptionSetTheme(progressbar.Theme{
Saucer: "[green]=[reset]",
SaucerHead: "[green]>[reset]",
SaucerPadding: " ",
BarStart: "[",
BarEnd: "]",
}))
for i := 0; i < 1000; i++ {
bar.Add(1)
time.Sleep(5 * time.Millisecond)
}
```
which looks like:
![Example of customized bar](examples/customization/customization.gif)
## Contributing
Pull requests are welcome. Feel free to...
- Revise documentation
- Add new features
- Fix bugs
- Suggest improvements
## Thanks
Thanks [@Dynom](https://github.com/dynom) for massive improvements in version 2.0!
Thanks [@CrushedPixel](https://github.com/CrushedPixel) for adding descriptions and color code support!
Thanks [@MrMe42](https://github.com/MrMe42) for adding some minor features!
Thanks [@tehstun](https://github.com/tehstun) for some great PRs!
Thanks [@Benzammour](https://github.com/Benzammour) and [@haseth](https://github.com/haseth) for helping create v3!
Thanks [@briandowns](https://github.com/briandowns) for compiling the list of spinners.
## License
MIT

File diff suppressed because it is too large Load Diff

View File

@@ -1,80 +0,0 @@
package progressbar
var spinners = map[int][]string{
0: {"←", "↖", "↑", "↗", "→", "↘", "↓", "↙"},
1: {"▁", "▃", "▄", "▅", "▆", "▇", "█", "▇", "▆", "▅", "▄", "▃", "▁"},
2: {"▖", "▘", "▝", "▗"},
3: {"┤", "┘", "┴", "└", "├", "┌", "┬", "┐"},
4: {"◢", "◣", "◤", "◥"},
5: {"◰", "◳", "◲", "◱"},
6: {"◴", "◷", "◶", "◵"},
7: {"◐", "◓", "◑", "◒"},
8: {".", "o", "O", "@", "*"},
9: {"|", "/", "-", "\\"},
10: {"◡◡", "⊙⊙", "◠◠"},
11: {"⣾", "⣽", "⣻", "⢿", "⡿", "⣟", "⣯", "⣷"},
12: {">))'>", " >))'>", " >))'>", " >))'>", " >))'>", " <'((<", " <'((<", " <'((<"},
13: {"⠁", "⠂", "⠄", "⡀", "⢀", "⠠", "⠐", "⠈"},
14: {"⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"},
15: {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"},
16: {"▉", "▊", "▋", "▌", "▍", "▎", "▏", "▎", "▍", "▌", "▋", "▊", "▉"},
17: {"■", "□", "▪", "▫"},
18: {"←", "↑", "→", "↓"},
19: {"╫", "╪"},
20: {"⇐", "⇖", "⇑", "⇗", "⇒", "⇘", "⇓", "⇙"},
21: {"⠁", "⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈", "⠈"},
22: {"⠈", "⠉", "⠋", "⠓", "⠒", "⠐", "⠐", "⠒", "⠖", "⠦", "⠤", "⠠", "⠠", "⠤", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋", "⠉", "⠈"},
23: {"⠁", "⠉", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠤", "⠄", "⠄", "⠤", "⠴", "⠲", "⠒", "⠂", "⠂", "⠒", "⠚", "⠙", "⠉", "⠁"},
24: {"⠋", "⠙", "⠚", "⠒", "⠂", "⠂", "⠒", "⠲", "⠴", "⠦", "⠖", "⠒", "⠐", "⠐", "⠒", "⠓", "⠋"},
25: {"ヲ", "ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ッ", "ア", "イ", "ウ", "エ", "オ", "カ", "キ", "ク", "ケ", "コ", "サ", "シ", "ス", "セ", "ソ", "タ", "チ", "ツ", "テ", "ト", "ナ", "ニ", "ヌ", "ネ", "ノ", "ハ", "ヒ", "フ", "ヘ", "ホ", "マ", "ミ", "ム", "メ", "モ", "ヤ", "ユ", "ヨ", "ラ", "リ", "ル", "レ", "ロ", "ワ", "ン"},
26: {".", "..", "..."},
27: {"▁", "▂", "▃", "▄", "▅", "▆", "▇", "█", "▉", "▊", "▋", "▌", "▍", "▎", "▏", "▏", "▎", "▍", "▌", "▋", "▊", "▉", "█", "▇", "▆", "▅", "▄", "▃", "▂", "▁"},
28: {".", "o", "O", "°", "O", "o", "."},
29: {"+", "x"},
30: {"v", "<", "^", ">"},
31: {">>--->", " >>--->", " >>--->", " >>--->", " >>--->", " <---<<", " <---<<", " <---<<", " <---<<", "<---<<"},
32: {"|", "||", "|||", "||||", "|||||", "|||||||", "||||||||", "|||||||", "||||||", "|||||", "||||", "|||", "||", "|"},
33: {"[ ]", "[= ]", "[== ]", "[=== ]", "[==== ]", "[===== ]", "[====== ]", "[======= ]", "[======== ]", "[========= ]", "[==========]"},
34: {"(*---------)", "(-*--------)", "(--*-------)", "(---*------)", "(----*-----)", "(-----*----)", "(------*---)", "(-------*--)", "(--------*-)", "(---------*)"},
35: {"█▒▒▒▒▒▒▒▒▒", "███▒▒▒▒▒▒▒", "█████▒▒▒▒▒", "███████▒▒▒", "██████████"},
36: {"[ ]", "[=> ]", "[===> ]", "[=====> ]", "[======> ]", "[========> ]", "[==========> ]", "[============> ]", "[==============> ]", "[================> ]", "[==================> ]", "[===================>]"},
37: {"", ""},
38: {"▌", "▀", "▐▄"},
39: {"🌍", "🌎", "🌏"},
40: {"◜", "◝", "◞", "◟"},
41: {"⬒", "⬔", "⬓", "⬕"},
42: {"⬖", "⬘", "⬗", "⬙"},
43: {"[>>> >]", "[]>>>> []", "[] >>>> []", "[] >>>> []", "[] >>>> []", "[] >>>>[]", "[>> >>]"},
44: {"♠", "♣", "♥", "♦"},
45: {"➞", "➟", "➠", "➡", "➠", "➟"},
46: {" | ", ` \ `, "_ ", ` \ `, " | ", " / ", " _", " / "},
47: {" . . . .", ". . . .", ". . . .", ". . . .", ". . . . ", ". . . . ."},
48: {" | ", " / ", " _ ", ` \ `, " | ", ` \ `, " _ ", " / "},
49: {"⎺", "⎻", "⎼", "⎽", "⎼", "⎻"},
50: {"▹▹▹▹▹", "▸▹▹▹▹", "▹▸▹▹▹", "▹▹▸▹▹", "▹▹▹▸▹", "▹▹▹▹▸"},
51: {"[ ]", "[ =]", "[ ==]", "[ ===]", "[====]", "[=== ]", "[== ]", "[= ]"},
52: {"( ● )", "( ● )", "( ● )", "( ● )", "( ●)", "( ● )", "( ● )", "( ● )", "( ● )"},
53: {"✶", "✸", "✹", "✺", "✹", "✷"},
54: {"▐|\\____________▌", "▐_|\\___________▌", "▐__|\\__________▌", "▐___|\\_________▌", "▐____|\\________▌", "▐_____|\\_______▌", "▐______|\\______▌", "▐_______|\\_____▌", "▐________|\\____▌", "▐_________|\\___▌", "▐__________|\\__▌", "▐___________|\\_▌", "▐____________|\\▌", "▐____________/|▌", "▐___________/|_▌", "▐__________/|__▌", "▐_________/|___▌", "▐________/|____▌", "▐_______/|_____▌", "▐______/|______▌", "▐_____/|_______▌", "▐____/|________▌", "▐___/|_________▌", "▐__/|__________▌", "▐_/|___________▌", "▐/|____________▌"},
55: {"▐⠂ ▌", "▐⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂▌", "▐ ⠠▌", "▐ ⡀▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐ ⠠ ▌", "▐ ⠂ ▌", "▐ ⠈ ▌", "▐ ⠂ ▌", "▐ ⠠ ▌", "▐ ⡀ ▌", "▐⠠ ▌"},
56: {"¿", "?"},
57: {"⢹", "⢺", "⢼", "⣸", "⣇", "⡧", "⡗", "⡏"},
58: {"⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"},
59: {". ", ".. ", "...", " ..", " .", " "},
60: {".", "o", "O", "°", "O", "o", "."},
61: {"▓", "▒", "░"},
62: {"▌", "▀", "▐", "▄"},
63: {"⊶", "⊷"},
64: {"▪", "▫"},
65: {"□", "■"},
66: {"▮", "▯"},
67: {"-", "=", "≡"},
68: {"d", "q", "p", "b"},
69: {"∙∙∙", "●∙∙", "∙●∙", "∙∙●", "∙∙∙"},
70: {"🌑 ", "🌒 ", "🌓 ", "🌔 ", "🌕 ", "🌖 ", "🌗 ", "🌘 "},
71: {"☗", "☖"},
72: {"⧇", "⧆"},
73: {"◉", "◎"},
74: {"㊂", "㊀", "㊁"},
75: {"⦾", "⦿"},
}

372
readline/buffer.go Normal file
View File

@@ -0,0 +1,372 @@
package readline
import (
"fmt"
"os"
"github.com/emirpasic/gods/lists/arraylist"
"golang.org/x/term"
)
type Buffer struct {
Pos int
Buf *arraylist.List
Prompt *Prompt
LineWidth int
Width int
Height int
}
func NewBuffer(prompt *Prompt) (*Buffer, error) {
fd := int(os.Stdout.Fd())
width, height, err := term.GetSize(fd)
if err != nil {
fmt.Println("Error getting size:", err)
return nil, err
}
lwidth := width - len(prompt.Prompt)
if prompt.UseAlt {
lwidth = width - len(prompt.AltPrompt)
}
b := &Buffer{
Pos: 0,
Buf: arraylist.New(),
Prompt: prompt,
Width: width,
Height: height,
LineWidth: lwidth,
}
return b, nil
}
func (b *Buffer) MoveLeft() {
if b.Pos > 0 {
if b.Pos%b.LineWidth == 0 {
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width))
} else {
fmt.Print(CursorLeft)
}
b.Pos -= 1
}
}
func (b *Buffer) MoveLeftWord() {
if b.Pos > 0 {
var foundNonspace bool
for {
v, _ := b.Buf.Get(b.Pos - 1)
if v == ' ' {
if foundNonspace {
break
}
} else {
foundNonspace = true
}
b.MoveLeft()
if b.Pos == 0 {
break
}
}
}
}
func (b *Buffer) MoveRight() {
if b.Pos < b.Size() {
b.Pos += 1
if b.Pos%b.LineWidth == 0 {
fmt.Printf(CursorDown + CursorBOL + cursorRightN(b.PromptSize()))
} else {
fmt.Print(CursorRight)
}
}
}
func (b *Buffer) MoveRightWord() {
if b.Pos < b.Size() {
for {
b.MoveRight()
v, _ := b.Buf.Get(b.Pos)
if v == ' ' {
break
}
if b.Pos == b.Size() {
break
}
}
}
}
func (b *Buffer) MoveToStart() {
if b.Pos > 0 {
currLine := b.Pos / b.LineWidth
if currLine > 0 {
for cnt := 0; cnt < currLine; cnt++ {
fmt.Print(CursorUp)
}
}
fmt.Printf(CursorBOL + cursorRightN(b.PromptSize()))
b.Pos = 0
}
}
func (b *Buffer) MoveToEnd() {
if b.Pos < b.Size() {
currLine := b.Pos / b.LineWidth
totalLines := b.Size() / b.LineWidth
if currLine < totalLines {
for cnt := 0; cnt < totalLines-currLine; cnt++ {
fmt.Print(CursorDown)
}
remainder := b.Size() % b.LineWidth
fmt.Printf(CursorBOL + cursorRightN(b.PromptSize()+remainder))
} else {
fmt.Print(cursorRightN(b.Size() - b.Pos))
}
b.Pos = b.Size()
}
}
func (b *Buffer) Size() int {
return b.Buf.Size()
}
func min(n, m int) int {
if n > m {
return m
}
return n
}
func (b *Buffer) PromptSize() int {
if b.Prompt.UseAlt {
return len(b.Prompt.AltPrompt)
}
return len(b.Prompt.Prompt)
}
func (b *Buffer) Add(r rune) {
if b.Pos == b.Buf.Size() {
fmt.Printf("%c", r)
b.Buf.Add(r)
b.Pos += 1
if b.Pos > 0 && b.Pos%b.LineWidth == 0 {
fmt.Printf("\n%s", b.Prompt.AltPrompt)
}
} else {
fmt.Printf("%c", r)
b.Buf.Insert(b.Pos, r)
b.Pos += 1
if b.Pos > 0 && b.Pos%b.LineWidth == 0 {
fmt.Printf("\n%s", b.Prompt.AltPrompt)
}
b.drawRemaining()
}
}
func (b *Buffer) drawRemaining() {
var place int
remainingText := b.StringN(b.Pos)
if b.Pos > 0 {
place = b.Pos % b.LineWidth
}
fmt.Print(CursorHide)
// render the rest of the current line
currLine := remainingText[:min(b.LineWidth-place, len(remainingText))]
if len(currLine) > 0 {
fmt.Printf(ClearToEOL + currLine)
fmt.Print(cursorLeftN(len(currLine)))
} else {
fmt.Print(ClearToEOL)
}
// render the other lines
if len(remainingText) > len(currLine) {
remaining := []rune(remainingText[len(currLine):])
var totalLines int
for i, c := range remaining {
if i%b.LineWidth == 0 {
fmt.Printf("\n%s", b.Prompt.AltPrompt)
totalLines += 1
}
fmt.Printf("%c", c)
}
fmt.Print(ClearToEOL)
fmt.Print(cursorUpN(totalLines))
fmt.Printf(CursorBOL + cursorRightN(b.Width-len(currLine)))
}
fmt.Print(CursorShow)
}
func (b *Buffer) Remove() {
if b.Buf.Size() > 0 && b.Pos > 0 {
if b.Pos%b.LineWidth == 0 {
// if the user backspaces over the word boundary, do this magic to clear the line
// and move to the end of the previous line
fmt.Printf(CursorBOL + ClearToEOL)
fmt.Printf(CursorUp + CursorBOL + cursorRightN(b.Width) + " " + CursorLeft)
} else {
fmt.Printf(CursorLeft + " " + CursorLeft)
}
var eraseExtraLine bool
if (b.Size()-1)%b.LineWidth == 0 {
eraseExtraLine = true
}
b.Pos -= 1
b.Buf.Remove(b.Pos)
if b.Pos < b.Size() {
b.drawRemaining()
// this erases a line which is left over when backspacing in the middle of a line and there
// are trailing characters which go over the line width boundary
if eraseExtraLine {
remainingLines := (b.Size() - b.Pos) / b.LineWidth
fmt.Printf(cursorDownN(remainingLines+1) + CursorBOL + ClearToEOL)
place := b.Pos % b.LineWidth
fmt.Printf(cursorUpN(remainingLines+1) + cursorRightN(place+len(b.Prompt.Prompt)))
}
}
}
}
func (b *Buffer) Delete() {
if b.Size() > 0 && b.Pos < b.Size() {
b.Buf.Remove(b.Pos)
b.drawRemaining()
if b.Size()%b.LineWidth == 0 {
if b.Pos != b.Size() {
remainingLines := (b.Size() - b.Pos) / b.LineWidth
fmt.Printf(cursorDownN(remainingLines) + CursorBOL + ClearToEOL)
place := b.Pos % b.LineWidth
fmt.Printf(cursorUpN(remainingLines) + cursorRightN(place+len(b.Prompt.Prompt)))
}
}
}
}
func (b *Buffer) DeleteBefore() {
if b.Pos > 0 {
for cnt := b.Pos - 1; cnt >= 0; cnt-- {
b.Remove()
}
}
}
func (b *Buffer) DeleteRemaining() {
if b.Size() > 0 && b.Pos < b.Size() {
charsToDel := b.Size() - b.Pos
for cnt := 0; cnt < charsToDel; cnt++ {
b.Delete()
}
}
}
func (b *Buffer) DeleteWord() {
if b.Buf.Size() > 0 && b.Pos > 0 {
var foundNonspace bool
for {
v, _ := b.Buf.Get(b.Pos - 1)
if v == ' ' {
if !foundNonspace {
b.Remove()
} else {
break
}
} else {
foundNonspace = true
b.Remove()
}
if b.Pos == 0 {
break
}
}
}
}
func (b *Buffer) ClearScreen() {
fmt.Printf(ClearScreen + CursorReset + b.Prompt.Prompt)
if b.IsEmpty() {
ph := b.Prompt.Placeholder
fmt.Printf(ColorGrey + ph + cursorLeftN(len(ph)) + ColorDefault)
} else {
currPos := b.Pos
b.Pos = 0
b.drawRemaining()
fmt.Printf(CursorReset + cursorRightN(len(b.Prompt.Prompt)))
if currPos > 0 {
targetLine := currPos / b.LineWidth
if targetLine > 0 {
for cnt := 0; cnt < targetLine; cnt++ {
fmt.Print(CursorDown)
}
}
remainder := currPos % b.LineWidth
if remainder > 0 {
fmt.Print(cursorRightN(remainder))
}
if currPos%b.LineWidth == 0 {
fmt.Printf(CursorBOL + b.Prompt.AltPrompt)
}
}
b.Pos = currPos
}
}
func (b *Buffer) IsEmpty() bool {
return b.Buf.Empty()
}
func (b *Buffer) Replace(r []rune) {
b.Pos = 0
b.Buf.Clear()
fmt.Printf(ClearLine + CursorBOL + b.Prompt.Prompt)
for _, c := range r {
b.Add(c)
}
}
func (b *Buffer) String() string {
return b.StringN(0)
}
func (b *Buffer) StringN(n int) string {
return b.StringNM(n, 0)
}
func (b *Buffer) StringNM(n, m int) string {
var s string
if m == 0 {
m = b.Size()
}
for cnt := n; cnt < m; cnt++ {
c, _ := b.Buf.Get(cnt)
s += string(c.(rune))
}
return s
}
func cursorLeftN(n int) string {
return fmt.Sprintf(CursorLeftN, n)
}
func cursorRightN(n int) string {
return fmt.Sprintf(CursorRightN, n)
}
func cursorUpN(n int) string {
return fmt.Sprintf(CursorUpN, n)
}
func cursorDownN(n int) string {
return fmt.Sprintf(CursorDownN, n)
}

17
readline/errors.go Normal file
View File

@@ -0,0 +1,17 @@
package readline
import (
"errors"
)
var (
ErrInterrupt = errors.New("Interrupt")
)
type InterruptError struct {
Line []rune
}
func (*InterruptError) Error() string {
return "Interrupted"
}

155
readline/history.go Normal file
View File

@@ -0,0 +1,155 @@
package readline
import (
"bufio"
"errors"
"io"
"os"
"path/filepath"
"strings"
"github.com/emirpasic/gods/lists/arraylist"
)
type History struct {
Buf *arraylist.List
Autosave bool
Pos int
Limit int
Filename string
Enabled bool
}
func NewHistory() (*History, error) {
h := &History{
Buf: arraylist.New(),
Limit: 100, //resizeme
Autosave: true,
Enabled: true,
}
err := h.Init()
if err != nil {
return nil, err
}
return h, nil
}
func (h *History) Init() error {
home, err := os.UserHomeDir()
if err != nil {
return err
}
path := filepath.Join(home, ".ollama", "history")
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return err
}
h.Filename = path
f, err := os.OpenFile(path, os.O_CREATE|os.O_RDONLY, 0600)
if err != nil {
if errors.Is(err, os.ErrNotExist) {
return nil
}
return err
}
defer f.Close()
r := bufio.NewReader(f)
for {
line, err := r.ReadString('\n')
if err != nil {
if err == io.EOF {
break
}
return err
}
line = strings.TrimSpace(line)
if len(line) == 0 {
continue
}
h.Add([]rune(line))
}
return nil
}
func (h *History) Add(l []rune) {
h.Buf.Add(l)
h.Compact()
h.Pos = h.Size()
if h.Autosave {
h.Save()
}
}
func (h *History) Compact() {
s := h.Buf.Size()
if s > h.Limit {
for cnt := 0; cnt < s-h.Limit; cnt++ {
h.Buf.Remove(0)
}
}
}
func (h *History) Clear() {
h.Buf.Clear()
}
func (h *History) Prev() []rune {
var line []rune
if h.Pos > 0 {
h.Pos -= 1
}
v, _ := h.Buf.Get(h.Pos)
line, _ = v.([]rune)
return line
}
func (h *History) Next() []rune {
var line []rune
if h.Pos < h.Buf.Size() {
h.Pos += 1
v, _ := h.Buf.Get(h.Pos)
line, _ = v.([]rune)
}
return line
}
func (h *History) Size() int {
return h.Buf.Size()
}
func (h *History) Save() error {
if !h.Enabled {
return nil
}
tmpFile := h.Filename + ".tmp"
f, err := os.OpenFile(tmpFile, os.O_CREATE|os.O_WRONLY|os.O_TRUNC|os.O_APPEND, 0666)
if err != nil {
return err
}
defer f.Close()
buf := bufio.NewWriter(f)
for cnt := 0; cnt < h.Size(); cnt++ {
v, _ := h.Buf.Get(cnt)
line, _ := v.([]rune)
buf.WriteString(string(line) + "\n")
}
buf.Flush()
f.Close()
if err = os.Rename(tmpFile, h.Filename); err != nil {
return err
}
return nil
}

253
readline/readline.go Normal file
View File

@@ -0,0 +1,253 @@
package readline
import (
"bufio"
"fmt"
"io"
"os"
"syscall"
)
type Prompt struct {
Prompt string
AltPrompt string
Placeholder string
AltPlaceholder string
UseAlt bool
}
type Terminal struct {
outchan chan rune
}
type Instance struct {
Prompt *Prompt
Terminal *Terminal
History *History
Pasting bool
}
func New(prompt Prompt) (*Instance, error) {
term, err := NewTerminal()
if err != nil {
return nil, err
}
history, err := NewHistory()
if err != nil {
return nil, err
}
return &Instance{
Prompt: &prompt,
Terminal: term,
History: history,
}, nil
}
func (i *Instance) Readline() (string, error) {
prompt := i.Prompt.Prompt
if i.Prompt.UseAlt || i.Pasting {
prompt = i.Prompt.AltPrompt
}
fmt.Print(prompt)
fd := int(syscall.Stdin)
termios, err := SetRawMode(fd)
if err != nil {
return "", err
}
defer UnsetRawMode(fd, termios)
buf, _ := NewBuffer(i.Prompt)
var esc bool
var escex bool
var metaDel bool
var currentLineBuf []rune
for {
// don't show placeholder when pasting unless we're in multiline mode
showPlaceholder := !i.Pasting || i.Prompt.UseAlt
if buf.IsEmpty() && showPlaceholder {
ph := i.Prompt.Placeholder
if i.Prompt.UseAlt {
ph = i.Prompt.AltPlaceholder
}
fmt.Printf(ColorGrey + ph + fmt.Sprintf(CursorLeftN, len(ph)) + ColorDefault)
}
r, err := i.Terminal.Read()
if buf.IsEmpty() {
fmt.Print(ClearToEOL)
}
if err != nil {
return "", io.EOF
}
if escex {
escex = false
switch r {
case KeyUp:
if i.History.Pos > 0 {
if i.History.Pos == i.History.Size() {
currentLineBuf = []rune(buf.String())
}
buf.Replace(i.History.Prev())
}
case KeyDown:
if i.History.Pos < i.History.Size() {
buf.Replace(i.History.Next())
if i.History.Pos == i.History.Size() {
buf.Replace(currentLineBuf)
}
}
case KeyLeft:
buf.MoveLeft()
case KeyRight:
buf.MoveRight()
case CharBracketedPaste:
var code string
for cnt := 0; cnt < 3; cnt++ {
r, err = i.Terminal.Read()
if err != nil {
return "", io.EOF
}
code += string(r)
}
if code == CharBracketedPasteStart {
i.Pasting = true
} else if code == CharBracketedPasteEnd {
i.Pasting = false
}
case KeyDel:
if buf.Size() > 0 {
buf.Delete()
}
metaDel = true
case MetaStart:
buf.MoveToStart()
case MetaEnd:
buf.MoveToEnd()
default:
// skip any keys we don't know about
continue
}
continue
} else if esc {
esc = false
switch r {
case 'b':
buf.MoveLeftWord()
case 'f':
buf.MoveRightWord()
case CharBackspace:
buf.DeleteWord()
case CharEscapeEx:
escex = true
}
continue
}
switch r {
case CharNull:
continue
case CharEsc:
esc = true
case CharInterrupt:
return "", ErrInterrupt
case CharLineStart:
buf.MoveToStart()
case CharLineEnd:
buf.MoveToEnd()
case CharBackward:
buf.MoveLeft()
case CharForward:
buf.MoveRight()
case CharBackspace, CharCtrlH:
buf.Remove()
case CharTab:
// todo: convert back to real tabs
for cnt := 0; cnt < 8; cnt++ {
buf.Add(' ')
}
case CharDelete:
if buf.Size() > 0 {
buf.Delete()
} else {
return "", io.EOF
}
case CharKill:
buf.DeleteRemaining()
case CharCtrlU:
buf.DeleteBefore()
case CharCtrlL:
buf.ClearScreen()
case CharCtrlW:
buf.DeleteWord()
case CharEnter:
output := buf.String()
if output != "" {
i.History.Add([]rune(output))
}
buf.MoveToEnd()
fmt.Println()
return output, nil
default:
if metaDel {
metaDel = false
continue
}
if r >= CharSpace || r == CharEnter {
buf.Add(r)
}
}
}
}
func (i *Instance) HistoryEnable() {
i.History.Enabled = true
}
func (i *Instance) HistoryDisable() {
i.History.Enabled = false
}
func NewTerminal() (*Terminal, error) {
t := &Terminal{
outchan: make(chan rune),
}
go t.ioloop()
return t, nil
}
func (t *Terminal) ioloop() {
buf := bufio.NewReader(os.Stdin)
for {
r, _, err := buf.ReadRune()
if err != nil {
close(t.outchan)
break
}
t.outchan <- r
}
}
func (t *Terminal) Read() (rune, error) {
r, ok := <-t.outchan
if !ok {
return 0, io.EOF
}
return r, nil
}

36
readline/term.go Normal file
View File

@@ -0,0 +1,36 @@
//go:build aix || darwin || dragonfly || freebsd || (linux && !appengine) || netbsd || openbsd || os400 || solaris
package readline
import (
"syscall"
)
type Termios syscall.Termios
func SetRawMode(fd int) (*Termios, error) {
termios, err := getTermios(fd)
if err != nil {
return nil, err
}
newTermios := *termios
newTermios.Iflag &^= syscall.IGNBRK | syscall.BRKINT | syscall.PARMRK | syscall.ISTRIP | syscall.INLCR | syscall.IGNCR | syscall.ICRNL | syscall.IXON
newTermios.Lflag &^= syscall.ECHO | syscall.ECHONL | syscall.ICANON | syscall.ISIG | syscall.IEXTEN
newTermios.Cflag &^= syscall.CSIZE | syscall.PARENB
newTermios.Cflag |= syscall.CS8
newTermios.Cc[syscall.VMIN] = 1
newTermios.Cc[syscall.VTIME] = 0
return termios, setTermios(fd, &newTermios)
}
func UnsetRawMode(fd int, termios *Termios) error {
return setTermios(fd, termios)
}
// IsTerminal returns true if the given file descriptor is a terminal.
func IsTerminal(fd int) bool {
_, err := getTermios(fd)
return err == nil
}

25
readline/term_bsd.go Normal file
View File

@@ -0,0 +1,25 @@
//go:build darwin || freebsd || netbsd || openbsd
package readline
import (
"syscall"
"unsafe"
)
func getTermios(fd int) (*Termios, error) {
termios := new(Termios)
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCGETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return nil, err
}
return termios, nil
}
func setTermios(fd int, termios *Termios) error {
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), syscall.TIOCSETA, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return err
}
return nil
}

28
readline/term_linux.go Normal file
View File

@@ -0,0 +1,28 @@
//go:build linux || solaris
package readline
import (
"syscall"
"unsafe"
)
const tcgets = 0x5401
const tcsets = 0x5402
func getTermios(fd int) (*Termios, error) {
termios := new(Termios)
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcgets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return nil, err
}
return termios, nil
}
func setTermios(fd int, termios *Termios) error {
_, _, err := syscall.Syscall6(syscall.SYS_IOCTL, uintptr(fd), tcsets, uintptr(unsafe.Pointer(termios)), 0, 0, 0)
if err != 0 {
return err
}
return nil
}

62
readline/term_windows.go Normal file
View File

@@ -0,0 +1,62 @@
package readline
import (
"syscall"
"unsafe"
)
const (
enableLineInput = 2
enableWindowInput = 8
enableMouseInput = 16
enableInsertMode = 32
enableQuickEditMode = 64
enableExtendedFlags = 128
enableProcessedOutput = 1
enableWrapAtEolOutput = 2
enableAutoPosition = 256 // Cursor position is not affected by writing data to the console.
enableEchoInput = 4 // Characters are written to the console as they're read.
enableProcessedInput = 1 // Enables input processing (like recognizing Ctrl+C).
)
var kernel32 = syscall.NewLazyDLL("kernel32.dll")
var (
procGetConsoleMode = kernel32.NewProc("GetConsoleMode")
procSetConsoleMode = kernel32.NewProc("SetConsoleMode")
)
type State struct {
mode uint32
}
// IsTerminal checks if the given file descriptor is associated with a terminal
func IsTerminal(fd int) bool {
var st uint32
r, _, e := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
// if the call succeeds and doesn't produce an error, it's a terminal
return r != 0 && e == 0
}
func SetRawMode(fd int) (*State, error) {
var st uint32
// retrieve the current mode of the terminal
_, _, e := syscall.SyscallN(procGetConsoleMode.Addr(), uintptr(fd), uintptr(unsafe.Pointer(&st)), 0)
if e != 0 {
return nil, error(e)
}
// modify the mode to set it to raw
raw := st &^ (enableEchoInput | enableProcessedInput | enableLineInput | enableProcessedOutput)
// apply the new mode to the terminal
_, _, e = syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(fd), uintptr(raw), 0)
if e != 0 {
return nil, error(e)
}
// return the original state so that it can be restored later
return &State{st}, nil
}
func UnsetRawMode(fd int, state *State) error {
_, _, err := syscall.SyscallN(procSetConsoleMode.Addr(), uintptr(fd), uintptr(state.mode), 0)
return err
}

78
readline/types.go Normal file
View File

@@ -0,0 +1,78 @@
package readline
const (
CharNull = 0
CharLineStart = 1
CharBackward = 2
CharInterrupt = 3
CharDelete = 4
CharLineEnd = 5
CharForward = 6
CharBell = 7
CharCtrlH = 8
CharTab = 9
CharCtrlJ = 10
CharKill = 11
CharCtrlL = 12
CharEnter = 13
CharNext = 14
CharPrev = 16
CharBckSearch = 18
CharFwdSearch = 19
CharTranspose = 20
CharCtrlU = 21
CharCtrlW = 23
CharCtrlY = 25
CharCtrlZ = 26
CharEsc = 27
CharSpace = 32
CharEscapeEx = 91
CharBackspace = 127
)
const (
KeyDel = 51
KeyUp = 65
KeyDown = 66
KeyRight = 67
KeyLeft = 68
MetaEnd = 70
MetaStart = 72
)
const (
CursorUp = "\033[1A"
CursorDown = "\033[1B"
CursorRight = "\033[1C"
CursorLeft = "\033[1D"
CursorSave = "\033[s"
CursorRestore = "\033[u"
CursorUpN = "\033[%dA"
CursorDownN = "\033[%dB"
CursorRightN = "\033[%dC"
CursorLeftN = "\033[%dD"
CursorEOL = "\033[E"
CursorBOL = "\033[1G"
CursorHide = "\033[?25l"
CursorShow = "\033[?25h"
ClearToEOL = "\033[K"
ClearLine = "\033[2K"
ClearScreen = "\033[2J"
CursorReset = "\033[0;0f"
ColorGrey = "\033[38;5;245m"
ColorDefault = "\033[0m"
StartBracketedPaste = "\033[?2004h"
EndBracketedPaste = "\033[?2004l"
)
const (
CharBracketedPaste = 50
CharBracketedPasteStart = "00~"
CharBracketedPasteEnd = "01~"
)

View File

@@ -10,6 +10,7 @@ mkdir -p dist
for TARGETARCH in arm64 amd64; do
GOOS=darwin GOARCH=$TARGETARCH go generate ./...
GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
rm -rf llm/llama.cpp/*/build
done
lipo -create -output dist/ollama dist/ollama-darwin-*

View File

@@ -10,6 +10,8 @@ docker buildx build \
--platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
--cache-to type=local,dest=.cache \
-f Dockerfile \
-t ollama \
-t ollama/ollama:$VERSION \
.

View File

@@ -26,7 +26,8 @@ require() {
[ "$(uname -s)" = "Linux" ] || error 'This script is intended to run on Linux only.'
case "$(uname -m)" in
ARCH=$(uname -m)
case "$ARCH" in
x86_64) ARCH="amd64" ;;
aarch64|arm64) ARCH="arm64" ;;
*) error "Unsupported architecture: $ARCH" ;;
@@ -62,7 +63,10 @@ status "Installing ollama to $BINDIR..."
$SUDO install -o0 -g0 -m755 -d $BINDIR
$SUDO install -o0 -g0 -m755 $TEMP_DIR/ollama $BINDIR/ollama
install_success() { status 'Install complete. Run "ollama" from the command line.'; }
install_success() {
status 'The Ollama API is now available at 0.0.0.0:11434.'
status 'Install complete. Run "ollama" from the command line.'
}
trap install_success EXIT
# Everything from this point onwards is optional.
@@ -73,6 +77,9 @@ configure_systemd() {
$SUDO useradd -r -s /bin/false -m -d /usr/share/ollama ollama
fi
status "Adding current user to ollama group..."
$SUDO usermod -a -G ollama $(whoami)
status "Creating ollama systemd service..."
cat <<EOF | $SUDO tee /etc/systemd/system/ollama.service >/dev/null
[Unit]
@@ -85,7 +92,6 @@ User=ollama
Group=ollama
Restart=always
RestartSec=3
Environment="HOME=/usr/share/ollama"
Environment="PATH=$PATH"
[Install]
@@ -127,6 +133,7 @@ if check_gpu nvidia-smi; then
fi
if ! check_gpu lspci && ! check_gpu lshw; then
install_success
warning "No NVIDIA GPU detected. Ollama will run in CPU-only mode."
exit 0
fi
@@ -173,7 +180,10 @@ install_cuda_driver_apt() {
case $1 in
debian)
status 'Enabling contrib sources...'
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list | sudo tee /etc/apt/sources.list.d/contrib.list > /dev/null
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list | $SUDO tee /etc/apt/sources.list.d/contrib.list > /dev/null
if [ -f "/etc/apt/sources.list.d/debian.sources" ]; then
$SUDO sed 's/main/contrib/' < /etc/apt/sources.list.d/debian.sources | $SUDO tee /etc/apt/sources.list.d/contrib.sources > /dev/null
fi
;;
esac

16
scripts/push_docker.sh Executable file
View File

@@ -0,0 +1,16 @@
#!/bin/sh
set -eu
export VERSION=${VERSION:-0.0.0}
export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version=$VERSION\" \"-X=github.com/jmorganca/ollama/server.mode=release\"'"
docker buildx build \
--push \
--platform=linux/arm64,linux/amd64 \
--build-arg=VERSION \
--build-arg=GOFLAGS \
--cache-from type=local,src=.cache \
-f Dockerfile \
-t ollama/ollama -t ollama/ollama:$VERSION \
.

View File

@@ -91,7 +91,7 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect) (string, error) {
}
s := SignatureData{
Method: "GET",
Method: http.MethodGet,
Path: redirectURL.String(),
Data: nil,
}
@@ -103,9 +103,10 @@ func getAuthToken(ctx context.Context, redirData AuthRedirect) (string, error) {
headers := make(http.Header)
headers.Set("Authorization", sig)
resp, err := makeRequest(ctx, "GET", redirectURL, headers, nil, nil)
resp, err := makeRequest(ctx, http.MethodGet, redirectURL, headers, nil, nil)
if err != nil {
log.Printf("couldn't get token: %q", err)
return "", err
}
defer resp.Body.Close()

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"log"
"math"
"net/http"
"net/url"
"os"
@@ -15,6 +16,7 @@ import (
"strings"
"sync"
"sync/atomic"
"syscall"
"time"
"golang.org/x/sync/errgroup"
@@ -52,8 +54,8 @@ type blobDownloadPart struct {
const (
numDownloadParts = 64
minDownloadPartSize int64 = 32 * 1000 * 1000
maxDownloadPartSize int64 = 256 * 1000 * 1000
minDownloadPartSize int64 = 100 * format.MegaByte
maxDownloadPartSize int64 = 1000 * format.MegaByte
)
func (p *blobDownloadPart) Name() string {
@@ -88,17 +90,12 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *R
}
if len(b.Parts) == 0 {
resp, err := makeRequest(ctx, "HEAD", requestURL, nil, nil, opts)
resp, err := makeRequestWithRetry(ctx, http.MethodHead, requestURL, nil, nil, opts)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode >= http.StatusBadRequest {
body, _ := io.ReadAll(resp.Body)
return fmt.Errorf("registry responded with code %d: %v", resp.StatusCode, string(body))
}
b.Total, _ = strconv.ParseInt(resp.Header.Get("Content-Length"), 10, 64)
var size = b.Total / numDownloadParts
@@ -133,7 +130,6 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *Regis
func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *RegistryOptions) error {
defer blobDownloadManager.Delete(b.Digest)
ctx, b.CancelFunc = context.WithCancel(ctx)
file, err := os.OpenFile(b.Name+"-partial", os.O_CREATE|os.O_RDWR, 0644)
@@ -152,23 +148,26 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
continue
}
i := i
g.Go(func() error {
var err error
for try := 0; try < maxRetries; try++ {
w := io.NewOffsetWriter(file, part.StartsAt())
err := b.downloadChunk(inner, requestURL, w, part, opts)
err = b.downloadChunk(inner, requestURL, w, part, opts)
switch {
case errors.Is(err, context.Canceled):
case errors.Is(err, context.Canceled), errors.Is(err, syscall.ENOSPC):
// return immediately if the context is canceled or the device is out of space
return err
case err != nil:
log.Printf("%s part %d attempt %d failed: %v, retrying", b.Digest[7:19], i, try, err)
sleep := time.Second * time.Duration(math.Pow(2, float64(try)))
log.Printf("%s part %d attempt %d failed: %v, retrying in %s", b.Digest[7:19], part.N, try, err, sleep)
time.Sleep(sleep)
continue
default:
return nil
}
}
return errors.New("max retries exceeded")
return fmt.Errorf("%w: %w", errMaxRetriesExceeded, err)
})
}
@@ -198,14 +197,14 @@ func (b *blobDownload) run(ctx context.Context, requestURL *url.URL, opts *Regis
func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w io.Writer, part *blobDownloadPart, opts *RegistryOptions) error {
headers := make(http.Header)
headers.Set("Range", fmt.Sprintf("bytes=%d-%d", part.StartsAt(), part.StopsAt()-1))
resp, err := makeRequest(ctx, "GET", requestURL, headers, nil, opts)
resp, err := makeRequestWithRetry(ctx, http.MethodGet, requestURL, headers, nil, opts)
if err != nil {
return err
}
defer resp.Body.Close()
n, err := io.Copy(w, io.TeeReader(resp.Body, b))
if err != nil && !errors.Is(err, context.Canceled) {
if err != nil && !errors.Is(err, context.Canceled) && !errors.Is(err, io.ErrUnexpectedEOF) {
// rollback progress
b.Completed.Add(-n)
return err
@@ -216,7 +215,7 @@ func (b *blobDownload) downloadChunk(ctx context.Context, requestURL *url.URL, w
return err
}
// return nil or context.Canceled
// return nil or context.Canceled or UnexpectedEOF (resumable)
return err
}
@@ -285,7 +284,7 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
}
fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", b.Digest),
Status: fmt.Sprintf("pulling %s", b.Digest[7:19]),
Digest: b.Digest,
Total: b.Total,
Completed: b.Completed.Load(),
@@ -304,7 +303,9 @@ type downloadOpts struct {
fn func(api.ProgressResponse)
}
const maxRetries = 3
const maxRetries = 6
var errMaxRetriesExceeded = errors.New("max retries exceeded")
// downloadBlob downloads a blob from the registry and stores it in the blobs directory
func downloadBlob(ctx context.Context, opts downloadOpts) error {
@@ -320,7 +321,7 @@ func downloadBlob(ctx context.Context, opts downloadOpts) error {
return err
default:
opts.fn(api.ProgressResponse{
Status: fmt.Sprintf("downloading %s", opts.digest),
Status: fmt.Sprintf("pulling %s", opts.digest[7:19]),
Digest: opts.digest,
Total: fi.Size(),
Completed: fi.Size(),

File diff suppressed because it is too large Load Diff

Some files were not shown because too many files have changed in this diff Show More