diff --git a/docs/openai.md b/docs/openai.md
index 9dda05c3a..318ed71be 100644
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -27,6 +27,11 @@ chat_completion = client.chat.completions.create(
     ],
     model='llama3',
 )
+
+completion = client.completions.create(
+    model="llama3",
+    prompt="Say this is a test"
+)
 ```
 
 ### OpenAI JavaScript library
@@ -45,6 +50,11 @@ const chatCompletion = await openai.chat.completions.create({
   messages: [{ role: 'user', content: 'Say this is a test' }],
   model: 'llama3',
 })
+
+const completion = await openai.completions.create({
+  model: "llama3",
+  prompt: "Say this is a test.",
+})
 ```
 
 ### `curl`
@@ -66,6 +76,12 @@ curl http://localhost:11434/v1/chat/completions \
         ]
     }'
 
+curl http://localhost:11434/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "llama3",
+        "prompt": "Say this is a test"
+    }'
 ```
 
 ## Endpoints
@@ -107,6 +123,40 @@ curl http://localhost:11434/v1/chat/completions \
 
 - `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
 
+### `/v1/completions`
+
+#### Supported features
+
+- [x] Completions
+- [x] Streaming
+- [x] JSON mode
+- [x] Reproducible outputs
+- [ ] Logprobs
+
+#### Supported request fields
+
+- [x] `model`
+- [x] `prompt`
+- [x] `frequency_penalty`
+- [x] `presence_penalty`
+- [x] `seed`
+- [x] `stop`
+- [x] `stream`
+- [x] `temperature`
+- [x] `top_p`
+- [x] `max_tokens`
+- [ ] `best_of`
+- [ ] `echo`
+- [ ] `suffix`
+- [ ] `logit_bias`
+- [ ] `user`
+- [ ] `n`
+
+#### Notes
+
+- `prompt` currently only accepts a string
+- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
+
 ## Models
 
 Before using a model, pull it locally `ollama pull`:
diff --git a/llm/llama.cpp b/llm/llama.cpp
index a8db2a9ce..7c26775ad 160000
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
@@ -1 +1 @@
-Subproject commit a8db2a9ce64cd4417f6a312ab61858f17f0f8584
+Subproject commit 7c26775adb579e92b59c82e8084c07a1d0f75e9c
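
For trying out the new endpoint, here is a minimal sketch (not part of the patch itself) of a streaming request against `/v1/completions` using the OpenAI Python client. It assumes Ollama is running at the default `http://localhost:11434` address and that the `llama3` model has already been pulled; the `api_key` value is a placeholder the client requires but Ollama ignores.

```python
# Sketch: stream tokens from Ollama's OpenAI-compatible /v1/completions
# endpoint with the official openai client. Assumes a local Ollama server
# on the default port and an already-pulled `llama3` model.
from openai import OpenAI

client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',  # required by the client, but ignored by Ollama
)

stream = client.completions.create(
    model='llama3',
    prompt='Say this is a test',
    stream=True,  # `stream` is listed as a supported request field above
)

for chunk in stream:
    # Each streamed chunk carries a partial completion in choices[0].text
    print(chunk.choices[0].text, end='', flush=True)
```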