diff --git a/docs/openai.md b/docs/openai.md
index 9dda05c3a..318ed71be 100644
--- a/docs/openai.md
+++ b/docs/openai.md
@@ -27,6 +27,11 @@ chat_completion = client.chat.completions.create(
     ],
     model='llama3',
 )
+
+completion = client.completions.create(
+    model="llama3",
+    prompt="Say this is a test"
+)
 ```
 
 ### OpenAI JavaScript library
@@ -45,6 +50,11 @@ const chatCompletion = await openai.chat.completions.create({
   messages: [{ role: 'user', content: 'Say this is a test' }],
   model: 'llama3',
 })
+
+const completion = await openai.completions.create({
+  model: "llama3",
+  prompt: "Say this is a test.",
+})
 ```
 
 ### `curl`
@@ -66,6 +76,12 @@ curl http://localhost:11434/v1/chat/completions \
         ]
     }'
 
+curl http://localhost:11434/v1/completions \
+    -H "Content-Type: application/json" \
+    -d '{
+        "model": "llama3",
+        "prompt": "Say this is a test"
+    }'
 ```
 
 ## Endpoints
@@ -107,6 +123,40 @@ curl http://localhost:11434/v1/chat/completions \
 
 - `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
 
+### `/v1/completions`
+
+#### Supported features
+
+- [x] Completions
+- [x] Streaming
+- [x] JSON mode
+- [x] Reproducible outputs
+- [ ] Logprobs
+
+#### Supported request fields
+
+- [x] `model`
+- [x] `prompt`
+- [x] `frequency_penalty`
+- [x] `presence_penalty`
+- [x] `seed`
+- [x] `stop`
+- [x] `stream`
+- [x] `temperature`
+- [x] `top_p`
+- [x] `max_tokens`
+- [ ] `best_of`
+- [ ] `echo`
+- [ ] `suffix`
+- [ ] `logit_bias`
+- [ ] `user`
+- [ ] `n`
+
+#### Notes
+
+- `prompt` currently only accepts a string
+- `usage.prompt_tokens` will be 0 for completions where prompt evaluation is cached
+
 ## Models
 
 Before using a model, pull it locally `ollama pull`:
diff --git a/llm/llama.cpp b/llm/llama.cpp
index a8db2a9ce..7c26775ad 160000
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
@@ -1 +1 @@
-Subproject commit a8db2a9ce64cd4417f6a312ab61858f17f0f8584
+Subproject commit 7c26775adb579e92b59c82e8084c07a1d0f75e9c
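
For trying out the new endpoint, here is a minimal sketch (not part of the patch itself) of a streaming request against `/v1/completions` using the OpenAI Python client. It assumes Ollama is running at the default `http://localhost:11434` address and that the `llama3` model has already been pulled; the `api_key` value is a placeholder the client requires but Ollama ignores.

```python
# Sketch: stream tokens from Ollama's OpenAI-compatible /v1/completions
# endpoint with the official openai client. Assumes a local Ollama server
# on the default port and an already-pulled `llama3` model.
from openai import OpenAI

client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',  # required by the client, but ignored by Ollama
)

stream = client.completions.create(
    model='llama3',
    prompt='Say this is a test',
    stream=True,  # `stream` is listed as a supported request field above
)

for chunk in stream:
    # Each streamed chunk carries a partial completion in choices[0].text
    print(chunk.choices[0].text, end='', flush=True)
```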