diff --git a/api/types.go b/api/types.go
index 64f61d24a..534d9b3e2 100644
--- a/api/types.go
+++ b/api/types.go
@@ -216,6 +216,9 @@ type EmbedRequest struct {
 	// this request.
 	KeepAlive *Duration `json:"keep_alive,omitempty"`
 
+	// Truncate trims each input to fit within the model's context length.
+	Truncate *bool `json:"truncate,omitempty"`
+
 	// Options lists model-specific options.
 	Options map[string]interface{} `json:"options"`
 }
diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp
index 801d5b755..b55d5f190 100644
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1206,6 +1206,7 @@ struct llama_server_context
             res.result_json = json
             {
                 {"embedding", std::vector<float>(n_embd, 0.0f)},
+                {"truncated", slot.truncated}
             };
         }
         else
@@ -1223,6 +1224,7 @@ struct llama_server_context
                     res.result_json = json
                     {
                         {"embedding", std::vector<float>(n_embd, 0.0f)},
+                        {"truncated", slot.truncated}
                     };
                     continue;
                 }
@@ -1231,6 +1233,7 @@ struct llama_server_context
                 res.result_json = json
                 {
                     {"embedding", std::vector<float>(embd, embd + n_embd)},
+                    {"truncated", slot.truncated}
                 };
             }
         }
@@ -3060,6 +3063,7 @@ int main(int argc, char **argv) {
             if (!json_value(data, "stream", false)) {
                 std::string completion_text;
                 task_result result = llama.queue_results.recv(task_id);
+                LOG_INFO("completion", {{"result", result.result_json}});
                 if (!result.error && result.stop) {
                     res.set_content(result.result_json.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
                 }
@@ -3075,6 +3079,7 @@ int main(int argc, char **argv) {
                 while (true)
                 {
                     task_result result = llama.queue_results.recv(task_id);
+                    LOG_INFO("completion", {{"result", result.result_json}});
                     if (!result.error) {
                         const std::string str =
                             "data: " +
@@ -3180,6 +3185,7 @@ int main(int argc, char **argv) {
             if (result.result_json.count("results")) {
                 // result for multi-task
                 responses = result.result_json.at("results");
+                LOG_INFO("results", {result.result_json});
             } else {
                 // result for single task
                 responses = std::vector<json>(1, result.result_json);
@@ -3198,6 +3204,8 @@ int main(int argc, char **argv) {
             }
             // send the result json
             result = json{{"embedding", embeddings}};
+            // log result
+
             return res.set_content(result.dump(), "application/json; charset=utf-8");
         } else {
             // return error
diff --git a/llm/ext_server/utils.hpp b/llm/ext_server/utils.hpp
index ade49f796..ee63cf786 100644
--- a/llm/ext_server/utils.hpp
+++ b/llm/ext_server/utils.hpp
@@ -658,7 +658,7 @@ static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
-static std::vector normalize_vector(const std::vector& vec, int size) {
+static std::vector<float> normalize_vector(const std::vector<float>& vec, int size) {
     double sum = 0.0;
     for (float value : vec) {
         sum += value * value;
diff --git a/server/routes.go b/server/routes.go
index e6ff55b01..1051a3c2a 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -356,6 +356,11 @@ func (s *Server) EmbedHandler(c *gin.Context) {
 		return
 	}
 
+	if req.Truncate == nil {
+		truncate := true
+		req.Truncate = &truncate
+	}
+
	model, err := GetModel(req.Model)
 	if err != nil {
 		var pErr *fs.PathError
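
Review note: `Truncate` is declared as `*bool` rather than `bool` so the server can tell an omitted field apart from an explicit `"truncate": false`; `EmbedHandler` then fills in `true` when the field is nil. A minimal, self-contained Go sketch of that tri-state defaulting (the type and field names mirror the diff, the rest is illustrative):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// embedRequest mirrors the tri-state pattern in EmbedRequest: a *bool
// distinguishes "field omitted" (nil) from an explicit false, which a
// plain bool cannot.
type embedRequest struct {
	Truncate *bool `json:"truncate,omitempty"`
}

func main() {
	for _, body := range []string{`{}`, `{"truncate":false}`} {
		var req embedRequest
		if err := json.Unmarshal([]byte(body), &req); err != nil {
			panic(err)
		}
		// Same defaulting as the EmbedHandler hunk: nil means
		// "not specified", so the server fills in true.
		if req.Truncate == nil {
			truncate := true
			req.Truncate = &truncate
		}
		fmt.Printf("%s -> truncate=%v\n", body, *req.Truncate)
	}
}
```

Running this prints `{} -> truncate=true` and `{"truncate":false} -> truncate=false`, which is exactly the behavior the routes.go hunk adds.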
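
With the `{"truncated", slot.truncated}` entries above, each per-slot embedding result now reports whether its input was cut down to fit the context window. A hedged Go sketch of how a caller might read that flag back; the `embedResult` struct and the exact response shape around it are assumptions for illustration, not ollama's API types:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// embedResult models the JSON the ext_server hunks now emit per slot:
// the embedding plus the new "truncated" flag.
type embedResult struct {
	Embedding []float32 `json:"embedding"`
	Truncated bool      `json:"truncated"`
}

func main() {
	raw := []byte(`{"embedding":[0.12,-0.034],"truncated":true}`)
	var res embedResult
	if err := json.Unmarshal(raw, &res); err != nil {
		panic(err)
	}
	if res.Truncated {
		fmt.Println("input was truncated to fit the context window")
	}
	fmt.Println(res.Embedding)
}
```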
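
The utils.hpp hunk cuts off after `normalize_vector`'s sum-of-squares loop, but the accumulator strongly suggests L2 normalization. A Go rendering of how such a helper presumably finishes; the sqrt-and-divide tail and the zero-vector guard are assumptions, not code from the diff:

```go
package main

import (
	"fmt"
	"math"
)

// normalizeVector scales an embedding so its L2 norm is 1. The visible
// C++ body only accumulates sum += value * value; everything after the
// loop here is an assumed completion of that pattern.
func normalizeVector(vec []float32) []float32 {
	sum := 0.0
	for _, v := range vec {
		sum += float64(v) * float64(v)
	}
	norm := math.Sqrt(sum)
	out := make([]float32, len(vec))
	if norm == 0 {
		return out // avoid dividing by zero for an all-zero embedding
	}
	for i, v := range vec {
		out[i] = float32(float64(v) / norm)
	}
	return out
}

func main() {
	fmt.Println(normalizeVector([]float32{3, 4})) // [0.6 0.8]
}
```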