From 694388db9028686ecd5c1e78411ce8bde9239d52 Mon Sep 17 00:00:00 2001 From: Roy Han Date: Wed, 10 Jul 2024 15:21:46 -0700 Subject: [PATCH] set context length --- server/routes.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/server/routes.go b/server/routes.go index 782028c0a..fe46ab038 100644 --- a/server/routes.go +++ b/server/routes.go @@ -320,6 +320,12 @@ func (s *Server) EmbedHandler(c *gin.Context) { return } + kvData, err := getKVData(model.ModelPath, false) + if err != nil { + c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) + return + } + checkFit := func(s string, truncate bool) (string, error) { tokens, err := r.Tokenize(c.Request.Context(), s) if err != nil { @@ -327,9 +333,10 @@ func (s *Server) EmbedHandler(c *gin.Context) { return "", err } - if len(tokens) > opts.NumCtx { + ctxLen := min(opts.NumCtx, int(kvData.ContextLength())) + if len(tokens) > ctxLen { if truncate { - tokens = tokens[:opts.NumCtx] + tokens = tokens[:ctxLen] return r.Detokenize(c.Request.Context(), tokens) } else { return "", fmt.Errorf("input length exceeds maximum context length")