Compare commits

...

5 Commits

Author       SHA1        Message                                 Date
ParthSareen  6556540655  User interface prototype                2024-12-19 16:43:36 -08:00
ParthSareen  3f60fd57e3  Remove /template API                    2024-12-19 14:47:51 -08:00
ParthSareen  38cd80d52c  Add dry run option for chat request     2024-12-19 14:17:29 -08:00
ParthSareen  c9a46140e6  Warn user on truncation - ollama logs   2024-12-19 13:48:25 -08:00
ParthSareen  1d529d8b7b  Add /template endpoint                  2024-12-18 15:23:27 -08:00
3 changed files with 52 additions and 0 deletions

@@ -103,10 +103,18 @@ type ChatRequest struct {
	// Tools is an optional list of tools the model has access to.
	Tools `json:"tools,omitempty"`

	// Debug selects extra information (e.g. "prompt") to include in the response.
	Debug *Debug `json:"debug,omitempty"`

	// Dry indicates the request should be prepared but not executed.
	Dry bool `json:"dry,omitempty"`

	// Options lists model-specific options.
	Options map[string]interface{} `json:"options"`
}

type Debug struct {
	Include []string `json:"include,omitempty"`
}

type Tools []Tool

func (t Tools) String() string {
@@ -190,6 +198,8 @@ type ChatResponse struct {
	Message    Message `json:"message"`
	DoneReason string  `json:"done_reason,omitempty"`

	// Debug carries the extra fields requested via ChatRequest.Debug.
	Debug map[string]any `json:"debug,omitempty"`

	Done bool `json:"done"`

	Metrics

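Taken together, the two hunks above add an opt-in dry-run flag and a debug include list to the chat request, plus a debug map on the response. A minimal sketch of how a client might populate the new fields from Go (the model name is illustrative; only Dry, Debug, and Debug.Include come from this diff):

	req := api.ChatRequest{
		Model: "llama3.2", // assumed model name, purely illustrative
		Messages: []api.Message{
			{Role: "user", Content: "Hello!"},
		},
		Dry:   true,                                    // prepare the prompt but skip generation
		Debug: &api.Debug{Include: []string{"prompt"}}, // ask for the rendered prompt back
	}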

@@ -82,6 +82,10 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
	}

	currMsgIdx := n

	// Warn the user when leading messages have been truncated from the input.
	if numTruncatedMessages := len(msgs[0:currMsgIdx]); numTruncatedMessages > 0 {
		slog.Warn("truncated first messages from input", "num_truncated", numTruncatedMessages)
	}

	for cnt, msg := range msgs[currMsgIdx:] {
		prefix := ""

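For a sense of what this produces: if the context window forces chatPrompt to drop the three oldest messages (currMsgIdx == 3), slog's default text handler would emit a line roughly like the following (timestamp illustrative):

	time=2024-12-19T13:48:25.000-08:00 level=WARN msg="truncated first messages from input" num_truncated=3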

@@ -1539,6 +1539,34 @@ func (s *Server) ChatHandler(c *gin.Context) {
		return
	}

	if req.Dry {
		// Only include the rendered prompt if the client asked for it.
		var debug map[string]any
		if req.Debug != nil && req.Debug.Include != nil && slices.Contains(req.Debug.Include, "prompt") {
			debug = map[string]any{"prompt": prompt}
		}

		tokens, err := r.Tokenize(c.Request.Context(), prompt)
		if err != nil {
			slog.Error("tokenize error", "error", err)
			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
			return
		}

		// Report the prompt token count without running any generation.
		c.JSON(http.StatusOK, api.ChatResponse{
			Model:      req.Model,
			CreatedAt:  time.Now().UTC(),
			Message:    api.Message{Role: "assistant", Content: ""},
			Done:       true,
			DoneReason: "dry_run",
			Debug:      debug,
			Metrics: api.Metrics{
				PromptEvalCount:    len(tokens),
				PromptEvalDuration: 0,
				EvalCount:          0,
				EvalDuration:       0,
			},
		})
		return
	}

	slog.Debug("chat request", "images", len(images), "prompt", prompt)

	ch := make(chan any)
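
A hedged end-to-end sketch of the dry-run path as wired up above, posting to Ollama's /api/chat endpoint on its default port (the model name and host are assumptions; the "dry" and "debug" fields and the "dry_run" done reason come from this diff):

	package main

	import (
		"io"
		"log"
		"net/http"
		"os"
		"strings"
	)

	func main() {
		body := strings.NewReader(`{
			"model": "llama3.2",
			"messages": [{"role": "user", "content": "Hello!"}],
			"dry": true,
			"debug": {"include": ["prompt"]},
			"stream": false
		}`)
		resp, err := http.Post("http://localhost:11434/api/chat", "application/json", body)
		if err != nil {
			log.Fatal(err)
		}
		defer resp.Body.Close()
		// Expect done_reason "dry_run", an empty assistant message, the prompt
		// token count in prompt_eval_count, and the rendered prompt under "debug".
		if _, err := io.Copy(os.Stdout, resp.Body); err != nil {
			log.Fatal(err)
		}
	}
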
@@ -1571,6 +1599,16 @@ func (s *Server) ChatHandler(c *gin.Context) {
			res.LoadDuration = checkpointLoaded.Sub(checkpointStart)
		}

		if req.Debug != nil && req.Debug.Include != nil && slices.Contains(req.Debug.Include, "prompt") {
			res.Debug = map[string]any{"prompt": prompt}
			// For non-streaming requests, send the debug payload ahead of the
			// final response with the message content blanked, then restore it
			// so the content is not duplicated.
			if req.Stream != nil && !*req.Stream {
				tempMsg := res.Message
				res.Message = api.Message{Role: "assistant", Content: ""}
				ch <- res
				res.Message = tempMsg
			}
		}

		// TODO: tool call checking and filtering should be moved outside of this
		// callback once streaming is reworked; this was a simple change for now
		// that avoids reworking the streaming logic of this (and other) handlers.
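
Continuing the client sketch above (same resp; encoding/json and fmt imports assumed), and assuming the handler's non-streaming aggregation carries the Debug field through to the object the client decodes, reading the rendered prompt back might look like:

	var res api.ChatResponse
	if err := json.NewDecoder(resp.Body).Decode(&res); err != nil {
		log.Fatal(err)
	}
	if p, ok := res.Debug["prompt"].(string); ok {
		fmt.Println("rendered prompt:", p)
	}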