Working e2e logits
This commit is contained in:
parent
c92d418a7c
commit
f9928b677f
@ -310,12 +310,10 @@ func flushPending(seq *Sequence) bool {
|
|||||||
|
|
||||||
// Add logits if requested and available
|
// Add logits if requested and available
|
||||||
if seq.returnLogits && seq.logits != nil {
|
if seq.returnLogits && seq.logits != nil {
|
||||||
slog.Info("returning logits - flushPending")
|
|
||||||
resp.Logits = seq.logits
|
resp.Logits = seq.logits
|
||||||
seq.logits = nil
|
seq.logits = nil
|
||||||
}
|
}
|
||||||
|
|
||||||
slog.Info("returning logits - flushPending", "logits", resp.Logits[0])
|
|
||||||
select {
|
select {
|
||||||
case seq.responses <- resp:
|
case seq.responses <- resp:
|
||||||
return true
|
return true
|
||||||
@ -503,9 +501,11 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
|
|||||||
|
|
||||||
// Before sampling:
|
// Before sampling:
|
||||||
if seq.returnLogits { // New flag we need to add to Sequence struct
|
if seq.returnLogits { // New flag we need to add to Sequence struct
|
||||||
slog.Info("returning logits")
|
logits := s.lc.GetLogits()
|
||||||
seq.logits = s.lc.GetLogits() // Using our new GetLogits() method
|
seq.logits = make([]float32, len(logits))
|
||||||
|
slog.Info("copying logits")
|
||||||
|
copy(seq.logits, logits)
|
||||||
|
slog.Info("copying logits success")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then sample token
|
// Then sample token
|
||||||
@ -728,7 +728,6 @@ func (s *Server) completion(w http.ResponseWriter, r *http.Request) {
|
|||||||
close(seq.quit)
|
close(seq.quit)
|
||||||
return
|
return
|
||||||
case content, ok := <-seq.responses:
|
case content, ok := <-seq.responses:
|
||||||
slog.Info("logits in last chan", "content", content.Logits[0])
|
|
||||||
if ok {
|
if ok {
|
||||||
slog.Info("content", "content", content.Content)
|
slog.Info("content", "content", content.Content)
|
||||||
if err := json.NewEncoder(w).Encode(&content); err != nil {
|
if err := json.NewEncoder(w).Encode(&content); err != nil {
|
||||||
|
@ -633,7 +633,8 @@ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
|
|||||||
ws ::= ([ \t\n] ws)?
|
ws ::= ([ \t\n] ws)?
|
||||||
`
|
`
|
||||||
|
|
||||||
const maxBufferSize = 512 * format.KiloByte
|
// TODO: change back to 512 * format.KiloByte
|
||||||
|
const maxBufferSize = 2048 * format.KiloByte
|
||||||
|
|
||||||
type ImageData struct {
|
type ImageData struct {
|
||||||
Data []byte `json:"data"`
|
Data []byte `json:"data"`
|
||||||
|
@ -1543,8 +1543,6 @@ func (s *Server) ChatHandler(c *gin.Context) {
|
|||||||
|
|
||||||
slog.Debug("chat request", "images", len(images), "prompt", prompt)
|
slog.Debug("chat request", "images", len(images), "prompt", prompt)
|
||||||
|
|
||||||
slog.Info("chat request", "return_logits", req.ReturnLogits)
|
|
||||||
|
|
||||||
ch := make(chan any)
|
ch := make(chan any)
|
||||||
go func() {
|
go func() {
|
||||||
defer close(ch)
|
defer close(ch)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user