limit num_predict to num_ctx
This commit is contained in:
parent
3e22611200
commit
ca7c3f7e0f
@ -172,6 +172,19 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
|||||||
slog.Info(fmt.Sprintf("loaded %d images", len(predict.Images)))
|
slog.Info(fmt.Sprintf("loaded %d images", len(predict.Images)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Limit the number of predictions to the maximum context length
|
||||||
|
// this will cause no more than two context shifts
|
||||||
|
// TODO: limit this further to num_ctx - len(prompt) to avoid
|
||||||
|
// any context shifts at all
|
||||||
|
if predict.Options.NumPredict > llm.options.NumCtx {
|
||||||
|
slog.Warn(fmt.Sprintf("requested num_predict is greater than the context length (%d > %d), using %d instead", predict.Options.NumPredict, llm.options.NumCtx, llm.options.NumCtx))
|
||||||
|
predict.Options.NumPredict = llm.options.NumCtx
|
||||||
|
}
|
||||||
|
|
||||||
|
if predict.Options.NumPredict == -1 {
|
||||||
|
predict.Options.NumPredict = llm.options.NumCtx
|
||||||
|
}
|
||||||
|
|
||||||
request := map[string]any{
|
request := map[string]any{
|
||||||
"prompt": predict.Prompt,
|
"prompt": predict.Prompt,
|
||||||
"stream": true,
|
"stream": true,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user