llm: enable flash attention by default

jmorganca 2024-06-08 22:55:22 -07:00
parent 85169e8d6f
commit d8b3e09fb7


@@ -90,6 +90,7 @@ func init() {
 	NumParallel = 1
 	MaxRunners = 1
 	MaxQueuedRequests = 512
+	FlashAttention = true
 	LoadConfig()
 }
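
For context, LoadConfig is called right after these defaults are assigned, so it is the natural place for a runtime override of the new FlashAttention default. Its body is not part of this diff; the sketch below only illustrates the pattern under stated assumptions. The OLLAMA_FLASH_ATTENTION environment variable name and the loadFlashAttention helper are hypothetical stand-ins, not code confirmed by this commit.

package main

import (
	"fmt"
	"os"
	"strconv"
)

// FlashAttention defaults to true, matching this commit.
var FlashAttention = true

// loadFlashAttention is a hypothetical helper: if an assumed
// OLLAMA_FLASH_ATTENTION variable is set to a parseable boolean
// ("1", "true", "0", "false", ...), it overrides the compiled-in
// default; otherwise the default stands.
func loadFlashAttention() {
	if v, ok := os.LookupEnv("OLLAMA_FLASH_ATTENTION"); ok {
		if b, err := strconv.ParseBool(v); err == nil {
			FlashAttention = b
		}
	}
}

func main() {
	loadFlashAttention()
	fmt.Println("flash attention enabled:", FlashAttention)
}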