llm: enable flash attention by default
parent 85169e8d6f
commit d8b3e09fb7
@@ -90,6 +90,7 @@ func init() {
 	NumParallel = 1
 	MaxRunners = 1
 	MaxQueuedRequests = 512
+	FlashAttention = true
 
 	LoadConfig()
 }
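For context, below is a minimal, self-contained sketch of the pattern this diff touches: package-level config defaults assigned in init(), then optionally overridden from the environment by LoadConfig(). This is not the project's actual code; the variable names mirror the diff, and the OLLAMA_FLASH_ATTENTION override handling is an assumption for illustration.

// Sketch only: package-level defaults set in init(), with an
// environment-variable override applied by LoadConfig(). The
// OLLAMA_FLASH_ATTENTION handling is a hypothetical illustration.
package main

import (
	"fmt"
	"os"
	"strconv"
)

var (
	NumParallel       int
	MaxRunners        int
	MaxQueuedRequests int
	FlashAttention    bool
)

func init() {
	NumParallel = 1
	MaxRunners = 1
	MaxQueuedRequests = 512
	FlashAttention = true // the new default introduced by this commit

	LoadConfig()
}

// LoadConfig lets the environment override the compiled-in defaults.
func LoadConfig() {
	if v := os.Getenv("OLLAMA_FLASH_ATTENTION"); v != "" {
		if b, err := strconv.ParseBool(v); err == nil {
			FlashAttention = b
		}
	}
}

func main() {
	fmt.Printf("flash attention enabled: %v\n", FlashAttention)
}

With this shape, the compiled-in default is true, and a user can still opt out at runtime (e.g. OLLAMA_FLASH_ATTENTION=0) without a rebuild.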