llm: enable flash attention by default
parent 85169e8d6f
commit d8b3e09fb7
@@ -90,6 +90,7 @@ func init() {
 	NumParallel = 1
 	MaxRunners = 1
 	MaxQueuedRequests = 512
+	FlashAttention = true

 	LoadConfig()
 }
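A minimal, self-contained Go sketch (not the actual Ollama source) of the pattern this diff touches: a package-level default set at declaration and an environment override applied in LoadConfig(). The OLLAMA_FLASH_ATTENTION variable name and the ParseBool override logic are assumptions for illustration.

```go
package main

import (
	"fmt"
	"os"
	"strconv"
)

// Package-level defaults, mirroring the hunk above.
// FlashAttention is the default flipped to true by this commit.
var (
	NumParallel       = 1
	MaxRunners        = 1
	MaxQueuedRequests = 512
	FlashAttention    = true
)

// LoadConfig lets an environment variable override the compiled-in
// default. OLLAMA_FLASH_ATTENTION is a hypothetical knob for this
// sketch; ParseBool returns an error for an unset/empty variable,
// so the default is kept unless the user sets it explicitly.
func LoadConfig() {
	if v, err := strconv.ParseBool(os.Getenv("OLLAMA_FLASH_ATTENTION")); err == nil {
		FlashAttention = v
	}
}

func main() {
	LoadConfig()
	fmt.Println("flash attention enabled:", FlashAttention)
}
```

Under this sketch, running the binary with OLLAMA_FLASH_ATTENTION=0 would flip the new default back off.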