Prevent underflow when FreeMemory < overhead (#8014)
Co-authored-by: Richard Lyons <frob@cloudstaff.com>
This commit is contained in:
parent
900f64e6be
commit
63269668c0
@ -182,7 +182,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
|
|||||||
gzo = gpuZeroOverhead
|
gzo = gpuZeroOverhead
|
||||||
}
|
}
|
||||||
// Only include GPUs that can fit the graph, gpu minimum, the layer buffer and at least more layer
|
// Only include GPUs that can fit the graph, gpu minimum, the layer buffer and at least more layer
|
||||||
if (gpus[i].FreeMemory - overhead) < gzo+max(graphPartialOffload, graphFullOffload)+gpus[i].MinimumMemory+2*layerSize {
|
if gpus[i].FreeMemory < overhead+gzo+max(graphPartialOffload, graphFullOffload)+gpus[i].MinimumMemory+2*layerSize {
|
||||||
slog.Debug("gpu has too little memory to allocate any layers",
|
slog.Debug("gpu has too little memory to allocate any layers",
|
||||||
"id", gpus[i].ID,
|
"id", gpus[i].ID,
|
||||||
"library", gpus[i].Library,
|
"library", gpus[i].Library,
|
||||||
@ -228,7 +228,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
|
|||||||
for j := len(gpusWithSpace); j > 0; j-- {
|
for j := len(gpusWithSpace); j > 0; j-- {
|
||||||
g := gpusWithSpace[i%j]
|
g := gpusWithSpace[i%j]
|
||||||
used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload)
|
used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload)
|
||||||
if (g.g.FreeMemory - overhead) > used+layerSize {
|
if g.g.FreeMemory > overhead+used+layerSize {
|
||||||
gpuAllocations[g.i] += layerSize
|
gpuAllocations[g.i] += layerSize
|
||||||
layerCounts[g.i]++
|
layerCounts[g.i]++
|
||||||
layerCount++
|
layerCount++
|
||||||
@ -251,7 +251,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
|
|||||||
for j := len(gpusWithSpace); j > 0; j-- {
|
for j := len(gpusWithSpace); j > 0; j-- {
|
||||||
g := gpusWithSpace[layerCount%j]
|
g := gpusWithSpace[layerCount%j]
|
||||||
used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload)
|
used := gpuAllocations[g.i] + max(graphPartialOffload, graphFullOffload)
|
||||||
if (g.g.FreeMemory - overhead) > used+memoryLayerOutput {
|
if g.g.FreeMemory > overhead+used+memoryLayerOutput {
|
||||||
gpuAllocations[g.i] += memoryLayerOutput
|
gpuAllocations[g.i] += memoryLayerOutput
|
||||||
layerCounts[g.i]++
|
layerCounts[g.i]++
|
||||||
layerCount++
|
layerCount++
|
||||||
|
Loading…
x
Reference in New Issue
Block a user