mirror of https://github.com/ollama/ollama.git
Workaround broken NVIDIA iGPU free VRAM data (#12490)
The CUDA APIs for reporting free VRAM are useless on NVIDIA iGPU systems: they return only the kernel's actual free memory and ignore buff/cache allocations, which on a typical system quickly fill up most of the free system memory. As a result, we incorrectly conclude that very little memory is available for GPU allocations.
parent 2fa1e92a99
commit e4340667e3
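The gap described above is easy to observe on Linux: /proc/meminfo reports MemFree (roughly what the CUDA APIs mirror on an iGPU) separately from MemAvailable, which also counts reclaimable buff/cache pages. Below is a minimal standalone sketch that prints both; readMeminfo is a hypothetical helper for illustration, not ollama's actual GetCPUMem.

package main

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)

// readMeminfo returns the requested /proc/meminfo fields in bytes.
func readMeminfo(keys ...string) (map[string]uint64, error) {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return nil, err
	}
	defer f.Close()

	want := make(map[string]bool, len(keys))
	for _, k := range keys {
		want[k] = true
	}
	out := make(map[string]uint64, len(keys))
	s := bufio.NewScanner(f)
	for s.Scan() {
		fields := strings.Fields(s.Text())
		if len(fields) < 2 {
			continue
		}
		key := strings.TrimSuffix(fields[0], ":")
		if !want[key] {
			continue
		}
		kb, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			continue
		}
		out[key] = kb * 1024 // /proc/meminfo reports values in kB
	}
	return out, s.Err()
}

func main() {
	m, err := readMeminfo("MemFree", "MemAvailable")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	// MemFree excludes buff/cache; MemAvailable accounts for pages the
	// kernel can reclaim, so it is the more realistic "free" figure.
	fmt.Printf("MemFree:      %d MiB\n", m["MemFree"]/(1024*1024))
	fmt.Printf("MemAvailable: %d MiB\n", m["MemAvailable"]/(1024*1024))
}

On a busy system the two numbers can differ by most of RAM, which is exactly why the driver-reported value is misleading on an iGPU.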
@@ -330,6 +330,9 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 		}
 	}
 
+	// Apply any iGPU workarounds
+	iGPUWorkarounds(devices)
+
 	return devices
 }
 
@@ -540,3 +543,32 @@ func GetDevicesFromRunner(ctx context.Context, runner BaseRunner) ([]ml.DeviceIn
 		}
 	}
 }
+
+func iGPUWorkarounds(devices []ml.DeviceInfo) {
+	// short circuit if we have no iGPUs
+	anyiGPU := false
+	for i := range devices {
+		if devices[i].Integrated {
+			anyiGPU = true
+			break
+		}
+	}
+	if !anyiGPU {
+		return
+	}
+
+	memInfo, err := GetCPUMem()
+	if err != nil {
+		slog.Debug("failed to fetch system memory information for iGPU", "error", err)
+		return
+	}
+	for i := range devices {
+		if !devices[i].Integrated {
+			continue
+		}
+		// NVIDIA iGPUs return useless free VRAM data which ignores system buff/cache
+		if devices[i].Library == "CUDA" {
+			devices[i].FreeMemory = memInfo.FreeMemory
+		}
+	}
+}
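To see what the patch changes in isolation, here is a self-contained sketch that mirrors the new helper's logic with stand-in types; deviceInfo and applyIGPUWorkaround are illustrative stand-ins for this example, not ollama's ml.DeviceInfo or the committed iGPUWorkarounds.

package main

import "fmt"

// deviceInfo stands in for the ml.DeviceInfo fields the workaround
// touches; the real struct has many more fields.
type deviceInfo struct {
	Library    string
	Integrated bool
	FreeMemory uint64
}

// applyIGPUWorkaround mirrors the committed logic: for integrated CUDA
// devices, replace the driver-reported free VRAM with the OS view of
// free system memory, which accounts for buff/cache.
func applyIGPUWorkaround(devices []deviceInfo, systemFree uint64) {
	for i := range devices {
		if devices[i].Integrated && devices[i].Library == "CUDA" {
			devices[i].FreeMemory = systemFree
		}
	}
}

func main() {
	devices := []deviceInfo{
		{Library: "CUDA", Integrated: true, FreeMemory: 512 << 20}, // iGPU with bogus low value
		{Library: "CUDA", Integrated: false, FreeMemory: 8 << 30},  // discrete GPU, left untouched
	}
	applyIGPUWorkaround(devices, 6<<30) // pretend the OS reports 6 GiB free

	for _, d := range devices {
		fmt.Printf("%s integrated=%v free=%d MiB\n", d.Library, d.Integrated, d.FreeMemory>>20)
	}
}

Only integrated CUDA devices are rewritten; discrete GPUs keep the driver-reported value, matching the Integrated and Library guards in the committed code.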