mirror of https://github.com/ollama/ollama.git
Workaround broken NVIDIA iGPU free VRAM data (#12490)
The CUDA APIs for reporting free VRAM are useless on NVIDIA iGPU systems: they return only the kernel's actual free memory and ignore buff/cache allocations, which on a typical system quickly fill up most of the free system memory. As a result, we incorrectly conclude that very little memory is available for GPU allocations.
parent 2fa1e92a99
commit e4340667e3
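The gap described above is easy to observe on Linux: /proc/meminfo reports MemFree (roughly what the CUDA APIs mirror on an iGPU) separately from MemAvailable, which also counts reclaimable buff/cache pages. Below is a minimal standalone sketch that prints both; readMeminfo is a hypothetical helper for illustration, not ollama's actual GetCPUMem.

package main

import (
	"bufio"
	"fmt"
	"os"
	"strconv"
	"strings"
)

// readMeminfo returns the requested /proc/meminfo fields in bytes.
func readMeminfo(keys ...string) (map[string]uint64, error) {
	f, err := os.Open("/proc/meminfo")
	if err != nil {
		return nil, err
	}
	defer f.Close()

	want := make(map[string]bool, len(keys))
	for _, k := range keys {
		want[k] = true
	}
	out := make(map[string]uint64, len(keys))
	s := bufio.NewScanner(f)
	for s.Scan() {
		fields := strings.Fields(s.Text())
		if len(fields) < 2 {
			continue
		}
		key := strings.TrimSuffix(fields[0], ":")
		if !want[key] {
			continue
		}
		kb, err := strconv.ParseUint(fields[1], 10, 64)
		if err != nil {
			continue
		}
		out[key] = kb * 1024 // /proc/meminfo reports values in kB
	}
	return out, s.Err()
}

func main() {
	m, err := readMeminfo("MemFree", "MemAvailable")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	// MemFree excludes buff/cache; MemAvailable accounts for pages the
	// kernel can reclaim, so it is the more realistic "free" figure.
	fmt.Printf("MemFree:      %d MiB\n", m["MemFree"]/(1024*1024))
	fmt.Printf("MemAvailable: %d MiB\n", m["MemAvailable"]/(1024*1024))
}

On a busy system the two numbers can differ by most of RAM, which is exactly why the driver-reported value is misleading on an iGPU.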
@@ -330,6 +330,9 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 		}
 	}
 
+	// Apply any iGPU workarounds
+	iGPUWorkarounds(devices)
+
 	return devices
 }
 
@@ -540,3 +543,32 @@ func GetDevicesFromRunner(ctx context.Context, runner BaseRunner) ([]ml.DeviceIn
 		}
 	}
 }
+
+func iGPUWorkarounds(devices []ml.DeviceInfo) {
+	// short circuit if we have no iGPUs
+	anyiGPU := false
+	for i := range devices {
+		if devices[i].Integrated {
+			anyiGPU = true
+			break
+		}
+	}
+	if !anyiGPU {
+		return
+	}
+
+	memInfo, err := GetCPUMem()
+	if err != nil {
+		slog.Debug("failed to fetch system memory information for iGPU", "error", err)
+		return
+	}
+	for i := range devices {
+		if !devices[i].Integrated {
+			continue
+		}
+		// NVIDIA iGPUs return useless free VRAM data which ignores system buff/cache
+		if devices[i].Library == "CUDA" {
+			devices[i].FreeMemory = memInfo.FreeMemory
+		}
+	}
+}
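To see what the patch changes in isolation, here is a self-contained sketch that mirrors the new helper's logic with stand-in types; deviceInfo and applyIGPUWorkaround are illustrative stand-ins for this example, not ollama's ml.DeviceInfo or the committed iGPUWorkarounds.

package main

import "fmt"

// deviceInfo stands in for the ml.DeviceInfo fields the workaround
// touches; the real struct has many more fields.
type deviceInfo struct {
	Library    string
	Integrated bool
	FreeMemory uint64
}

// applyIGPUWorkaround mirrors the committed logic: for integrated CUDA
// devices, replace the driver-reported free VRAM with the OS view of
// free system memory, which accounts for buff/cache.
func applyIGPUWorkaround(devices []deviceInfo, systemFree uint64) {
	for i := range devices {
		if devices[i].Integrated && devices[i].Library == "CUDA" {
			devices[i].FreeMemory = systemFree
		}
	}
}

func main() {
	devices := []deviceInfo{
		{Library: "CUDA", Integrated: true, FreeMemory: 512 << 20}, // iGPU with bogus low value
		{Library: "CUDA", Integrated: false, FreeMemory: 8 << 30},  // discrete GPU, left untouched
	}
	applyIGPUWorkaround(devices, 6<<30) // pretend the OS reports 6 GiB free

	for _, d := range devices {
		fmt.Printf("%s integrated=%v free=%d MiB\n", d.Library, d.Integrated, d.FreeMemory>>20)
	}
}

Only integrated CUDA devices are rewritten; discrete GPUs keep the driver-reported value, matching the Integrated and Library guards in the committed code.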