diff --git a/discover/cuda_common.go b/discover/cuda_common.go index 3c7a92114..a2c43420e 100644 --- a/discover/cuda_common.go +++ b/discover/cuda_common.go @@ -16,7 +16,7 @@ import ( // Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices. var CudaTegra string = os.Getenv("JETSON_JETPACK") -func cudaVariant(gpuInfo CudaGPUInfo) string { +func cudaVariant(gpuInfos []CudaGPUInfo) string { if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" { if CudaTegra != "" { ver := strings.Split(CudaTegra, ".") @@ -45,20 +45,19 @@ func cudaVariant(gpuInfo CudaGPUInfo) string { } } - // Check GPU compute capability FIRST - isOldGPU := gpuInfo.computeMajor < 7 || (gpuInfo.computeMajor == 7 && gpuInfo.computeMinor < 5) - if isOldGPU { - // GPU is Pascal or older (CC <= 7.4) - use CUDA v12 (supports CC 6.1) - return "v12" + // Check GPU compute capability FIRST, lowest common denominator if multi-gpu + for _, gpuInfo := range gpuInfos { + if gpuInfo.computeMajor < 7 || (gpuInfo.computeMajor == 7 && gpuInfo.computeMinor < 5) { + // GPU is Pascal or older (CC <= 7.4) - use CUDA v12 (supports CC 6.1) + return "v12" + } } // GPU is Turing or newer (CC >= 7.5) - can use newer CUDA - if gpuInfo.DriverMajor < 13 { + if len(gpuInfos) > 0 && gpuInfos[0].DriverMajor < 13 { // The detected driver is older than 580 (Aug 2025) // Warn if their CC is compatible with v13 and they should upgrade their driver to get better performance - if !isOldGPU { - slog.Warn("old CUDA driver detected - please upgrade to a newer driver for best performance", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor)) - } + slog.Warn("old CUDA driver detected - please upgrade to a newer driver for best performance", "version", fmt.Sprintf("%d.%d", gpuInfos[0].DriverMajor, gpuInfos[0].DriverMinor)) return "v12" } return "v13" diff --git a/discover/gpu.go b/discover/gpu.go index b09626118..a39bc7c3d 100644 --- a/discover/gpu.go +++ b/discover/gpu.go @@ -284,18 +284,8 @@ func GetGPUInfo() GpuInfoList { gpuInfo.MinimumMemory = cudaMinimumMemory gpuInfo.DriverMajor = driverMajor gpuInfo.DriverMinor = driverMinor - variant := cudaVariant(gpuInfo) - // Start with our bundled libraries - if variant != "" { - variantPath := filepath.Join(LibOllamaPath, "cuda_"+variant) - if _, err := os.Stat(variantPath); err == nil { - // Put the variant directory first in the search path to avoid runtime linking to the wrong library - gpuInfo.DependencyPath = append([]string{variantPath}, gpuInfo.DependencyPath...) - } - } gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) - gpuInfo.Variant = variant if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) { unsupportedGPUs = append(unsupportedGPUs, @@ -333,6 +323,24 @@ func GetGPUInfo() GpuInfoList { // TODO potentially sort on our own algorithm instead of what the underlying GPU library does... cudaGPUs = append(cudaGPUs, gpuInfo) } + // Second pass on NVIDIA GPUs to set lowest common denominator variant and DependencyPaths + variant := cudaVariant(cudaGPUs) + var variantPath string + // Start with our bundled libraries + if variant != "" { + variantPath = filepath.Join(LibOllamaPath, "cuda_"+variant) + if _, err := os.Stat(variantPath); err != nil { + variantPath = "" + } + } + + for i := range cudaGPUs { + cudaGPUs[i].Variant = variant + if variantPath != "" { + // Put the variant directory first in the search path to avoid runtime linking to the wrong library + cudaGPUs[i].DependencyPath = append([]string{variantPath}, cudaGPUs[i].DependencyPath...) + } + } } // Intel