mirror of https://github.com/ollama/ollama.git
fix: multi-cuda version skew (#12318)
Ensure that in a version-skewed multi-CUDA setup, the lowest common CUDA library version (variant) is used for all GPUs.
This commit is contained in:
parent
564b558c92
commit
9c5bf342bc
|
|
@ -16,7 +16,7 @@ import (
|
|||
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||
|
||||
func cudaVariant(gpuInfo CudaGPUInfo) string {
|
||||
func cudaVariant(gpuInfos []CudaGPUInfo) string {
|
||||
if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
|
||||
if CudaTegra != "" {
|
||||
ver := strings.Split(CudaTegra, ".")
|
||||
|
|
@ -45,20 +45,19 @@ func cudaVariant(gpuInfo CudaGPUInfo) string {
|
|||
}
|
||||
}
|
||||
|
||||
// Check GPU compute capability FIRST
|
||||
isOldGPU := gpuInfo.computeMajor < 7 || (gpuInfo.computeMajor == 7 && gpuInfo.computeMinor < 5)
|
||||
if isOldGPU {
|
||||
// GPU is Pascal or older (CC <= 7.4) - use CUDA v12 (supports CC 6.1)
|
||||
return "v12"
|
||||
// Check GPU compute capability FIRST, lowest common denominator if multi-gpu
|
||||
for _, gpuInfo := range gpuInfos {
|
||||
if gpuInfo.computeMajor < 7 || (gpuInfo.computeMajor == 7 && gpuInfo.computeMinor < 5) {
|
||||
// GPU is Pascal or older (CC <= 7.4) - use CUDA v12 (supports CC 6.1)
|
||||
return "v12"
|
||||
}
|
||||
}
|
||||
|
||||
// GPU is Turing or newer (CC >= 7.5) - can use newer CUDA
|
||||
if gpuInfo.DriverMajor < 13 {
|
||||
if len(gpuInfos) > 0 && gpuInfos[0].DriverMajor < 13 {
|
||||
// The detected driver is older than 580 (Aug 2025)
|
||||
// Warn if their CC is compatible with v13 and they should upgrade their driver to get better performance
|
||||
if !isOldGPU {
|
||||
slog.Warn("old CUDA driver detected - please upgrade to a newer driver for best performance", "version", fmt.Sprintf("%d.%d", gpuInfo.DriverMajor, gpuInfo.DriverMinor))
|
||||
}
|
||||
slog.Warn("old CUDA driver detected - please upgrade to a newer driver for best performance", "version", fmt.Sprintf("%d.%d", gpuInfos[0].DriverMajor, gpuInfos[0].DriverMinor))
|
||||
return "v12"
|
||||
}
|
||||
return "v13"
|
||||
|
|
|
|||
|
|
@ -284,18 +284,8 @@ func GetGPUInfo() GpuInfoList {
|
|||
gpuInfo.MinimumMemory = cudaMinimumMemory
|
||||
gpuInfo.DriverMajor = driverMajor
|
||||
gpuInfo.DriverMinor = driverMinor
|
||||
variant := cudaVariant(gpuInfo)
|
||||
|
||||
// Start with our bundled libraries
|
||||
if variant != "" {
|
||||
variantPath := filepath.Join(LibOllamaPath, "cuda_"+variant)
|
||||
if _, err := os.Stat(variantPath); err == nil {
|
||||
// Put the variant directory first in the search path to avoid runtime linking to the wrong library
|
||||
gpuInfo.DependencyPath = append([]string{variantPath}, gpuInfo.DependencyPath...)
|
||||
}
|
||||
}
|
||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||
gpuInfo.Variant = variant
|
||||
|
||||
if int(memInfo.major) < cudaComputeMajorMin || (int(memInfo.major) == cudaComputeMajorMin && int(memInfo.minor) < cudaComputeMinorMin) {
|
||||
unsupportedGPUs = append(unsupportedGPUs,
|
||||
|
|
@ -333,6 +323,24 @@ func GetGPUInfo() GpuInfoList {
|
|||
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
|
||||
cudaGPUs = append(cudaGPUs, gpuInfo)
|
||||
}
|
||||
// Second pass on NVIDIA GPUs to set lowest common denominator variant and DependencyPaths
|
||||
variant := cudaVariant(cudaGPUs)
|
||||
var variantPath string
|
||||
// Start with our bundled libraries
|
||||
if variant != "" {
|
||||
variantPath = filepath.Join(LibOllamaPath, "cuda_"+variant)
|
||||
if _, err := os.Stat(variantPath); err != nil {
|
||||
variantPath = ""
|
||||
}
|
||||
}
|
||||
|
||||
for i := range cudaGPUs {
|
||||
cudaGPUs[i].Variant = variant
|
||||
if variantPath != "" {
|
||||
// Put the variant directory first in the search path to avoid runtime linking to the wrong library
|
||||
cudaGPUs[i].DependencyPath = append([]string{variantPath}, cudaGPUs[i].DependencyPath...)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Intel
|
||||
|
|
|
|||
Loading…
Reference in New Issue