discover: CPU supports flash attention

We already run flash attention on CPUs in cases where we have
partial offloading, but we were disabling it when running on a
pure CPU, which is unnecessary.
This commit is contained in:
Jesse Gross 2025-08-11 14:45:45 -07:00 committed by Richard Lyons
parent 654082b587
commit f0dd7b63d2
1 changed file with 2 additions and 1 deletion

View File

@ -171,7 +171,8 @@ func (si SystemInfo) GetOptimalThreadCount() int {
// For each GPU, check if it does NOT support flash attention
func (l GpuInfoList) FlashAttentionSupported() bool {
for _, gpu := range l {
supportsFA := gpu.Library == "metal" ||
supportsFA := gpu.Library == "cpu" ||
gpu.Library == "metal" ||
(gpu.Library == "cuda" && gpu.DriverMajor >= 7) ||
gpu.Library == "rocm"