package discover

import (
	"context"
	"log/slog"
	"path/filepath"
	"runtime"
	"strings"

	"github.com/ollama/ollama/format"
	"github.com/ollama/ollama/ml"
)

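// memInfo captures the memory totals reported for a device or for the host system.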
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
	FreeSwap    uint64 `json:"free_swap,omitempty"` // TODO split this out for system only
}

// Beginning of an `ollama info` command
type GpuInfo struct { // TODO better name maybe "InferenceProcessor"?
	ml.DeviceID
	memInfo

	// Optional variant to select (e.g. versions, cpu feature flags)
	Variant string `json:"variant"`

	// MinimumMemory represents the minimum memory required to use the GPU
	MinimumMemory uint64 `json:"-"`

	// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
	DependencyPath []string `json:"lib_path,omitempty"`

	// Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
	// FreeMemory is best effort and may over- or under-report actual memory usage.
	// False indicates FreeMemory can generally be trusted on this GPU.
	UnreliableFreeMemory bool

	// GPU information
	filterID string // AMD workaround: the numeric ID of the device used to filter out other devices
	Name     string `json:"name"`    // user friendly name if available
	Compute  string `json:"compute"` // Compute Capability or gfx

	// Driver Information - TODO no need to put this on each GPU
	DriverMajor int `json:"driver_major,omitempty"`
	DriverMinor int `json:"driver_minor,omitempty"`

	// TODO other performance capability info to help in scheduling decisions
}

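// RunnerName returns the runner name for this GPU: the Library, with the
// Variant appended after an underscore when one is set (for example a
// versioned build of the same library).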
func (gpu GpuInfo) RunnerName() string {
	if gpu.Variant != "" {
		return gpu.Library + "_" + gpu.Variant
	}
	return gpu.Library
}

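// CPUInfo describes the host CPU as an inference device, along with the
// individual CPU packages that were discovered.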
type CPUInfo struct {
	GpuInfo
	CPUs []CPU
}

// CPU represents a CPU package occupying a socket
type CPU struct {
	ID                  string `cpuinfo:"processor"`
	VendorID            string `cpuinfo:"vendor_id"`
	ModelName           string `cpuinfo:"model name"`
	CoreCount           int
	EfficiencyCoreCount int // Performance cores = CoreCount - EfficiencyCoreCount
	ThreadCount         int
}

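// GpuInfoList is a collection of discovered GPUs.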
type GpuInfoList []GpuInfo

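// ByLibrary partitions the list into groups of GPUs that share the same
// Library (and Variant, when set), for example all CUDA devices in one group
// and all ROCm devices in another.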
func (l GpuInfoList) ByLibrary() []GpuInfoList {
	resp := []GpuInfoList{}
	libs := []string{}
	for _, info := range l {
		found := false
		requested := info.Library
		if info.Variant != "" {
			requested += "_" + info.Variant
		}
		for i, lib := range libs {
			if lib == requested {
				resp[i] = append(resp[i], info)
				found = true
				break
			}
		}
		if !found {
			libs = append(libs, requested)
			resp = append(resp, []GpuInfo{info})
		}
	}
	return resp
}

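// LogDetails emits one "inference compute" log line per discovered device.
// When no devices are found, it falls back to logging the system (CPU) memory.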
func LogDetails(devices []ml.DeviceInfo) {
	for _, dev := range devices {
		var libs []string
		for _, dir := range dev.LibraryPath {
			if strings.Contains(dir, filepath.Join("lib", "ollama")) {
				libs = append(libs, filepath.Base(dir))
			}
		}
		typeStr := "discrete"
		if dev.Integrated {
			typeStr = "iGPU"
		}
		slog.Info("inference compute",
			"id", dev.ID,
			"library", dev.Library,
			"compute", dev.Compute(),
			"name", dev.Name,
			"description", dev.Description,
			"libdirs", strings.Join(libs, ","),
			"driver", dev.Driver(),
			"pci_id", dev.PCIID,
			"type", typeStr,
			"total", format.HumanBytes2(dev.TotalMemory),
			"available", format.HumanBytes2(dev.FreeMemory),
		)
	}

	// CPU inference
	if len(devices) == 0 {
		dev, _ := GetCPUMem()
		slog.Info("inference compute",
			"id", "cpu",
			"library", "cpu",
			"compute", "",
			"name", "cpu",
			"description", "cpu",
			"libdirs", "ollama",
			"driver", "",
			"pci_id", "",
			"type", "",
			"total", format.HumanBytes2(dev.TotalMemory),
			"available", format.HumanBytes2(dev.FreeMemory),
		)
	}
}

// ByFreeMemory sorts GPUs by ascending free memory.
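// For example, sort.Sort(ByFreeMemory(gpus)) places the GPU with the least
// free memory first, and sort.Sort(sort.Reverse(ByFreeMemory(gpus))) the most.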
type ByFreeMemory []GpuInfo

func (a ByFreeMemory) Len() int           { return len(a) }
func (a ByFreeMemory) Swap(i, j int)      { a[i], a[j] = a[j], a[i] }
func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }

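// SystemInfo aggregates the host CPU description with the discovered GPUs.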
type SystemInfo struct {
	System CPUInfo   `json:"system"`
	GPUs   []GpuInfo `json:"gpus"`
}

// GetOptimalThreadCount returns the optimal number of threads to use for
// inference, counting only performance (non-efficiency) cores.
func (si SystemInfo) GetOptimalThreadCount() int {
	if len(si.System.CPUs) == 0 {
		// Fall back to Go's reported CPU count
		return runtime.NumCPU()
	}

	coreCount := 0
	for _, c := range si.System.CPUs {
		coreCount += c.CoreCount - c.EfficiencyCoreCount
	}

	return coreCount
}

// FlashAttentionSupported reports whether every GPU in the list supports flash
// attention; it returns false as soon as one GPU does not.
func (l GpuInfoList) FlashAttentionSupported() bool {
	for _, gpu := range l {
		supportsFA := gpu.Library == "cpu" ||
			gpu.Name == "Metal" || gpu.Library == "Metal" ||
			(gpu.Library == "CUDA" && gpu.DriverMajor >= 7) ||
			gpu.Library == "ROCm"

		if !supportsFA {
			return false
		}
	}
	return true
}

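// BaseRunner is the minimal interface exposed by a running model runner.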
type BaseRunner interface {
	// GetPort returns the localhost port number the runner is running on
	GetPort() int

	// HasExited indicates if the runner is no longer running. This can be used during
	// bootstrap to detect if a given filtered device is incompatible and triggered an assert
	HasExited() bool
}

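// RunnerDiscovery extends BaseRunner with device discovery queries.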
type RunnerDiscovery interface {
	BaseRunner

	// GetDeviceInfos will perform a query of the underlying device libraries
	// for device identification and free VRAM information.
	// During bootstrap scenarios, this routine may take seconds to complete.
	GetDeviceInfos(ctx context.Context) []ml.DeviceInfo
}

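// FilteredRunnerDiscovery is implemented by runners that can also report which
// devices they are actively using.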
type FilteredRunnerDiscovery interface {
	RunnerDiscovery

	// GetActiveDeviceIDs returns the filtered set of devices actively in
	// use by this runner for running models. If the runner is a bootstrap runner, no devices
	// will be active yet so no device IDs are returned.
	// This routine will not query the underlying device and will return immediately.
	GetActiveDeviceIDs() []ml.DeviceID
}