mirror of https://github.com/ollama/ollama.git

Compare commits
2 commits: bc71278670...303be9304c
  - 303be9304c
  - bd15eba4e4
@@ -6,7 +6,9 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
+	"regexp"
 	"runtime"
+	"strconv"
 	"strings"
 
 	"github.com/ollama/ollama/format"
@@ -146,3 +148,35 @@ func GetSystemInfo() SystemInfo {
 		GPUs: gpus,
 	}
 }
+
+func cudaJetpack() string {
+	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
+		if CudaTegra != "" {
+			ver := strings.Split(CudaTegra, ".")
+			if len(ver) > 0 {
+				return "jetpack" + ver[0]
+			}
+		} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
+			r := regexp.MustCompile(` R(\d+) `)
+			m := r.FindSubmatch(data)
+			if len(m) != 2 {
+				slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
+			} else {
+				if l4t, err := strconv.Atoi(string(m[1])); err == nil {
+					// Note: mapping from L4T -> JetPack is inconsistent (can't just subtract 30)
+					// https://developer.nvidia.com/embedded/jetpack-archive
+					switch l4t {
+					case 35:
+						return "jetpack5"
+					case 36:
+						return "jetpack6"
+					default:
+						// Newer Jetson systems use the SBSA runtime
+						slog.Debug("unrecognized L4T version", "nv_tegra_release", string(data))
+					}
+				}
+			}
+		}
+	}
+	return ""
+}
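For context on the regexp in the new function: below is a minimal, runnable sketch, not part of this commit, showing how the ` R(\d+) ` pattern pulls the L4T major release out of an `/etc/nv_tegra_release` header line. The sample line is illustrative only, not copied from a real board or from the diff.

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Illustrative first line of /etc/nv_tegra_release on a JetPack 6 device
	// (format assumed from the regexp above, not taken from the commit).
	data := []byte("# R36 (release), REVISION: 3.0, GCID: 12345678, BOARD: generic, EABI: aarch64")

	r := regexp.MustCompile(` R(\d+) `)
	if m := r.FindSubmatch(data); len(m) == 2 {
		// Prints "36"; cudaJetpack() would map this to "jetpack6".
		fmt.Println("L4T major release:", string(m[1]))
	}
}
```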
@@ -78,6 +78,8 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 	}
 
 	slog.Info("discovering available GPUs...")
+	requested := envconfig.LLMLibrary()
+	jetpack := cudaJetpack()
 
 	// For our initial discovery pass, we gather all the known GPUs through
 	// all the libraries that were detected. This pass may include GPUs that
@@ -86,6 +88,14 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 	// times concurrently leading to memory contention
 	for dir := range libDirs {
 		var dirs []string
+		if dir != "" {
+			if requested != "" && filepath.Base(dir) != requested {
+				slog.Debug("skipping available library at users request", "requested", requested, "libDir", dir)
+				continue
+			} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
+				continue
+			}
+		}
 		if dir == "" {
 			dirs = []string{LibOllamaPath}
 		} else {
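The skip logic added above can be read in isolation as a single predicate: an explicitly requested library wins, otherwise a detected JetPack version restricts discovery to the matching `cuda_jetpackN` directory. Below is a minimal sketch of that rule with a hypothetical `keepLibDir` helper and made-up directory names; it mirrors the else-if chain from the hunk but is not code from the commit.

```go
package main

import (
	"fmt"
	"path/filepath"
)

// keepLibDir is a hypothetical helper mirroring the new skip logic in GPUDevices.
func keepLibDir(dir, requested, jetpack string) bool {
	if dir != "" {
		if requested != "" && filepath.Base(dir) != requested {
			return false // user asked for a different library
		} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
			return false // on Jetson, only the matching JetPack build applies
		}
	}
	return true // empty dir falls through to the default library path
}

func main() {
	// Made-up directory names, for illustration only.
	for _, d := range []string{"", "/usr/lib/ollama/cuda_v13", "/usr/lib/ollama/cuda_jetpack6"} {
		fmt.Println(d, keepLibDir(d, "", "jetpack6"))
	}
}
```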
@@ -38,26 +38,14 @@ Join the [Discord](https://discord.gg/ollama) for help interpreting the logs.
 
 ## LLM libraries
 
-Ollama includes multiple LLM libraries compiled for different GPUs and CPU vector features. Ollama tries to pick the best one based on the capabilities of your system. If this autodetection has problems, or you run into other problems (e.g. crashes in your GPU) you can workaround this by forcing a specific LLM library. `cpu_avx2` will perform the best, followed by `cpu_avx` and the slowest but most compatible is `cpu`. Rosetta emulation under MacOS will work with the `cpu` library.
-
-In the server log, you will see a message that looks something like this (varies from release to release):
-
-```
-Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5]
-```
+Ollama includes multiple LLM libraries compiled for different GPU libraries and versions. Ollama tries to pick the best one based on the capabilities of your system. If this autodetection has problems, or you run into other problems (e.g. crashes in your GPU) you can workaround this by forcing a specific LLM library.
 
 **Experimental LLM Library Override**
 
-You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to bypass autodetection, so for example, if you have a CUDA card, but want to force the CPU LLM library with AVX2 vector support, use:
+You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to limit autodetection, so for example, if you have both CUDA and AMD GPUs, but want to force the CUDA v13 only, use:
 
 ```shell
-OLLAMA_LLM_LIBRARY="cpu_avx2" ollama serve
+OLLAMA_LLM_LIBRARY="cuda_v13" ollama serve
 ```
-
-You can see what features your CPU has with the following.
-
-```shell
-cat /proc/cpuinfo| grep flags | head -1
-```
 
 ## Installing older or pre-release versions on Linux