Compare commits

3 Commits

Author SHA1 Message Date
Daniel Müller 449fd8bb7e
Merge bcdf66e583 into bd15eba4e4 2025-10-08 01:17:30 +02:00
Daniel Hiltgen bd15eba4e4
Bring back escape valve for llm libraries and fix Jetpack6 crash (#12529)
* Bring back escape valve for llm libraries

If the new discovery logic picks the wrong library, this gives users the
ability to force a specific one using the same pattern as before. This
can also potentially speed up bootstrap discovery if one of the libraries
takes a long time to load and ultimately binds to no devices. For example,
unsupported AMD iGPUs can sometimes take a while to discover and rule out.

* Bypass extra discovery on jetpack systems

On at least Jetpack6, cuda_v12 appears to expose the iGPU but crashes later
in cublasInit, so if we detect a Jetpack, we short-circuit and use that variant.
2025-10-07 16:06:14 -07:00
Daniel Müller bcdf66e583
docs: fix Gentoo package link
The Gentoo Guru overlay has reclassified ollama under the sci-ml category.
Update the link accordingly.
2025-07-31 19:22:21 -07:00
4 changed files with 47 additions and 9 deletions

View File

@@ -478,7 +478,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 ### Package managers
 
 - [Pacman](https://archlinux.org/packages/extra/x86_64/ollama/)
-- [Gentoo](https://github.com/gentoo/guru/tree/master/app-misc/ollama)
+- [Gentoo](https://github.com/gentoo/guru/tree/master/sci-ml/ollama)
 - [Homebrew](https://formulae.brew.sh/formula/ollama)
 - [Helm Chart](https://artifacthub.io/packages/helm/ollama-helm/ollama)
 - [Guix channel](https://codeberg.org/tusharhero/ollama-guix)

View File

@@ -6,7 +6,9 @@ import (
 	"log/slog"
 	"os"
 	"path/filepath"
+	"regexp"
 	"runtime"
+	"strconv"
 	"strings"
 
 	"github.com/ollama/ollama/format"
@@ -146,3 +148,35 @@ func GetSystemInfo() SystemInfo {
 		GPUs: gpus,
 	}
 }
+
+func cudaJetpack() string {
+	if runtime.GOARCH == "arm64" && runtime.GOOS == "linux" {
+		if CudaTegra != "" {
+			ver := strings.Split(CudaTegra, ".")
+			if len(ver) > 0 {
+				return "jetpack" + ver[0]
+			}
+		} else if data, err := os.ReadFile("/etc/nv_tegra_release"); err == nil {
+			r := regexp.MustCompile(` R(\d+) `)
+			m := r.FindSubmatch(data)
+			if len(m) != 2 {
+				slog.Info("Unexpected format for /etc/nv_tegra_release. Set JETSON_JETPACK to select version")
+			} else {
+				if l4t, err := strconv.Atoi(string(m[1])); err == nil {
+					// Note: mapping from L4T -> JP is inconsistent (can't just subtract 30)
+					// https://developer.nvidia.com/embedded/jetpack-archive
+					switch l4t {
+					case 35:
+						return "jetpack5"
+					case 36:
+						return "jetpack6"
+					default:
+						// Newer Jetson systems use the SBSA runtime
+						slog.Debug("unrecognized L4T version", "nv_tegra_release", string(data))
+					}
+				}
+			}
+		}
+	}
+	return ""
+}
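
For illustration, here is a minimal, self-contained sketch of the same `/etc/nv_tegra_release` parsing, with a hypothetical `jetpackFromTegraRelease` helper; the sample file contents below are assumed, not taken from a real device:

```go
package main

import (
	"fmt"
	"regexp"
	"strconv"
)

// jetpackFromTegraRelease mirrors the parsing in cudaJetpack: extract the
// L4T major release (e.g. "R36") and map it to a JetPack variant name.
func jetpackFromTegraRelease(data []byte) string {
	m := regexp.MustCompile(` R(\d+) `).FindSubmatch(data)
	if len(m) != 2 {
		return ""
	}
	l4t, err := strconv.Atoi(string(m[1]))
	if err != nil {
		return ""
	}
	// The L4T -> JetPack mapping is irregular, hence the explicit switch.
	switch l4t {
	case 35:
		return "jetpack5"
	case 36:
		return "jetpack6"
	}
	return ""
}

func main() {
	// Hypothetical first line of /etc/nv_tegra_release on a JetPack 6 system.
	sample := []byte("# R36 (release), REVISION: 3.0, GCID: 00000000, BOARD: generic, EABI: aarch64")
	fmt.Println(jetpackFromTegraRelease(sample)) // prints: jetpack6
}
```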

View File

@@ -78,6 +78,8 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 	}
 	slog.Info("discovering available GPUs...")
 	requested := envconfig.LLMLibrary()
+	jetpack := cudaJetpack()
+
 	// For our initial discovery pass, we gather all the known GPUs through
 	// all the libraries that were detected. This pass may include GPUs that
@@ -86,6 +88,14 @@ func GPUDevices(ctx context.Context, runners []FilteredRunnerDiscovery) []ml.Dev
 	// times concurrently leading to memory contention
 	for dir := range libDirs {
 		var dirs []string
+		if dir != "" {
+			if requested != "" && filepath.Base(dir) != requested {
+				slog.Debug("skipping available library at users request", "requested", requested, "libDir", dir)
+				continue
+			} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
+				continue
+			}
+		}
 		if dir == "" {
			dirs = []string{LibOllamaPath}
 		} else {
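
Taken together, the two new checks act as a filter over the candidate library directories before any of them are probed. A minimal sketch of that decision, using a hypothetical selectLibDirs helper (not part of the patch) and made-up install paths:

```go
package main

import (
	"fmt"
	"path/filepath"
)

// selectLibDirs applies the same skip rules added to GPUDevices: an explicit
// OLLAMA_LLM_LIBRARY request keeps only the matching directory, and on a
// Jetson only the matching cuda_jetpackN variant is probed. The empty string
// stands for the default library path and is always kept.
func selectLibDirs(libDirs []string, requested, jetpack string) []string {
	var keep []string
	for _, dir := range libDirs {
		if dir != "" {
			if requested != "" && filepath.Base(dir) != requested {
				continue // user forced a different library
			} else if jetpack != "" && filepath.Base(dir) != "cuda_"+jetpack {
				continue // Jetson: skip every non-Jetpack variant
			}
		}
		keep = append(keep, dir)
	}
	return keep
}

func main() {
	// Made-up layout on a system where cudaJetpack() returned "jetpack6".
	dirs := []string{"", "/usr/lib/ollama/cuda_v12", "/usr/lib/ollama/cuda_jetpack6", "/usr/lib/ollama/rocm_v6"}
	fmt.Println(selectLibDirs(dirs, "", "jetpack6"))
	// Output: [ /usr/lib/ollama/cuda_jetpack6] -- the default path plus the Jetpack variant
}
```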

View File

@@ -48,16 +48,10 @@ Dynamic LLM libraries [rocm_v6 cpu cpu_avx cpu_avx2 cuda_v12 rocm_v5]
 
 **Experimental LLM Library Override**
 
-You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to bypass autodetection, so for example, if you have a CUDA card, but want to force the CPU LLM library with AVX2 vector support, use:
+You can set OLLAMA_LLM_LIBRARY to any of the available LLM libraries to limit autodetection, so for example, if you have both CUDA and AMD GPUs but want to force only CUDA v13, use:
 
 ```shell
-OLLAMA_LLM_LIBRARY="cpu_avx2" ollama serve
-```
-
-You can see what features your CPU has with the following.
-
-```shell
-cat /proc/cpuinfo| grep flags | head -1
+OLLAMA_LLM_LIBRARY="cuda_v13" ollama serve
 ```
 
 ## Installing older or pre-release versions on Linux