mirror of https://github.com/ollama/ollama.git
ggml: update qwen25vl vision size estimate (#10711)
This commit is contained in:
parent
ff80718e9c
commit
bd68d3ae50
|
@ -6,7 +6,6 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"math"
|
|
||||||
"slices"
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
@ -653,24 +652,15 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
|
||||||
numPatches*numPatches*headCount)
|
numPatches*numPatches*headCount)
|
||||||
case "qwen25vl":
|
case "qwen25vl":
|
||||||
maxPixels := uint64(llm.KV().Uint("vision.max_pixels", 28*28*1280))
|
maxPixels := uint64(llm.KV().Uint("vision.max_pixels", 28*28*1280))
|
||||||
mergeSize := uint64(llm.KV().Uint("vision.spatial_merge_size", 2))
|
|
||||||
temporalPatchSize := uint64(2)
|
|
||||||
|
|
||||||
// Calculate max possible patches based on max_pixels
|
numPatches := maxPixels / (patchSize * patchSize)
|
||||||
maxHeight := uint64(math.Sqrt(float64(maxPixels)))
|
|
||||||
maxWidth := maxPixels / maxHeight
|
|
||||||
maxGridHeight := maxHeight / patchSize
|
|
||||||
maxGridWidth := maxWidth / patchSize
|
|
||||||
// Account for merged patches (2x2 grid)
|
|
||||||
numPatches := (maxGridHeight * maxGridWidth) / (mergeSize * mergeSize)
|
|
||||||
|
|
||||||
// Calculate graph size based on typical operations in ProcessImage and createPatches
|
|
||||||
graphSize = 4 * (maxPixels*numChannels + // Original image storage
|
graphSize = 4 * (maxPixels*numChannels + // Original image storage
|
||||||
// Normalized pixels
|
// Normalized pixels
|
||||||
maxPixels*numChannels +
|
maxPixels*numChannels +
|
||||||
// Patches storage (numPatches * channels * temporalPatchSize * patchSize^2)
|
// Patches storage (numPatches * channels * patchSize^2)
|
||||||
numPatches*numChannels*temporalPatchSize*patchSize*patchSize +
|
numPatches*numChannels*patchSize*patchSize +
|
||||||
// Self-attention calculations (similar to other architectures)
|
// Self-attention calculations
|
||||||
numPatches*numPatches*headCount +
|
numPatches*numPatches*headCount +
|
||||||
// Additional buffer for processing
|
// Additional buffer for processing
|
||||||
embeddingLength*numPatches)
|
embeddingLength*numPatches)
|
||||||
|
|
Loading…
Reference in New Issue