// File: ollama/model/models/gemma3/process_image.go
// (53 lines, 1.3 KiB, Go)

package gemma3
import (
"image"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model/imageproc"
)
// ImageProcessor holds the vision settings used to turn an image into the
// flat float32 pixel data produced by ProcessImage.
type ImageProcessor struct {
	// imageSize is the edge length, in pixels, of the square image that
	// ProcessImage resizes its input to.
	imageSize int
	// patchSize is read from the "vision.patch_size" configuration key.
	patchSize int
	// numChannels is read from the "vision.num_channels" configuration key.
	numChannels int
}
func newImageProcessor(c ml.Config) ImageProcessor {
return ImageProcessor{
imageSize: int(c.Uint("vision.image_size")),
2025-03-07 04:16:54 +08:00
patchSize: int(c.Uint("vision.patch_size")),
2025-02-08 07:58:15 +08:00
numChannels: int(c.Uint("vision.num_channels")),
}
}
func (p *ImageProcessor) pack(img image.Image, mean, std [3]float32) []float32 {
var pixelVals []float32
bounds := img.Bounds()
2025-03-09 04:31:57 +08:00
for x := bounds.Min.X; x < bounds.Max.X; x++ {
for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
2025-02-08 07:58:15 +08:00
c := img.At(x, y)
r, g, b, _ := c.RGBA()
rVal := float32(r>>8) / 255.0
gVal := float32(g>>8) / 255.0
bVal := float32(b>>8) / 255.0
rVal = (rVal - mean[0]) / std[0]
gVal = (gVal - mean[1]) / std[1]
bVal = (bVal - mean[2]) / std[2]
2025-03-09 04:31:57 +08:00
pixelVals = append(pixelVals, rVal, gVal, bVal)
2025-02-08 07:58:15 +08:00
}
}
return pixelVals
}
// ProcessImage converts img into the flat float32 pixel data for the model.
//
// The image is passed through imageproc.Composite, resized to a square of
// imageSize x imageSize pixels with bilinear interpolation, and then packed
// using imageproc.ImageNetStandardMean and imageproc.ImageNetStandardSTD as
// the per-channel normalization constants. The returned error is always nil.
func (p ImageProcessor) ProcessImage(img image.Image) ([]float32, error) {
	target := image.Point{X: p.imageSize, Y: p.imageSize}

	resized := imageproc.Resize(imageproc.Composite(img), target, imageproc.ResizeBilinear)

	return p.pack(resized, imageproc.ImageNetStandardMean, imageproc.ImageNetStandardSTD), nil
}