ollama/model/models/gemma3/process_image.go

59 lines
1.5 KiB
Go
Raw Normal View History

2025-02-08 07:58:15 +08:00
package gemma3
import (
"image"
"github.com/ollama/ollama/ml"
"github.com/ollama/ollama/model/imageproc"
)
// ImageProcessor holds the vision settings used to turn an image into the
// float32 buffer produced by ProcessImage.
type ImageProcessor struct {
	// imageSize is the edge length, in pixels, of the square image that
	// ProcessImage resizes its input to. Read from "vision.image_size".
	imageSize int

	// patchSize is read from "vision.patch_size". It is not used by the
	// methods in this file.
	patchSize int

	// numChannels is read from "vision.num_channels". It is not used by the
	// methods in this file.
	numChannels int
}
func newImageProcessor(c ml.Config) ImageProcessor {
return ImageProcessor{
imageSize: int(c.Uint("vision.image_size")),
2025-03-07 04:16:54 +08:00
patchSize: int(c.Uint("vision.patch_size")),
2025-02-08 07:58:15 +08:00
numChannels: int(c.Uint("vision.num_channels")),
}
}
func (p *ImageProcessor) pack(img image.Image, mean, std [3]float32) []float32 {
2025-03-10 07:18:13 +08:00
var pixelVals, rVals, gVals, bVals []float32
2025-02-08 07:58:15 +08:00
bounds := img.Bounds()
2025-03-10 07:18:13 +08:00
for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
for x := bounds.Min.X; x < bounds.Max.X; x++ {
2025-02-08 07:58:15 +08:00
c := img.At(x, y)
r, g, b, _ := c.RGBA()
rVal := float32(r>>8) / 255.0
gVal := float32(g>>8) / 255.0
bVal := float32(b>>8) / 255.0
rVal = (rVal - mean[0]) / std[0]
gVal = (gVal - mean[1]) / std[1]
bVal = (bVal - mean[2]) / std[2]
2025-03-10 07:18:13 +08:00
rVals = append(rVals, rVal)
gVals = append(gVals, gVal)
bVals = append(bVals, bVal)
2025-02-08 07:58:15 +08:00
}
}
2025-03-10 07:18:13 +08:00
pixelVals = append(pixelVals, rVals...)
pixelVals = append(pixelVals, gVals...)
pixelVals = append(pixelVals, bVals...)
2025-02-08 07:58:15 +08:00
return pixelVals
}
// ProcessImage prepares img for the model and returns it as a flat float32
// slice.
//
// The image is passed through imageproc.Composite, resized to a square of
// p.imageSize by p.imageSize pixels with imageproc.ResizeBilinear, and then
// packed using imageproc.ImageNetStandardMean and
// imageproc.ImageNetStandardSTD as the per-channel mean and standard
// deviation. The returned error is always nil.
func (p ImageProcessor) ProcessImage(img image.Image) ([]float32, error) {
	// NOTE(review): Composite presumably flattens transparency before the
	// resize; confirm against the imageproc package.
	target := image.Point{X: p.imageSize, Y: p.imageSize}
	resized := imageproc.Resize(imageproc.Composite(img), target, imageproc.ResizeBilinear)

	return p.pack(resized, imageproc.ImageNetStandardMean, imageproc.ImageNetStandardSTD), nil
}