| 
									
										
										
										
											2025-02-08 07:58:15 +08:00
										 |  |  | package gemma3 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import ( | 
					
						
							|  |  |  | 	"image" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	"github.com/ollama/ollama/ml" | 
					
						
							|  |  |  | 	"github.com/ollama/ollama/model/imageproc" | 
					
						
							|  |  |  | ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | type ImageProcessor struct { | 
					
						
							| 
									
										
										
										
											2025-03-07 04:16:54 +08:00
										 |  |  | 	imageSize, patchSize, numChannels int | 
					
						
							| 
									
										
										
										
											2025-02-08 07:58:15 +08:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func newImageProcessor(c ml.Config) ImageProcessor { | 
					
						
							|  |  |  | 	return ImageProcessor{ | 
					
						
							|  |  |  | 		imageSize:   int(c.Uint("vision.image_size")), | 
					
						
							| 
									
										
										
										
											2025-03-07 04:16:54 +08:00
										 |  |  | 		patchSize:   int(c.Uint("vision.patch_size")), | 
					
						
							| 
									
										
										
										
											2025-02-08 07:58:15 +08:00
										 |  |  | 		numChannels: int(c.Uint("vision.num_channels")), | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (p *ImageProcessor) pack(img image.Image, mean, std [3]float32) []float32 { | 
					
						
							| 
									
										
										
										
											2025-03-10 07:18:13 +08:00
										 |  |  | 	var pixelVals, rVals, gVals, bVals []float32 | 
					
						
							| 
									
										
										
										
											2025-02-08 07:58:15 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 	bounds := img.Bounds() | 
					
						
							| 
									
										
										
										
											2025-03-10 07:18:13 +08:00
										 |  |  | 	for y := bounds.Min.Y; y < bounds.Max.Y; y++ { | 
					
						
							|  |  |  | 		for x := bounds.Min.X; x < bounds.Max.X; x++ { | 
					
						
							| 
									
										
										
										
											2025-02-08 07:58:15 +08:00
										 |  |  | 			c := img.At(x, y) | 
					
						
							|  |  |  | 			r, g, b, _ := c.RGBA() | 
					
						
							|  |  |  | 			rVal := float32(r>>8) / 255.0 | 
					
						
							|  |  |  | 			gVal := float32(g>>8) / 255.0 | 
					
						
							|  |  |  | 			bVal := float32(b>>8) / 255.0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 			rVal = (rVal - mean[0]) / std[0] | 
					
						
							|  |  |  | 			gVal = (gVal - mean[1]) / std[1] | 
					
						
							|  |  |  | 			bVal = (bVal - mean[2]) / std[2] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-10 07:18:13 +08:00
										 |  |  | 			rVals = append(rVals, rVal) | 
					
						
							|  |  |  | 			gVals = append(gVals, gVal) | 
					
						
							|  |  |  | 			bVals = append(bVals, bVal) | 
					
						
							| 
									
										
										
										
											2025-02-08 07:58:15 +08:00
										 |  |  | 		} | 
					
						
							|  |  |  | 	} | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-03-10 07:18:13 +08:00
										 |  |  | 	pixelVals = append(pixelVals, rVals...) | 
					
						
							|  |  |  | 	pixelVals = append(pixelVals, gVals...) | 
					
						
							|  |  |  | 	pixelVals = append(pixelVals, bVals...) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-08 07:58:15 +08:00
										 |  |  | 	return pixelVals | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | func (p ImageProcessor) ProcessImage(img image.Image) ([]float32, error) { | 
					
						
							|  |  |  | 	outputSize := image.Point{p.imageSize, p.imageSize} | 
					
						
							|  |  |  | 	newImage := imageproc.Composite(img) | 
					
						
							|  |  |  | 	newImage = imageproc.Resize(newImage, outputSize, imageproc.ResizeBilinear) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 	data := p.pack(newImage, imageproc.ImageNetStandardMean, imageproc.ImageNetStandardSTD) | 
					
						
							|  |  |  | 	return data, nil | 
					
						
							|  |  |  | } |