Mirror of https://github.com/ollama/ollama.git
Go · 332 lines · 8.5 KiB
package model
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
_ "image/jpeg"
|
|
_ "image/png"
|
|
"log/slog"
|
|
"os"
|
|
"reflect"
|
|
"strconv"
|
|
"strings"
|
|
|
|
_ "golang.org/x/image/bmp"
|
|
_ "golang.org/x/image/tiff"
|
|
_ "golang.org/x/image/webp"
|
|
|
|
"github.com/ollama/ollama/fs"
|
|
fsggml "github.com/ollama/ollama/fs/ggml"
|
|
"github.com/ollama/ollama/kvcache"
|
|
"github.com/ollama/ollama/logutil"
|
|
"github.com/ollama/ollama/ml"
|
|
_ "github.com/ollama/ollama/ml/backend"
|
|
"github.com/ollama/ollama/ml/nn/pooling"
|
|
"github.com/ollama/ollama/model/input"
|
|
)
|
|
|
|
// Sentinel errors returned by this package.
var (
	// ErrNoVisionModel reports that the model lacks the data needed to accept image input.
	ErrNoVisionModel = errors.New("this model is missing data required for image input")

	// ErrUnsupportedModel reports that no constructor is registered for the model's architecture.
	ErrUnsupportedModel = errors.New("model not supported")

	// ErrUnsupportedTokenizer reports that the model does not implement TextProcessor.
	ErrUnsupportedTokenizer = errors.New("tokenizer not supported")
)
|
|
|
|
// Model implements a specific model architecture, defining the forward pass and any model-specific configuration
|
|
type Model interface {
|
|
Forward(ml.Context, input.Batch) (ml.Tensor, error)
|
|
|
|
Backend() ml.Backend
|
|
Config() config
|
|
}
|
|
|
|
// MultimodalProcessor must be implemented by multimodal models.
|
|
type MultimodalProcessor interface {
|
|
// EncodeMultimodal processes a single input (such as an image) and
|
|
// generates an output (typically an embedding) that can be used by the model.
|
|
//
|
|
// The return value is one or more tensors, each with optional model-specific
|
|
// opaque metadata. Typically, the tensors might be views into an embedding
|
|
// with each view representing a chunk of data that can be processed independently
|
|
// in different batches.
|
|
//
|
|
// The result may be cached by the runner.
|
|
EncodeMultimodal(ml.Context, []byte) ([]input.Multimodal, error)
|
|
|
|
// PostTokenize is called after tokenization to allow the model to edit the
|
|
// input stream to correctly arrange multimodal elements.
|
|
//
|
|
// The input is a slice of tokens with the results of EncodeMultimodal interleaved
|
|
// in the order that the user provided them. Each element of the slice will be
|
|
// either a single token or single multimodal object.
|
|
//
|
|
// The model must ensure that inputs are stored according to how they will be
|
|
// processed and stored in the cache. For example, Llava-style models should insert
|
|
// placeholder tokens equal to the feature size of the corresponding image with
|
|
// the image itself attached to and split across these tokens. When Forward is called
|
|
// a partial subset of these tokens may be submitted according to the batch size.
|
|
//
|
|
// This function is also responsible for updating MultimodalHash for any Multimodal
|
|
// that is modified to ensure that there is a unique hash value that accurately
|
|
// represents the contents.
|
|
PostTokenize([]*input.Input) ([]*input.Input, error)
|
|
}
|
|
|
|
// Base implements the common fields and methods for all models
|
|
type Base struct {
|
|
b ml.Backend
|
|
config
|
|
}
|
|
|
|
type config struct {
|
|
Cache kvcache.Cache
|
|
}
|
|
|
|
// Backend returns the underlying backend that will run the model
|
|
func (m *Base) Backend() ml.Backend {
|
|
return m.b
|
|
}
|
|
|
|
func (m *Base) Config() config {
|
|
return m.config
|
|
}
|
|
|
|
// models is the registry of model constructors keyed by architecture name.
// Register adds entries and modelForArch looks them up.
var models = map[string]func(fs.Config) (Model, error){}
|
|
|
|
// Register registers a model constructor for the given architecture
|
|
func Register(name string, f func(fs.Config) (Model, error)) {
|
|
if _, ok := models[name]; ok {
|
|
panic("model: model already registered")
|
|
}
|
|
|
|
models[name] = f
|
|
}
|
|
|
|
// New initializes a new model instance with the provided configuration based on the metadata in the model file
|
|
func New(modelPath string, params ml.BackendParams) (Model, error) {
|
|
b, err := ml.NewBackend(modelPath, params)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
m, err := modelForArch(b.Config())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
base := Base{b: b, config: m.Config()}
|
|
v := reflect.ValueOf(m)
|
|
v.Elem().Set(populateFields(base, v.Elem()))
|
|
return m, nil
|
|
}
|
|
|
|
func NewTextProcessor(s string) (TextProcessor, error) {
|
|
r, err := os.Open(s)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer r.Close()
|
|
|
|
meta, err := fsggml.Decode(r, -1)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
m, err := modelForArch(meta.KV())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
tp, ok := m.(TextProcessor)
|
|
if !ok {
|
|
return nil, ErrUnsupportedTokenizer
|
|
}
|
|
return tp, nil
|
|
}
|
|
|
|
func modelForArch(c fs.Config) (Model, error) {
|
|
arch := c.Architecture()
|
|
if pooling.Type(c.Uint("pooling_type")) != pooling.TypeNone {
|
|
arch = arch + "_embed"
|
|
}
|
|
|
|
f, ok := models[arch]
|
|
if !ok {
|
|
return nil, ErrUnsupportedModel
|
|
}
|
|
|
|
return f(c)
|
|
}
|
|
|
|
func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
|
|
t := v.Type()
|
|
|
|
if t.Kind() == reflect.Struct {
|
|
allNil := true
|
|
for i := range t.NumField() {
|
|
tt := t.Field(i).Type
|
|
vv := v.Field(i)
|
|
if !vv.CanSet() {
|
|
continue
|
|
}
|
|
|
|
// make a copy
|
|
tagsCopy := tags
|
|
if tag := t.Field(i).Tag.Get("gguf"); tag != "" {
|
|
tagsCopy = append(tagsCopy, parseTag(tag))
|
|
}
|
|
|
|
if tt == reflect.TypeOf((*Base)(nil)).Elem() {
|
|
vv.Set(reflect.ValueOf(base))
|
|
} else if tt == reflect.TypeOf((*ml.Tensor)(nil)).Elem() {
|
|
var fn func([]Tag, string, string) [][]string
|
|
fn = func(tags []Tag, prefix, suffix string) (fullNames [][]string) {
|
|
if len(tags) > 0 {
|
|
var names []string
|
|
if tags[0].name != "" {
|
|
for _, n := range append([]string{tags[0].name}, tags[0].alternatives...) {
|
|
names = append(names, prefix+n+suffix)
|
|
}
|
|
}
|
|
|
|
if childNames := fn(tags[1:], tags[0].prefix, tags[0].suffix); len(childNames) == 0 {
|
|
// no child names, append current names
|
|
fullNames = append(fullNames, names)
|
|
} else if len(names) == 0 {
|
|
// no current names, append child names
|
|
fullNames = append(fullNames, childNames...)
|
|
} else {
|
|
// combine current and child names
|
|
for _, name := range names {
|
|
for _, childName := range childNames {
|
|
fullNames = append(fullNames, append([]string{name}, childName...))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return fullNames
|
|
}
|
|
|
|
names := fn(tagsCopy, "", "")
|
|
for _, name := range names {
|
|
if tensor := base.Backend().Get(strings.Join(name, ".")); tensor != nil {
|
|
logutil.Trace("found tensor", "", tensor)
|
|
vv.Set(reflect.ValueOf(tensor))
|
|
break
|
|
}
|
|
}
|
|
} else if tt.Kind() == reflect.Pointer || tt.Kind() == reflect.Interface {
|
|
setPointer(base, vv, tagsCopy)
|
|
} else if tt.Kind() == reflect.Slice || tt.Kind() == reflect.Array {
|
|
for i := range vv.Len() {
|
|
vvv := vv.Index(i)
|
|
if vvv.Kind() == reflect.Pointer || vvv.Kind() == reflect.Interface {
|
|
setPointer(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)}))
|
|
} else {
|
|
vvv.Set(populateFields(base, vvv, append(tagsCopy, Tag{name: strconv.Itoa(i)})...))
|
|
}
|
|
}
|
|
}
|
|
|
|
if !canNil(tt) || !vv.IsNil() {
|
|
allNil = false
|
|
}
|
|
}
|
|
|
|
if allNil {
|
|
return reflect.Zero(t)
|
|
}
|
|
}
|
|
|
|
return v
|
|
}
|
|
|
|
func setPointer(base Base, v reflect.Value, tags []Tag) {
|
|
vv := v
|
|
if v.Kind() == reflect.Interface {
|
|
if v.IsNil() {
|
|
return
|
|
}
|
|
|
|
vv = vv.Elem()
|
|
}
|
|
|
|
vv = reflect.Indirect(vv)
|
|
if v.IsNil() {
|
|
vv = reflect.New(v.Type().Elem()).Elem()
|
|
}
|
|
|
|
if f := populateFields(base, vv, tags...); f.CanAddr() {
|
|
v.Set(f.Addr())
|
|
}
|
|
}
|
|
|
|
// Tag is the parsed form of a `gguf` struct tag. It describes how a field
// contributes to the tensor names looked up in the backend.
type Tag struct {
	// name is this field's component of the tensor name; it may be empty.
	name string

	// prefix and suffix are applied to child tags
	prefix string
	suffix string

	// alternatives are additional names tried after name, in order.
	alternatives []string
}
|
|
|
|
func parseTag(s string) (tag Tag) {
|
|
parts := strings.Split(s, ",")
|
|
if len(parts) > 0 {
|
|
tag.name = parts[0]
|
|
|
|
for _, part := range parts[1:] {
|
|
if value, ok := strings.CutPrefix(part, "alt:"); ok && tag.name == "" {
|
|
// elevate alternative to primary if no primary given
|
|
tag.name = value
|
|
slog.Warn("gguf tag has alt: but no primary name", "tag", s)
|
|
} else if ok {
|
|
tag.alternatives = append(tag.alternatives, value)
|
|
}
|
|
if value, ok := strings.CutPrefix(part, "pre:"); ok {
|
|
tag.prefix = value
|
|
}
|
|
if value, ok := strings.CutPrefix(part, "suf:"); ok {
|
|
tag.suffix = value
|
|
}
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
// canNil reports whether values of type t can be nil, i.e. whether t is a
// channel, function, interface, map, pointer or slice type.
func canNil(t reflect.Type) bool {
	switch t.Kind() {
	case reflect.Chan,
		reflect.Func,
		reflect.Interface,
		reflect.Map,
		reflect.Pointer,
		reflect.Slice:
		return true
	default:
		return false
	}
}
|
|
|
|
func Forward(ctx ml.Context, m Model, batch input.Batch) (ml.Tensor, error) {
|
|
if len(batch.Positions) != len(batch.Sequences) {
|
|
return nil, fmt.Errorf("length of positions (%v) must match length of seqs (%v)", len(batch.Positions), len(batch.Sequences))
|
|
}
|
|
|
|
if len(batch.Positions) < 1 {
|
|
return nil, errors.New("batch size cannot be less than 1")
|
|
}
|
|
|
|
cache := m.Config().Cache
|
|
if cache != nil {
|
|
err := cache.StartForward(ctx, batch, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
t, err := m.Forward(ctx, batch)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ctx.Forward(t)
|
|
|
|
return t, nil
|
|
}
|