mirror of https://github.com/ollama/ollama.git
289 lines
5.8 KiB
Go
289 lines
5.8 KiB
Go
package gguf
|
|
|
|
import (
|
|
"log/slog"
|
|
"strings"
|
|
)
|
|
|
|
type TensorInfo struct {
|
|
Name string
|
|
Offset uint64
|
|
Shape []uint64
|
|
Type TensorType
|
|
}
|
|
|
|
func (ti TensorInfo) Valid() bool {
|
|
return ti.Name != "" && ti.NumBytes() > 0
|
|
}
|
|
|
|
func (ti TensorInfo) NumValues() int64 {
|
|
var numItems int64 = 1
|
|
for _, dim := range ti.Shape {
|
|
numItems *= int64(dim)
|
|
}
|
|
return numItems
|
|
}
|
|
|
|
// NumBytes returns the number of bytes in the tensor.
|
|
func (ti TensorInfo) NumBytes() int64 {
|
|
return int64(float64(ti.NumValues()) * ti.Type.NumBytes())
|
|
}
|
|
|
|
func (ti TensorInfo) LogValue() slog.Value {
|
|
return slog.GroupValue(
|
|
slog.String("name", ti.Name),
|
|
slog.Int64("offset", int64(ti.Offset)),
|
|
slog.Any("shape", ti.Shape),
|
|
slog.Int64("num_values", ti.NumValues()),
|
|
slog.Int64("num_bytes", ti.NumBytes()),
|
|
slog.Any("type", ti.Type),
|
|
)
|
|
}
|
|
|
|
type TensorType uint32
|
|
|
|
const (
|
|
TensorTypeF32 TensorType = iota
|
|
TensorTypeF16
|
|
TensorTypeQ4_0
|
|
TensorTypeQ4_1
|
|
|
|
// unexported // unused in gguf
|
|
tensorTypeQ4_2
|
|
tensorTypeQ4_3
|
|
|
|
TensorTypeQ5_0
|
|
TensorTypeQ5_1
|
|
TensorTypeQ8_0
|
|
TensorTypeQ8_1
|
|
TensorTypeQ2_K
|
|
TensorTypeQ3_K
|
|
TensorTypeQ4_K
|
|
TensorTypeQ5_K
|
|
TensorTypeQ6_K
|
|
TensorTypeQ8_K
|
|
|
|
// unexported // unquantizable by ollama
|
|
tensorTypeIQ2_XXS
|
|
tensorTypeIQ2_XS
|
|
tensorTypeIQ3_XXS
|
|
tensorTypeIQ1_S
|
|
tensorTypeIQ4_NL
|
|
tensorTypeIQ3_S
|
|
tensorTypeIQ2_S
|
|
tensorTypeIQ4_XS
|
|
|
|
TensorTypeI8
|
|
TensorTypeI16
|
|
TensorTypeI32
|
|
TensorTypeI64
|
|
TensorTypeF64
|
|
|
|
// unexported // unquantizable by ollama
|
|
tensorTypeIQ1_M
|
|
|
|
TensorTypeBF16
|
|
|
|
// unexported // unused in gguf
|
|
tensorTypeQ4_0_4_4
|
|
tensorTypeQ4_0_4_8
|
|
tensorTypeQ4_0_8_8
|
|
|
|
// unexported // unquantizable by ollama
|
|
tensorTypeTQ1_0
|
|
tensorTypeTQ2_0
|
|
|
|
// unexported // unused in gguf
|
|
tensorTypeIQ4_NL_4_4
|
|
tensorTypeIQ4_NL_4_8
|
|
tensorTypeIQ4_NL_8_8
|
|
)
|
|
|
|
func (tt TensorType) NumBytes() float64 {
|
|
return float64(tt.typeSize()) / float64(tt.blockSize())
|
|
}
|
|
|
|
func (tt TensorType) typeSize() int64 {
|
|
switch tt {
|
|
case TensorTypeF32:
|
|
return 4
|
|
case TensorTypeF16:
|
|
return 2
|
|
case TensorTypeQ4_0:
|
|
return 2 + tt.blockSize()/2
|
|
case TensorTypeQ4_1:
|
|
return 2 + 2 + tt.blockSize()/2
|
|
case TensorTypeQ5_0:
|
|
return 2 + 4 + tt.blockSize()/2
|
|
case TensorTypeQ5_1:
|
|
return 2 + 2 + 4 + tt.blockSize()/2
|
|
case TensorTypeQ8_0:
|
|
return 2 + tt.blockSize()
|
|
case TensorTypeQ8_1:
|
|
return 2 + 2 + tt.blockSize()
|
|
case TensorTypeQ2_K:
|
|
return tt.blockSize()/16 + tt.blockSize()/4 + 2 + 2
|
|
case TensorTypeQ3_K:
|
|
return tt.blockSize()/8 + tt.blockSize()/4 + 12 + 2
|
|
case TensorTypeQ4_K:
|
|
return 2 + 2 + 12 + tt.blockSize()/2
|
|
case TensorTypeQ5_K:
|
|
return 2 + 2 + 12 + tt.blockSize()/8 + tt.blockSize()/2
|
|
case TensorTypeQ6_K:
|
|
return tt.blockSize()/2 + tt.blockSize()/4 + tt.blockSize()/16 + 2
|
|
case TensorTypeQ8_K:
|
|
return 4 + tt.blockSize() + 2*tt.blockSize()/16
|
|
case tensorTypeIQ2_XXS:
|
|
return 2 + 2*tt.blockSize()/8
|
|
case tensorTypeIQ2_XS:
|
|
return 2 + 2*tt.blockSize()/8 + tt.blockSize()/32
|
|
case tensorTypeIQ3_XXS:
|
|
return 2 + tt.blockSize()/4 + tt.blockSize()/8
|
|
case tensorTypeIQ1_S:
|
|
return 2 + tt.blockSize()/8 + tt.blockSize()/16
|
|
case tensorTypeIQ4_NL:
|
|
return 2 + tt.blockSize()/2
|
|
case tensorTypeIQ3_S:
|
|
return 2 + tt.blockSize()/4 + tt.blockSize()/8 + tt.blockSize()/32 + 4
|
|
case tensorTypeIQ2_S:
|
|
return 2 + tt.blockSize()/4 + tt.blockSize()/16
|
|
case tensorTypeIQ4_XS:
|
|
return 2 + 2 + tt.blockSize()/2 + tt.blockSize()/64
|
|
case TensorTypeI8:
|
|
return 1
|
|
case TensorTypeI16:
|
|
return 2
|
|
case TensorTypeI32:
|
|
return 4
|
|
case TensorTypeI64:
|
|
return 8
|
|
case TensorTypeF64:
|
|
return 8
|
|
case tensorTypeIQ1_M:
|
|
return tt.blockSize()/8 + tt.blockSize()/16 + tt.blockSize()/32
|
|
case TensorTypeBF16:
|
|
return 2
|
|
default:
|
|
return 0
|
|
}
|
|
}
|
|
|
|
func (tt TensorType) blockSize() int64 {
|
|
switch tt {
|
|
case TensorTypeF32,
|
|
TensorTypeF16,
|
|
TensorTypeI8,
|
|
TensorTypeI16,
|
|
TensorTypeI32,
|
|
TensorTypeI64,
|
|
TensorTypeF64,
|
|
TensorTypeBF16:
|
|
return 1
|
|
case TensorTypeQ4_0,
|
|
TensorTypeQ4_1,
|
|
TensorTypeQ5_0,
|
|
TensorTypeQ5_1,
|
|
TensorTypeQ8_0,
|
|
TensorTypeQ8_1,
|
|
tensorTypeIQ4_NL:
|
|
return 32
|
|
default:
|
|
return 256
|
|
}
|
|
}
|
|
|
|
func (tt TensorType) String() string {
|
|
switch tt {
|
|
case TensorTypeF32:
|
|
return "f32"
|
|
case TensorTypeF16:
|
|
return "f16"
|
|
case TensorTypeQ4_0:
|
|
return "q4_0"
|
|
case TensorTypeQ4_1:
|
|
return "q4_1"
|
|
case tensorTypeQ4_2:
|
|
return "q4_2"
|
|
case tensorTypeQ4_3:
|
|
return "q4_3"
|
|
case TensorTypeQ5_0:
|
|
return "q5_0"
|
|
case TensorTypeQ5_1:
|
|
return "q5_1"
|
|
case TensorTypeQ8_0:
|
|
return "q8_0"
|
|
case TensorTypeQ8_1:
|
|
return "q8_1"
|
|
case TensorTypeQ2_K:
|
|
return "q2_k"
|
|
case TensorTypeQ3_K:
|
|
return "q3_k"
|
|
case TensorTypeQ4_K:
|
|
return "q4_k"
|
|
case TensorTypeQ5_K:
|
|
return "q5_k"
|
|
case TensorTypeQ6_K:
|
|
return "q6_k"
|
|
case TensorTypeQ8_K:
|
|
return "q8_k"
|
|
case tensorTypeIQ2_XXS:
|
|
return "iq2_xxs"
|
|
case tensorTypeIQ2_XS:
|
|
return "iq2_xs"
|
|
case tensorTypeIQ3_XXS:
|
|
return "iq3_xxs"
|
|
case tensorTypeIQ1_S:
|
|
return "iq1_s"
|
|
case tensorTypeIQ4_NL:
|
|
return "iq4_nl"
|
|
case tensorTypeIQ3_S:
|
|
return "iq3_s"
|
|
case tensorTypeIQ2_S:
|
|
return "iq2_s"
|
|
case tensorTypeIQ4_XS:
|
|
return "iq4_xs"
|
|
case TensorTypeI8:
|
|
return "i8"
|
|
case TensorTypeI16:
|
|
return "i16"
|
|
case TensorTypeI32:
|
|
return "i32"
|
|
case TensorTypeI64:
|
|
return "i64"
|
|
case TensorTypeF64:
|
|
return "f64"
|
|
case tensorTypeIQ1_M:
|
|
return "iq1_m"
|
|
case TensorTypeBF16:
|
|
return "bf16"
|
|
case tensorTypeQ4_0_4_4:
|
|
return "q4_0_4_4"
|
|
case tensorTypeQ4_0_4_8:
|
|
return "q4_0_4_8"
|
|
case tensorTypeQ4_0_8_8:
|
|
return "q4_0_8_8"
|
|
case tensorTypeTQ1_0:
|
|
return "tq1_0"
|
|
case tensorTypeTQ2_0:
|
|
return "tq2_0"
|
|
case tensorTypeIQ4_NL_4_4:
|
|
return "iq4_nl_4_4"
|
|
case tensorTypeIQ4_NL_4_8:
|
|
return "iq4_nl_4_8"
|
|
case tensorTypeIQ4_NL_8_8:
|
|
return "iq4_nl_8_8"
|
|
default:
|
|
return "unknown"
|
|
}
|
|
}
|
|
|
|
func (tt TensorType) LogValue() slog.Value {
|
|
return slog.GroupValue(
|
|
slog.Uint64("value", uint64(tt)),
|
|
slog.String("name", strings.ToUpper(tt.String())),
|
|
slog.Int64("size", tt.typeSize()),
|
|
slog.Int64("block_size", tt.blockSize()),
|
|
slog.Float64("num_bytes", tt.NumBytes()),
|
|
)
|
|
}
|