2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								package llm
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								import (
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-31 04:35:03 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									"context"
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									"fmt"
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-04 06:40:16 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									"log"
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									"os"
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-06 00:53:47 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									"runtime"
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-04 06:47:36 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									"github.com/pbnjay/memory"
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									"github.com/jmorganca/ollama/api"
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-13 00:34:16 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									"github.com/jmorganca/ollama/format"
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								type LLM interface {
							 | 
						
					
						
							
								
									
										
										
										
											2023-12-06 03:57:33 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									Predict(context.Context, PredictOpts, func(PredictResult)) error
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-31 04:35:03 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									Embedding(context.Context, string) ([]float64, error)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									Encode(context.Context, string) ([]int, error)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									Decode(context.Context, []int) (string, error)
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									SetOptions(api.Options)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									Close()
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-31 04:35:03 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									Ping(context.Context) error
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-12-01 02:30:23 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									if _, err := os.Stat(model); err != nil {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										return nil, err
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									f, err := os.Open(model)
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									if err != nil {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										return nil, err
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									}
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-15 07:08:02 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									defer f.Close()
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-08 01:55:37 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									ggml, err := DecodeGGML(f)
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									if err != nil {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										return nil, err
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-06 00:53:47 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									if runtime.GOOS == "darwin" {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										switch ggml.FileType() {
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-21 05:44:12 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										case "F32", "Q5_0", "Q5_1", "Q8_0":
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-06 00:53:47 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
											if ggml.Name() != "gguf" && opts.NumGPU != 0 {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
												// GGML Q8_0 do not support Metal API and will
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
												// cause the runner to segmentation fault so disable GPU
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
												log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
												opts.NumGPU = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											}
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-04 06:40:16 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										}
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-13 00:47:17 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-14 05:41:51 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										var requiredMemory int64
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										var f16Multiplier int64 = 2
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-13 01:36:23 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-14 05:41:51 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										switch ggml.ModelType() {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										case "3B", "7B":
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											requiredMemory = 8 * format.GigaByte
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										case "13B":
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											requiredMemory = 16 * format.GigaByte
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										case "30B", "34B", "40B":
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											requiredMemory = 32 * format.GigaByte
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										case "65B", "70B":
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											requiredMemory = 64 * format.GigaByte
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										case "180B":
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											requiredMemory = 128 * format.GigaByte
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											f16Multiplier = 4
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										}
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-13 01:36:23 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-14 05:41:51 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										systemMemory := int64(memory.TotalMemory())
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-13 01:36:23 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-10-14 05:41:51 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										if ggml.FileType() == "F16" && requiredMemory*f16Multiplier > systemMemory {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											return nil, fmt.Errorf("F16 model requires at least %s of total memory", format.HumanBytes(requiredMemory))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										} else if requiredMemory > systemMemory {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
											return nil, fmt.Errorf("model requires at least %s of total memory", format.HumanBytes(requiredMemory))
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										}
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-04 06:47:36 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-08 01:55:37 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									switch ggml.Name() {
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
									case "gguf":
							 | 
						
					
						
							
								
									
										
										
										
											2023-11-02 23:41:30 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										// TODO: gguf will load these options automatically from the model binary
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										opts.NumGQA = 0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										opts.RopeFrequencyBase = 0.0
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
										opts.RopeFrequencyScale = 0.0
							 | 
						
					
						
							
								
									
										
										
										
											2023-12-01 02:30:23 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										return newLlama(model, adapters, projectors, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
							 | 
						
					
						
							
								
									
										
										
										
											2023-09-08 01:55:37 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
									case "ggml", "ggmf", "ggjt", "ggla":
							 | 
						
					
						
							
								
									
										
										
										
											2023-12-01 02:30:23 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										return newLlama(model, adapters, projectors, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									default:
							 | 
						
					
						
							
								
									
										
										
										
											2023-08-18 02:37:27 +08:00
										 
									 
								 
							 | 
							
								
									
										
									
								
							 | 
							
								
							 | 
							
							
										return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
							 | 
						
					
						
							
								
									
										
										
										
											2023-07-22 04:33:56 +08:00
										 
									 
								 
							 | 
							
								
							 | 
							
								
							 | 
							
							
									}
							 | 
						
					
						
							| 
								
							 | 
							
								
							 | 
							
								
							 | 
							
							
								}
							 |