package llm

import (
	"context"
	"fmt"
	"log"
	"os"
	"runtime"

	"github.com/pbnjay/memory"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/format"
)
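
// LLM is the interface a loaded model runner implements. Predict streams
// generation results to its callback, Encode and Decode translate between
// text and token IDs, Embedding returns the embedding vector for a prompt,
// and Close shuts the runner down.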
type LLM interface {
	Predict(context.Context, []int, string, string, func(api.GenerateResponse)) error
	Embedding(context.Context, string) ([]float64, error)
	Encode(context.Context, string) ([]int, error)
	Decode(context.Context, []int) (string, error)
	SetOptions(api.Options)
	Close()
	Ping(context.Context) error
}
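
// New opens and validates the model file, decodes its GGML header, applies
// platform-specific GPU and memory checks, and returns a runner matched to
// the model's container format.
//
// A minimal call-site sketch (the path and options below are illustrative,
// not taken from this file):
//
//	runner, err := New(workDir, "/path/to/model.bin", nil, api.DefaultOptions())
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer runner.Close()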
func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
	if _, err := os.Stat(model); err != nil {
		return nil, err
	}

	f, err := os.Open(model)
	if err != nil {
		return nil, err
	}
	defer f.Close()
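
	// decode the GGML/GGUF header to learn the container name, file type
	// (quantization), and parameter-count class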
	ggml, err := DecodeGGML(f)
	if err != nil {
		return nil, err
	}
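
	// on macOS, verify the model can actually run here: disable Metal for
	// unsupported legacy file types and require enough total system memory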
	if runtime.GOOS == "darwin" {
		switch ggml.FileType() {
		case "F32", "Q5_0", "Q5_1", "Q8_0":
			if ggml.Name() != "gguf" && opts.NumGPU != 0 {
				// pre-gguf GGML models with these file types do not support
				// the Metal API and will cause the runner to segfault, so
				// disable GPU acceleration
				log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
				opts.NumGPU = 0
			}
		}
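
		// estimate memory needed from the model's parameter-count class;
		// these baselines cover quantized file types, and F16 is scaled
		// up below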
		var requiredMemory int64
		var f16Multiplier int64 = 2

		switch ggml.ModelType() {
		case "3B", "7B":
			requiredMemory = 8 * format.GigaByte
		case "13B":
			requiredMemory = 16 * format.GigaByte
		case "30B", "34B", "40B":
			requiredMemory = 32 * format.GigaByte
		case "65B", "70B":
			requiredMemory = 64 * format.GigaByte
		case "180B":
			requiredMemory = 128 * format.GigaByte
			f16Multiplier = 4
		}

		systemMemory := int64(memory.TotalMemory())
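
		// unquantized F16 weights need f16Multiplier times the baseline, so
		// scale the requirement before comparing against total system memory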
		if ggml.FileType() == "F16" && requiredMemory*f16Multiplier > systemMemory {
			return nil, fmt.Errorf("F16 model requires at least %s of total memory", format.HumanBytes(requiredMemory*f16Multiplier))
		} else if requiredMemory > systemMemory {
			return nil, fmt.Errorf("model requires at least %s of total memory", format.HumanBytes(requiredMemory))
		}
	}
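
	// dispatch on the container format: gguf and the legacy ggml family are
	// served by separately chosen runners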
	switch ggml.Name() {
	case "gguf":
		// TODO: gguf will load these options automatically from the model binary
		opts.NumGQA = 0
		opts.RopeFrequencyBase = 0.0
		opts.RopeFrequencyScale = 0.0
		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
	case "ggml", "ggmf", "ggjt", "ggla":
		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
	default:
		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
	}
}