Jeffrey Morgan
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								3b4bab3dc5 
								
							 
						 
						
							
							
								
								Fix embeddings load model behavior ( #2848 )  
							
							 
							
							
							
						 
						
							2024-02-29 17:40:56 -08:00  
						
					 
				
					
						
							
							
								 
								Ikko Eltociear Ashimine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								e95b896790 
								
							 
						 
						
							
							
								
								Update types.go ( #2744 )  
							
							 
							
							... 
							
							
							
							specfied -> specified 
							
						 
						
							2024-02-25 13:41:25 -05:00  
						
					 
				
					
						
							
							
								 
								bnorick
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								caf2b13c10 
								
							 
						 
						
							
							
								
								Fix infinite keep_alive ( #2480 )  
							
							 
							
							
							
						 
						
							2024-02-13 15:40:32 -08:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								b5cf31b460 
								
							 
						 
						
							
							
								
								add keep_alive to generate/chat/embedding api endpoints ( #2146 )  
							
							 
							
							
							
						 
						
							2024-01-26 14:28:02 -08:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								7c40a67841 
								
							 
						 
						
							
							
								
								Save and load sessions ( #2063 )  
							
							 
							
							
							
						 
						
							2024-01-25 12:12:36 -08:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								745b5934fa 
								
							 
						 
						
							
							
								
								add model to ModelResponse  
							
							 
							
							
							
						 
						
							2024-01-18 14:32:55 -08:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								a38d88d828 
								
							 
						 
						
							
							
								
								api: add model for all requests  
							
							 
							
							... 
							
							
							
							prefer using req.Model and fallback to req.Name 
							
						 
						
							2024-01-18 14:31:37 -08:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
							
							
								
							
							
								22e93efa41 
								
							 
						 
						
							
							
								
								add show info command and fix the modelfile  
							
							 
							
							
							
						 
						
							2024-01-05 12:20:05 -08:00  
						
					 
				
					
						
							
							
								 
								Jeffrey Morgan
							
						 
						
							 
							
							
							
							
								
							
							
								55978c1dc9 
								
							 
						 
						
							
							
								
								clean up cache api option  
							
							 
							
							
							
						 
						
							2023-12-27 14:27:45 -05:00  
						
					 
				
					
						
							
							
								 
								Jeffrey Morgan
							
						 
						
							 
							
							
							
							
								
							
							
								d4ebdadbe7 
								
							 
						 
						
							
							
								
								enable `cache_prompt` by default  
							
							 
							
							
							
						 
						
							2023-12-27 14:23:42 -05:00  
						
					 
				
					
						
							
							
								 
								K0IN
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								10da41d677 
								
							 
						 
						
							
							
								
								Add Cache flag to api ( #1642 )  
							
							 
							
							
							
						 
						
							2023-12-22 17:16:20 -05:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								d99fa6ce0a 
								
							 
						 
						
							
							
								
								send empty messages on last chat response ( #1530 )  
							
							 
							
							
							
						 
						
							2023-12-18 14:23:38 -05:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								d9e60f634b 
								
							 
						 
						
							
							
								
								add image support to the chat api ( #1490 )  
							
							 
							
							
							
						 
						
							2023-12-12 13:28:58 -08:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								910e9401d0 
								
							 
						 
						
							
							
								
								Multimodal support ( #1216 )  
							
							 
							
							... 
							
							
							
							---------
Co-authored-by: Matt Apperson <mattapperson@Matts-MacBook-Pro.local> 
							
						 
						
							2023-12-11 13:56:22 -08:00  
						
					 
				
					
						
							
							
								 
								Jeffrey Morgan
							
						 
						
							 
							
							
							
							
								
							
							
								9e1406e4ed 
								
							 
						 
						
							
							
								
								Don't expose model information in `/api/generate`  
							
							 
							
							
							
						 
						
							2023-12-09 02:05:43 -08:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								5d75505ebd 
								
							 
						 
						
							
							
								
								return model configuration in generate  
							
							 
							
							
							
						 
						
							2023-12-05 14:39:02 -08:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								195e3d9dbd 
								
							 
						 
						
							
							
								
								chat api endpoint ( #1392 )  
							
							 
							
							
							
						 
						
							2023-12-05 14:57:33 -05:00  
						
					 
				
					
						
							
							
								 
								Jeffrey Morgan
							
						 
						
							 
							
							
							
							
								
							
							
								00d06619a1 
								
							 
						 
						
							
							
								
								Revert "chat api ( #991 )" while context variable is fixed  
							
							 
							
							... 
							
							
							
							This reverts commit 7a0899d62d . 
							
						 
						
							2023-12-04 21:16:27 -08:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								7a0899d62d 
								
							 
						 
						
							
							
								
								chat api ( #991 )  
							
							 
							
							... 
							
							
							
							- update chat docs
- add messages chat endpoint
- remove deprecated context and template generate parameters from docs
- context and template are still supported for the time being and will continue to work as expected
- add partial response to chat history 
							
						 
						
							2023-12-04 18:01:06 -05:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								cde31cb220 
								
							 
						 
						
							
							
								
								Allow setting parameters in the REPL ( #1294 )  
							
							 
							
							
							
						 
						
							2023-11-29 09:56:42 -08:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								bc22d5a38b 
								
							 
						 
						
							
							
								
								no blob response  
							
							 
							
							
							
						 
						
							2023-11-15 15:16:23 -08:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								1552cee59f 
								
							 
						 
						
							
							
								
								client create modelfile  
							
							 
							
							
							
						 
						
							2023-11-15 15:16:23 -08:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								3ca56b5ada 
								
							 
						 
						
							
							
								
								add create modelfile field  
							
							 
							
							
							
						 
						
							2023-11-15 15:16:23 -08:00  
						
					 
				
					
						
							
							
								 
								Jeffrey Morgan
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								5cba29b9d6 
								
							 
						 
						
							
							
								
								JSON mode: add `"format" as an api parameter ( #1051 )  
							
							 
							
							... 
							
							
							
							* add `"format": "json"` as an API parameter
---------
Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com> 
							
						 
						
							2023-11-09 16:44:02 -08:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								a49d6acc1e 
								
							 
						 
						
							
							
								
								add a complete /generate options example ( #1035 )  
							
							 
							
							
							
						 
						
							2023-11-08 16:44:36 -08:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								ec2a31e9b3 
								
							 
						 
						
							
							
								
								support raw generation requests ( #952 )  
							
							 
							
							... 
							
							
							
							- add the optional `raw` generate request parameter to bypass prompt formatting and response context
-add raw request to docs 
							
						 
						
							2023-11-08 14:05:02 -08:00  
						
					 
				
					
						
							
							
								 
								Jeffrey Morgan
							
						 
						
							 
							
							
							
							
								
							
							
								17678b7225 
								
							 
						 
						
							
							
								
								Restore system prompt on requests and default `num_keep` to `0`  
							
							 
							
							
							
						 
						
							2023-11-03 13:25:25 -07:00  
						
					 
				
					
						
							
							
								 
								Jeffrey Morgan
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								06589a3b30 
								
							 
						 
						
							
							
								
								Set `NumKeep` to `4` by default ( #982 )  
							
							 
							
							
							
						 
						
							2023-11-02 17:26:11 -07:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								6db3691b8f 
								
							 
						 
						
							
							
								
								update default NumKeep  
							
							 
							
							
							
						 
						
							2023-11-02 15:47:35 -07:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								fe6f3b48f7 
								
							 
						 
						
							
							
								
								do not reload the running llm when runtime params change ( #840 )  
							
							 
							
							... 
							
							
							
							- only reload the running llm if the model has changed, or the options for loading the running model have changed
- rename loaded llm to runner to differentiate from loaded model image
- remove logic which keeps the first system prompt in the generation context 
							
						 
						
							2023-10-19 10:39:58 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								6fe178134d 
								
							 
						 
						
							
							
								
								improve api error handling ( #781 )  
							
							 
							
							... 
							
							
							
							- remove new lines from llama.cpp error messages relayed to client
- check api option types and return error on wrong type
- change num layers from 95% VRAM to 92% VRAM 
							
						 
						
							2023-10-13 16:57:10 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								7804b8fab9 
								
							 
						 
						
							
							
								
								validate api options fields from map ( #711 )  
							
							 
							
							
							
						 
						
							2023-10-12 11:18:11 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								274d5a5fdf 
								
							 
						 
						
							
							
								
								optional parameter to not stream response ( #639 )  
							
							 
							
							... 
							
							
							
							* update streaming request accept header
* add optional stream param to request bodies 
							
						 
						
							2023-10-11 12:54:27 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								2130c0708b 
								
							 
						 
						
							
							
								
								output type parsed from modelfile ( #678 )  
							
							 
							
							
							
						 
						
							2023-10-05 14:58:04 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								1fbf3585d6 
								
							 
						 
						
							
							
								
								Relay default values to llama runner ( #672 )  
							
							 
							
							... 
							
							
							
							* include seed in params for llama.cpp server and remove empty filter for temp
* relay default predict options to llama.cpp
- reorganize options to match predict request for readability
* omit empty stop
---------
Co-authored-by: hallh <hallh@users.noreply.github.com> 
							
						 
						
							2023-10-02 14:53:16 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								a1b2d95f96 
								
							 
						 
						
							
							
								
								remove unused push/pull params ( #650 )  
							
							 
							
							
							
						 
						
							2023-09-29 17:27:19 -04:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								f40b3de758 
								
							 
						 
						
							
							
								
								use int64 consistently  
							
							 
							
							
							
						 
						
							2023-09-28 11:07:24 -07:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								f221637053 
								
							 
						 
						
							
							
								
								first pass at linux gpu support ( #454 )  
							
							 
							
							... 
							
							
							
							* linux gpu support
* handle multiple gpus
* add cuda docker image (#488 )
---------
Co-authored-by: Michael Yang <mxyng@pm.me> 
							
						 
						
							2023-09-12 11:04:35 -04:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								790d24eb7b 
								
							 
						 
						
							
							
								
								add show command ( #474 )  
							
							 
							
							
							
						 
						
							2023-09-06 11:04:17 -07:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								0f541a0367 
								
							 
						 
						
							
							
								
								s/ListResponseModel/ModelResponse/  
							
							 
							
							
							
						 
						
							2023-08-31 09:47:10 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								42998d797d 
								
							 
						 
						
							
							
								
								subprocess llama.cpp server ( #401 )  
							
							 
							
							... 
							
							
							
							* remove c code
* pack llama.cpp
* use request context for llama_cpp
* let llama_cpp decide the number of threads to use
* stop llama runner when app stops
* remove sample count and duration metrics
* use go generate to get libraries
* tmp dir for running llm 
							
						 
						
							2023-08-30 16:35:03 -04:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								8bbff2df98 
								
							 
						 
						
							
							
								
								add model IDs ( #439 )  
							
							 
							
							
							
						 
						
							2023-08-28 20:50:24 -07:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								f723bf0879 
								
							 
						 
						
							
							
								
								ignore nil map values  
							
							 
							
							
							
						 
						
							2023-08-17 15:50:46 -07:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								f27bc261cf 
								
							 
						 
						
							
							
								
								s/parmeter/parameter/  
							
							 
							
							
							
						 
						
							2023-08-10 16:26:06 -07:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
							
							
								
							
							
								81d8d7b73f 
								
							 
						 
						
							
							
								
								fix could not convert int  
							
							 
							
							
							
						 
						
							2023-08-10 16:24:17 -07:00  
						
					 
				
					
						
							
							
								 
								Patrick Devine
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								be989d89d1 
								
							 
						 
						
							
							
								
								Token auth ( #314 )  
							
							 
							
							
							
						 
						
							2023-08-10 11:34:25 -07:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
							
							
								
							
							
								4b3507f036 
								
							 
						 
						
							
							
								
								embeddings endpoint  
							
							 
							
							... 
							
							
							
							Co-Authored-By: Jeffrey Morgan <jmorganca@gmail.com> 
							
						 
						
							2023-08-10 11:45:57 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								7a5f3616fd 
								
							 
						 
						
							
							
								
								embed text document in modelfile  
							
							 
							
							
							
						 
						
							2023-08-09 10:26:19 -04:00  
						
					 
				
					
						
							
							
								 
								Bruce MacDonald
							
						 
						
							 
							
							
							
							
								
							
							
								21ddcaa1f1 
								
							 
						 
						
							
							
								
								pr comments  
							
							 
							
							... 
							
							
							
							- default to embeddings enabled
- move embedding logic for loaded model to request
- allow embedding full directory
- close llm on reload 
							
						 
						
							2023-08-08 13:49:37 -04:00  
						
					 
				
					
						
							
							
								 
								Michael Yang
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								f2074ed4c0 
								
							 
						 
						
							
							
								
								Merge pull request  #306  from jmorganca/default-keep-system  
							
							 
							
							... 
							
							
							
							automatically set num_keep if num_keep < 0 
							
						 
						
							2023-08-08 09:25:34 -07:00