| 
									
										
										
										
											2024-02-18 13:06:08 +08:00
										 |  |  | from pathlib import Path | 
					
						
							| 
									
										
										
										
											2023-11-19 13:41:43 +08:00
										 |  |  | import hashlib | 
					
						
							| 
									
										
										
										
											2024-01-03 08:22:48 +08:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2024-02-20 12:44:00 +08:00
										 |  |  | from datetime import timedelta | 
					
						
							| 
									
										
										
										
											2024-08-06 18:31:45 +08:00
										 |  |  | from typing import Optional, List, Tuple, Callable | 
					
						
							| 
									
										
										
										
											2024-06-20 19:38:59 +08:00
										 |  |  | import uuid | 
					
						
							|  |  |  | import time | 
					
						
							| 
									
										
										
										
											2024-06-09 18:01:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-03 21:24:26 +08:00
										 |  |  | from utils.task import prompt_template | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-09 18:01:25 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  | def get_last_user_message_item(messages: List[dict]) -> Optional[dict]: | 
					
						
							| 
									
										
										
										
											2024-06-09 18:01:25 +08:00
										 |  |  |     for message in reversed(messages): | 
					
						
							|  |  |  |         if message["role"] == "user": | 
					
						
							| 
									
										
										
										
											2024-07-02 15:37:21 +08:00
										 |  |  |             return message | 
					
						
							|  |  |  |     return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  | def get_content_from_message(message: dict) -> Optional[str]: | 
					
						
							|  |  |  |     if isinstance(message["content"], list): | 
					
						
							|  |  |  |         for item in message["content"]: | 
					
						
							|  |  |  |             if item["type"] == "text": | 
					
						
							|  |  |  |                 return item["text"] | 
					
						
							|  |  |  |     else: | 
					
						
							| 
									
										
										
										
											2024-07-02 15:37:21 +08:00
										 |  |  |         return message["content"] | 
					
						
							| 
									
										
										
										
											2024-06-09 18:01:25 +08:00
										 |  |  |     return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  | def get_last_user_message(messages: List[dict]) -> Optional[str]: | 
					
						
							|  |  |  |     message = get_last_user_message_item(messages) | 
					
						
							|  |  |  |     if message is None: | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return get_content_from_message(message) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_last_assistant_message(messages: List[dict]) -> Optional[str]: | 
					
						
							| 
									
										
										
										
											2024-06-09 18:01:25 +08:00
										 |  |  |     for message in reversed(messages): | 
					
						
							|  |  |  |         if message["role"] == "assistant": | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  |             return get_content_from_message(message) | 
					
						
							| 
									
										
										
										
											2024-06-09 18:01:25 +08:00
										 |  |  |     return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  | def get_system_message(messages: List[dict]) -> Optional[dict]: | 
					
						
							| 
									
										
										
										
											2024-06-18 04:28:29 +08:00
										 |  |  |     for message in messages: | 
					
						
							|  |  |  |         if message["role"] == "system": | 
					
						
							|  |  |  |             return message | 
					
						
							|  |  |  |     return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def remove_system_message(messages: List[dict]) -> List[dict]: | 
					
						
							|  |  |  |     return [message for message in messages if message["role"] != "system"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  | def pop_system_message(messages: List[dict]) -> Tuple[Optional[dict], List[dict]]: | 
					
						
							| 
									
										
										
										
											2024-06-18 04:28:29 +08:00
										 |  |  |     return get_system_message(messages), remove_system_message(messages) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-26 19:22:13 +08:00
										 |  |  | def prepend_to_first_user_message_content( | 
					
						
							|  |  |  |     content: str, messages: List[dict] | 
					
						
							|  |  |  | ) -> List[dict]: | 
					
						
							|  |  |  |     for message in messages: | 
					
						
							|  |  |  |         if message["role"] == "user": | 
					
						
							|  |  |  |             if isinstance(message["content"], list): | 
					
						
							|  |  |  |                 for item in message["content"]: | 
					
						
							|  |  |  |                     if item["type"] == "text": | 
					
						
							|  |  |  |                         item["text"] = f"{content}\n{item['text']}" | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 message["content"] = f"{content}\n{message['content']}" | 
					
						
							|  |  |  |             break | 
					
						
							|  |  |  |     return messages | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-09 18:01:25 +08:00
										 |  |  | def add_or_update_system_message(content: str, messages: List[dict]): | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Adds a new system message at the beginning of the messages list | 
					
						
							|  |  |  |     or updates the existing system message at the beginning. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     :param msg: The message to be added or appended. | 
					
						
							|  |  |  |     :param messages: The list of message dictionaries. | 
					
						
							|  |  |  |     :return: The updated list of message dictionaries. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if messages and messages[0].get("role") == "system": | 
					
						
							|  |  |  |         messages[0]["content"] += f"{content}\n{messages[0]['content']}" | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         # Insert at the beginning | 
					
						
							|  |  |  |         messages.insert(0, {"role": "system", "content": content}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return messages | 
					
						
							| 
									
										
										
										
											2023-11-19 13:41:43 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-06-20 19:38:59 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-01 05:01:22 +08:00
										 |  |  | def openai_chat_message_template(model: str): | 
					
						
							| 
									
										
										
										
											2024-06-20 19:38:59 +08:00
										 |  |  |     return { | 
					
						
							|  |  |  |         "id": f"{model}-{str(uuid.uuid4())}", | 
					
						
							|  |  |  |         "created": int(time.time()), | 
					
						
							|  |  |  |         "model": model, | 
					
						
							| 
									
										
										
										
											2024-07-31 22:26:26 +08:00
										 |  |  |         "choices": [{"index": 0, "logprobs": None, "finish_reason": None}], | 
					
						
							| 
									
										
										
										
											2024-06-20 19:38:59 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-07-31 22:26:26 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-04 19:10:02 +08:00
										 |  |  | def openai_chat_chunk_message_template(model: str, message: str) -> dict: | 
					
						
							| 
									
										
										
										
											2024-08-01 05:01:22 +08:00
										 |  |  |     template = openai_chat_message_template(model) | 
					
						
							| 
									
										
										
										
											2024-07-31 22:26:26 +08:00
										 |  |  |     template["object"] = "chat.completion.chunk" | 
					
						
							|  |  |  |     template["choices"][0]["delta"] = {"content": message} | 
					
						
							|  |  |  |     return template | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-04 19:10:02 +08:00
										 |  |  | def openai_chat_completion_message_template(model: str, message: str) -> dict: | 
					
						
							| 
									
										
										
										
											2024-08-01 05:01:22 +08:00
										 |  |  |     template = openai_chat_message_template(model) | 
					
						
							| 
									
										
										
										
											2024-07-31 22:26:26 +08:00
										 |  |  |     template["object"] = "chat.completion" | 
					
						
							|  |  |  |     template["choices"][0]["message"] = {"content": message, "role": "assistant"} | 
					
						
							|  |  |  |     template["choices"][0]["finish_reason"] = "stop" | 
					
						
							| 
									
										
										
										
											2024-08-04 19:10:02 +08:00
										 |  |  |     return template | 
					
						
							| 
									
										
										
										
											2024-07-31 22:26:26 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-19 13:41:43 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-03 21:24:26 +08:00
										 |  |  | # inplace function: form_data is modified | 
					
						
							|  |  |  | def apply_model_system_prompt_to_body(params: dict, form_data: dict, user) -> dict: | 
					
						
							|  |  |  |     system = params.get("system", None) | 
					
						
							|  |  |  |     if not system: | 
					
						
							|  |  |  |         return form_data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if user: | 
					
						
							|  |  |  |         template_params = { | 
					
						
							|  |  |  |             "user_name": user.name, | 
					
						
							|  |  |  |             "user_location": user.info.get("location") if user.info else None, | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         template_params = {} | 
					
						
							|  |  |  |     system = prompt_template(system, **template_params) | 
					
						
							|  |  |  |     form_data["messages"] = add_or_update_system_message( | 
					
						
							|  |  |  |         system, form_data.get("messages", []) | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     return form_data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # inplace function: form_data is modified | 
					
						
							| 
									
										
										
										
											2024-08-06 18:31:45 +08:00
										 |  |  | def apply_model_params_to_body( | 
					
						
							|  |  |  |     params: dict, form_data: dict, mappings: dict[str, Callable] | 
					
						
							|  |  |  | ) -> dict: | 
					
						
							| 
									
										
										
										
											2024-08-03 21:24:26 +08:00
										 |  |  |     if not params: | 
					
						
							|  |  |  |         return form_data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for key, cast_func in mappings.items(): | 
					
						
							|  |  |  |         if (value := params.get(key)) is not None: | 
					
						
							|  |  |  |             form_data[key] = cast_func(value) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return form_data | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-06 18:31:45 +08:00
										 |  |  | # inplace function: form_data is modified | 
					
						
							|  |  |  | def apply_model_params_to_body_openai(params: dict, form_data: dict) -> dict: | 
					
						
							| 
									
										
										
										
											2024-08-08 18:01:00 +08:00
										 |  |  |     mappings = { | 
					
						
							|  |  |  |         "temperature": float, | 
					
						
							|  |  |  |         "top_p": int, | 
					
						
							|  |  |  |         "max_tokens": int, | 
					
						
							|  |  |  |         "frequency_penalty": int, | 
					
						
							|  |  |  |         "seed": lambda x: x, | 
					
						
							|  |  |  |         "stop": lambda x: [bytes(s, "utf-8").decode("unicode_escape") for s in x], | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return apply_model_params_to_body(params, form_data, mappings) | 
					
						
							| 
									
										
										
										
											2024-08-06 18:31:45 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def apply_model_params_to_body_ollama(params: dict, form_data: dict) -> dict: | 
					
						
							|  |  |  |     opts = [ | 
					
						
							| 
									
										
										
										
											2024-08-08 18:01:00 +08:00
										 |  |  |         "temperature", | 
					
						
							|  |  |  |         "top_p", | 
					
						
							|  |  |  |         "seed", | 
					
						
							| 
									
										
										
										
											2024-08-06 18:31:45 +08:00
										 |  |  |         "mirostat", | 
					
						
							|  |  |  |         "mirostat_eta", | 
					
						
							|  |  |  |         "mirostat_tau", | 
					
						
							|  |  |  |         "num_ctx", | 
					
						
							|  |  |  |         "num_batch", | 
					
						
							|  |  |  |         "num_keep", | 
					
						
							|  |  |  |         "repeat_last_n", | 
					
						
							|  |  |  |         "tfs_z", | 
					
						
							|  |  |  |         "top_k", | 
					
						
							|  |  |  |         "min_p", | 
					
						
							|  |  |  |         "use_mmap", | 
					
						
							|  |  |  |         "use_mlock", | 
					
						
							|  |  |  |         "num_thread", | 
					
						
							|  |  |  |     ] | 
					
						
							|  |  |  |     mappings = {i: lambda x: x for i in opts} | 
					
						
							| 
									
										
										
										
											2024-08-08 17:52:09 +08:00
										 |  |  |     form_data = apply_model_params_to_body(params, form_data, mappings) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-08 18:01:00 +08:00
										 |  |  |     name_differences = { | 
					
						
							|  |  |  |         "max_tokens": "num_predict", | 
					
						
							|  |  |  |         "frequency_penalty": "repeat_penalty", | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for key, value in name_differences.items(): | 
					
						
							|  |  |  |         if (param := params.get(key, None)) is not None: | 
					
						
							|  |  |  |             form_data[value] = param | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-08-08 17:52:09 +08:00
										 |  |  |     return form_data | 
					
						
							| 
									
										
										
										
											2024-08-06 18:31:45 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-19 13:41:43 +08:00
										 |  |  | def get_gravatar_url(email): | 
					
						
							|  |  |  |     # Trim leading and trailing whitespace from | 
					
						
							|  |  |  |     # an email address and force all characters | 
					
						
							|  |  |  |     # to lower case | 
					
						
							|  |  |  |     address = str(email).strip().lower() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Create a SHA256 hash of the final string | 
					
						
							|  |  |  |     hash_object = hashlib.sha256(address.encode()) | 
					
						
							|  |  |  |     hash_hex = hash_object.hexdigest() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Grab the actual image URL | 
					
						
							| 
									
										
										
										
											2023-11-19 16:46:27 +08:00
										 |  |  |     return f"https://www.gravatar.com/avatar/{hash_hex}?d=mp" | 
					
						
							| 
									
										
										
										
											2023-12-24 07:38:52 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def calculate_sha256(file): | 
					
						
							|  |  |  |     sha256 = hashlib.sha256() | 
					
						
							|  |  |  |     # Read the file in chunks to efficiently handle large files | 
					
						
							|  |  |  |     for chunk in iter(lambda: file.read(8192), b""): | 
					
						
							|  |  |  |         sha256.update(chunk) | 
					
						
							|  |  |  |     return sha256.hexdigest() | 
					
						
							| 
									
										
										
										
											2024-01-03 08:22:48 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-27 14:17:28 +08:00
										 |  |  | def calculate_sha256_string(string): | 
					
						
							|  |  |  |     # Create a new SHA-256 hash object | 
					
						
							|  |  |  |     sha256_hash = hashlib.sha256() | 
					
						
							|  |  |  |     # Update the hash object with the bytes of the input string | 
					
						
							|  |  |  |     sha256_hash.update(string.encode("utf-8")) | 
					
						
							|  |  |  |     # Get the hexadecimal representation of the hash | 
					
						
							|  |  |  |     hashed_string = sha256_hash.hexdigest() | 
					
						
							|  |  |  |     return hashed_string | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-03 08:22:48 +08:00
										 |  |  | def validate_email_format(email: str) -> bool: | 
					
						
							| 
									
										
										
										
											2024-05-08 23:40:18 +08:00
										 |  |  |     if email.endswith("@localhost"): | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return bool(re.match(r"[^@]+@[^@]+\.[^@]+", email)) | 
					
						
							| 
									
										
										
										
											2024-02-18 13:06:08 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def sanitize_filename(file_name): | 
					
						
							|  |  |  |     # Convert to lowercase | 
					
						
							|  |  |  |     lower_case_file_name = file_name.lower() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Remove special characters using regular expression | 
					
						
							|  |  |  |     sanitized_file_name = re.sub(r"[^\w\s]", "", lower_case_file_name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Replace spaces with dashes | 
					
						
							|  |  |  |     final_file_name = re.sub(r"\s+", "-", sanitized_file_name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return final_file_name | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def extract_folders_after_data_docs(path): | 
					
						
							|  |  |  |     # Convert the path to a Path object if it's not already | 
					
						
							|  |  |  |     path = Path(path) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Extract parts of the path | 
					
						
							|  |  |  |     parts = path.parts | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Find the index of '/data/docs' in the path | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         index_data_docs = parts.index("data") + 1 | 
					
						
							|  |  |  |         index_docs = parts.index("docs", index_data_docs) + 1 | 
					
						
							|  |  |  |     except ValueError: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Exclude the filename and accumulate folder names | 
					
						
							|  |  |  |     tags = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     folders = parts[index_docs:-1] | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  |     for idx, _ in enumerate(folders): | 
					
						
							| 
									
										
										
										
											2024-02-18 13:06:08 +08:00
										 |  |  |         tags.append("/".join(folders[: idx + 1])) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return tags | 
					
						
							| 
									
										
										
										
											2024-02-20 12:44:00 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parse_duration(duration: str) -> Optional[timedelta]: | 
					
						
							|  |  |  |     if duration == "-1" or duration == "0": | 
					
						
							|  |  |  |         return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Regular expression to find number and unit pairs | 
					
						
							|  |  |  |     pattern = r"(-?\d+(\.\d+)?)(ms|s|m|h|d|w)" | 
					
						
							|  |  |  |     matches = re.findall(pattern, duration) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if not matches: | 
					
						
							|  |  |  |         raise ValueError("Invalid duration string") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     total_duration = timedelta() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     for number, _, unit in matches: | 
					
						
							|  |  |  |         number = float(number) | 
					
						
							|  |  |  |         if unit == "ms": | 
					
						
							|  |  |  |             total_duration += timedelta(milliseconds=number) | 
					
						
							|  |  |  |         elif unit == "s": | 
					
						
							|  |  |  |             total_duration += timedelta(seconds=number) | 
					
						
							|  |  |  |         elif unit == "m": | 
					
						
							|  |  |  |             total_duration += timedelta(minutes=number) | 
					
						
							|  |  |  |         elif unit == "h": | 
					
						
							|  |  |  |             total_duration += timedelta(hours=number) | 
					
						
							|  |  |  |         elif unit == "d": | 
					
						
							|  |  |  |             total_duration += timedelta(days=number) | 
					
						
							|  |  |  |         elif unit == "w": | 
					
						
							|  |  |  |             total_duration += timedelta(weeks=number) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return total_duration | 
					
						
							| 
									
										
										
										
											2024-05-25 10:26:27 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parse_ollama_modelfile(model_text): | 
					
						
							|  |  |  |     parameters_meta = { | 
					
						
							|  |  |  |         "mirostat": int, | 
					
						
							|  |  |  |         "mirostat_eta": float, | 
					
						
							|  |  |  |         "mirostat_tau": float, | 
					
						
							|  |  |  |         "num_ctx": int, | 
					
						
							|  |  |  |         "repeat_last_n": int, | 
					
						
							|  |  |  |         "repeat_penalty": float, | 
					
						
							|  |  |  |         "temperature": float, | 
					
						
							|  |  |  |         "seed": int, | 
					
						
							|  |  |  |         "tfs_z": float, | 
					
						
							|  |  |  |         "num_predict": int, | 
					
						
							|  |  |  |         "top_k": int, | 
					
						
							|  |  |  |         "top_p": float, | 
					
						
							| 
									
										
										
										
											2024-06-04 03:48:17 +08:00
										 |  |  |         "num_keep": int, | 
					
						
							|  |  |  |         "typical_p": float, | 
					
						
							|  |  |  |         "presence_penalty": float, | 
					
						
							|  |  |  |         "frequency_penalty": float, | 
					
						
							|  |  |  |         "penalize_newline": bool, | 
					
						
							|  |  |  |         "numa": bool, | 
					
						
							|  |  |  |         "num_batch": int, | 
					
						
							|  |  |  |         "num_gpu": int, | 
					
						
							|  |  |  |         "main_gpu": int, | 
					
						
							|  |  |  |         "low_vram": bool, | 
					
						
							|  |  |  |         "f16_kv": bool, | 
					
						
							|  |  |  |         "vocab_only": bool, | 
					
						
							|  |  |  |         "use_mmap": bool, | 
					
						
							|  |  |  |         "use_mlock": bool, | 
					
						
							|  |  |  |         "num_thread": int, | 
					
						
							| 
									
										
										
										
											2024-05-25 10:26:27 +08:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     data = {"base_model_id": None, "params": {}} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Parse base model | 
					
						
							|  |  |  |     base_model_match = re.search( | 
					
						
							|  |  |  |         r"^FROM\s+(\w+)", model_text, re.MULTILINE | re.IGNORECASE | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     if base_model_match: | 
					
						
							|  |  |  |         data["base_model_id"] = base_model_match.group(1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Parse template | 
					
						
							|  |  |  |     template_match = re.search( | 
					
						
							|  |  |  |         r'TEMPLATE\s+"""(.+?)"""', model_text, re.DOTALL | re.IGNORECASE | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     if template_match: | 
					
						
							|  |  |  |         data["params"] = {"template": template_match.group(1).strip()} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Parse stops | 
					
						
							|  |  |  |     stops = re.findall(r'PARAMETER stop "(.*?)"', model_text, re.IGNORECASE) | 
					
						
							|  |  |  |     if stops: | 
					
						
							|  |  |  |         data["params"]["stop"] = stops | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Parse other parameters from the provided list | 
					
						
							|  |  |  |     for param, param_type in parameters_meta.items(): | 
					
						
							|  |  |  |         param_match = re.search(rf"PARAMETER {param} (.+)", model_text, re.IGNORECASE) | 
					
						
							|  |  |  |         if param_match: | 
					
						
							|  |  |  |             value = param_match.group(1) | 
					
						
							| 
									
										
										
										
											2024-06-04 03:48:17 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |             try: | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  |                 if param_type is int: | 
					
						
							| 
									
										
										
										
											2024-06-04 03:48:17 +08:00
										 |  |  |                     value = int(value) | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  |                 elif param_type is float: | 
					
						
							| 
									
										
										
										
											2024-06-04 03:48:17 +08:00
										 |  |  |                     value = float(value) | 
					
						
							| 
									
										
										
										
											2024-08-01 04:58:40 +08:00
										 |  |  |                 elif param_type is bool: | 
					
						
							| 
									
										
										
										
											2024-06-04 03:48:17 +08:00
										 |  |  |                     value = value.lower() == "true" | 
					
						
							|  |  |  |             except Exception as e: | 
					
						
							|  |  |  |                 print(e) | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-25 10:26:27 +08:00
										 |  |  |             data["params"][param] = value | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Parse adapter | 
					
						
							|  |  |  |     adapter_match = re.search(r"ADAPTER (.+)", model_text, re.IGNORECASE) | 
					
						
							|  |  |  |     if adapter_match: | 
					
						
							|  |  |  |         data["params"]["adapter"] = adapter_match.group(1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Parse system description | 
					
						
							|  |  |  |     system_desc_match = re.search( | 
					
						
							|  |  |  |         r'SYSTEM\s+"""(.+?)"""', model_text, re.DOTALL | re.IGNORECASE | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2024-06-09 04:45:33 +08:00
										 |  |  |     system_desc_match_single = re.search( | 
					
						
							|  |  |  |         r"SYSTEM\s+([^\n]+)", model_text, re.IGNORECASE | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-05-25 10:26:27 +08:00
										 |  |  |     if system_desc_match: | 
					
						
							|  |  |  |         data["params"]["system"] = system_desc_match.group(1).strip() | 
					
						
							| 
									
										
										
										
											2024-06-09 04:45:33 +08:00
										 |  |  |     elif system_desc_match_single: | 
					
						
							|  |  |  |         data["params"]["system"] = system_desc_match_single.group(1).strip() | 
					
						
							| 
									
										
										
										
											2024-05-25 10:26:27 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # Parse messages | 
					
						
							|  |  |  |     messages = [] | 
					
						
							|  |  |  |     message_matches = re.findall(r"MESSAGE (\w+) (.+)", model_text, re.IGNORECASE) | 
					
						
							|  |  |  |     for role, content in message_matches: | 
					
						
							|  |  |  |         messages.append({"role": role, "content": content}) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if messages: | 
					
						
							|  |  |  |         data["params"]["messages"] = messages | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return data |