refac
This commit is contained in:
		
							parent
							
								
									6676824947
								
							
						
					
					
						commit
						1f123eb100
					
				|  | @ -460,20 +460,19 @@ def get_sources_from_files( | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|     extracted_collections = [] |     extracted_collections = [] | ||||||
|     relevant_contexts = [] |     query_results = [] | ||||||
| 
 | 
 | ||||||
|     for file in files: |     for file in files: | ||||||
| 
 |         query_result = None | ||||||
|         context = None |  | ||||||
|         if file.get("docs"): |         if file.get("docs"): | ||||||
|             # BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL |             # BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL | ||||||
|             context = { |             query_result = { | ||||||
|                 "documents": [[doc.get("content") for doc in file.get("docs")]], |                 "documents": [[doc.get("content") for doc in file.get("docs")]], | ||||||
|                 "metadatas": [[doc.get("metadata") for doc in file.get("docs")]], |                 "metadatas": [[doc.get("metadata") for doc in file.get("docs")]], | ||||||
|             } |             } | ||||||
|         elif file.get("context") == "full": |         elif file.get("context") == "full": | ||||||
|             # Manual Full Mode Toggle |             # Manual Full Mode Toggle | ||||||
|             context = { |             query_result = { | ||||||
|                 "documents": [[file.get("file").get("data", {}).get("content")]], |                 "documents": [[file.get("file").get("data", {}).get("content")]], | ||||||
|                 "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]], |                 "metadatas": [[{"file_id": file.get("id"), "name": file.get("name")}]], | ||||||
|             } |             } | ||||||
|  | @ -500,7 +499,7 @@ def get_sources_from_files( | ||||||
|                             } |                             } | ||||||
|                         ) |                         ) | ||||||
| 
 | 
 | ||||||
|                 context = { |                 query_result = { | ||||||
|                     "documents": [documents], |                     "documents": [documents], | ||||||
|                     "metadatas": [metadatas], |                     "metadatas": [metadatas], | ||||||
|                 } |                 } | ||||||
|  | @ -508,7 +507,7 @@ def get_sources_from_files( | ||||||
|             elif file.get("id"): |             elif file.get("id"): | ||||||
|                 file_object = Files.get_file_by_id(file.get("id")) |                 file_object = Files.get_file_by_id(file.get("id")) | ||||||
|                 if file_object: |                 if file_object: | ||||||
|                     context = { |                     query_result = { | ||||||
|                         "documents": [[file_object.data.get("content", "")]], |                         "documents": [[file_object.data.get("content", "")]], | ||||||
|                         "metadatas": [ |                         "metadatas": [ | ||||||
|                             [ |                             [ | ||||||
|  | @ -521,7 +520,7 @@ def get_sources_from_files( | ||||||
|                         ], |                         ], | ||||||
|                     } |                     } | ||||||
|             elif file.get("file").get("data"): |             elif file.get("file").get("data"): | ||||||
|                 context = { |                 query_result = { | ||||||
|                     "documents": [[file.get("file").get("data", {}).get("content")]], |                     "documents": [[file.get("file").get("data", {}).get("content")]], | ||||||
|                     "metadatas": [ |                     "metadatas": [ | ||||||
|                         [file.get("file").get("data", {}).get("metadata", {})] |                         [file.get("file").get("data", {}).get("metadata", {})] | ||||||
|  | @ -549,19 +548,27 @@ def get_sources_from_files( | ||||||
| 
 | 
 | ||||||
|             if full_context: |             if full_context: | ||||||
|                 try: |                 try: | ||||||
|                     context = get_all_items_from_collections(collection_names) |                     query_result = get_all_items_from_collections(collection_names) | ||||||
|                 except Exception as e: |                 except Exception as e: | ||||||
|                     log.exception(e) |                     log.exception(e) | ||||||
| 
 | 
 | ||||||
|             else: |             else: | ||||||
|                 try: |                 try: | ||||||
|                     context = None |                     query_result = None | ||||||
|                     if file.get("type") == "text": |                     if file.get("type") == "text": | ||||||
|                         context = file["content"] |                         # Not sure when this is used, but it seems to be a fallback | ||||||
|  |                         query_result = { | ||||||
|  |                             "documents": [ | ||||||
|  |                                 [file.get("file").get("data", {}).get("content")] | ||||||
|  |                             ], | ||||||
|  |                             "metadatas": [ | ||||||
|  |                                 [file.get("file").get("data", {}).get("meta", {})] | ||||||
|  |                             ], | ||||||
|  |                         } | ||||||
|                     else: |                     else: | ||||||
|                         if hybrid_search: |                         if hybrid_search: | ||||||
|                             try: |                             try: | ||||||
|                                 context = query_collection_with_hybrid_search( |                                 query_result = query_collection_with_hybrid_search( | ||||||
|                                     collection_names=collection_names, |                                     collection_names=collection_names, | ||||||
|                                     queries=queries, |                                     queries=queries, | ||||||
|                                     embedding_function=embedding_function, |                                     embedding_function=embedding_function, | ||||||
|  | @ -577,8 +584,8 @@ def get_sources_from_files( | ||||||
|                                     " non hybrid search as fallback." |                                     " non hybrid search as fallback." | ||||||
|                                 ) |                                 ) | ||||||
| 
 | 
 | ||||||
|                         if (not hybrid_search) or (context is None): |                         if (not hybrid_search) or (query_result is None): | ||||||
|                             context = query_collection( |                             query_result = query_collection( | ||||||
|                                 collection_names=collection_names, |                                 collection_names=collection_names, | ||||||
|                                 queries=queries, |                                 queries=queries, | ||||||
|                                 embedding_function=embedding_function, |                                 embedding_function=embedding_function, | ||||||
|  | @ -589,24 +596,24 @@ def get_sources_from_files( | ||||||
| 
 | 
 | ||||||
|             extracted_collections.extend(collection_names) |             extracted_collections.extend(collection_names) | ||||||
| 
 | 
 | ||||||
|         if context: |         if query_result: | ||||||
|             if "data" in file: |             if "data" in file: | ||||||
|                 del file["data"] |                 del file["data"] | ||||||
| 
 | 
 | ||||||
|             relevant_contexts.append({**context, "file": file}) |             query_results.append({**query_result, "file": file}) | ||||||
| 
 | 
 | ||||||
|     sources = [] |     sources = [] | ||||||
|     for context in relevant_contexts: |     for query_result in query_results: | ||||||
|         try: |         try: | ||||||
|             if "documents" in context: |             if "documents" in query_result: | ||||||
|                 if "metadatas" in context: |                 if "metadatas" in query_result: | ||||||
|                     source = { |                     source = { | ||||||
|                         "source": context["file"], |                         "source": query_result["file"], | ||||||
|                         "document": context["documents"][0], |                         "document": query_result["documents"][0], | ||||||
|                         "metadata": context["metadatas"][0], |                         "metadata": query_result["metadatas"][0], | ||||||
|                     } |                     } | ||||||
|                     if "distances" in context and context["distances"]: |                     if "distances" in query_result and query_result["distances"]: | ||||||
|                         source["distances"] = context["distances"][0] |                         source["distances"] = query_result["distances"][0] | ||||||
| 
 | 
 | ||||||
|                     sources.append(source) |                     sources.append(source) | ||||||
|         except Exception as e: |         except Exception as e: | ||||||
|  |  | ||||||
|  | @ -718,6 +718,10 @@ def apply_params_to_form_data(form_data, model): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| async def process_chat_payload(request, form_data, user, metadata, model): | async def process_chat_payload(request, form_data, user, metadata, model): | ||||||
|  |     # Pipeline Inlet -> Filter Inlet -> Chat Memory -> Chat Web Search -> Chat Image Generation | ||||||
|  |     # -> Chat Code Interpreter (Form Data Update) -> (Default) Chat Tools Function Calling | ||||||
|  |     # -> Chat Files | ||||||
|  | 
 | ||||||
|     form_data = apply_params_to_form_data(form_data, model) |     form_data = apply_params_to_form_data(form_data, model) | ||||||
|     log.debug(f"form_data: {form_data}") |     log.debug(f"form_data: {form_data}") | ||||||
| 
 | 
 | ||||||
|  | @ -911,7 +915,6 @@ async def process_chat_payload(request, form_data, user, metadata, model): | ||||||
|                     request, form_data, extra_params, user, models, tools_dict |                     request, form_data, extra_params, user, models, tools_dict | ||||||
|                 ) |                 ) | ||||||
|                 sources.extend(flags.get("sources", [])) |                 sources.extend(flags.get("sources", [])) | ||||||
| 
 |  | ||||||
|             except Exception as e: |             except Exception as e: | ||||||
|                 log.exception(e) |                 log.exception(e) | ||||||
| 
 | 
 | ||||||
|  | @ -924,24 +927,27 @@ async def process_chat_payload(request, form_data, user, metadata, model): | ||||||
|     # If context is not empty, insert it into the messages |     # If context is not empty, insert it into the messages | ||||||
|     if len(sources) > 0: |     if len(sources) > 0: | ||||||
|         context_string = "" |         context_string = "" | ||||||
|         citation_idx = {} |         citation_idx_map = {} | ||||||
|  | 
 | ||||||
|         for source in sources: |         for source in sources: | ||||||
|             if "document" in source: |             if "document" in source: | ||||||
|                 for doc_context, doc_meta in zip( |                 for document_text, document_metadata in zip( | ||||||
|                     source["document"], source["metadata"] |                     source["document"], source["metadata"] | ||||||
|                 ): |                 ): | ||||||
|                     source_name = source.get("source", {}).get("name", None) |                     source_name = source.get("source", {}).get("name", None) | ||||||
|                     citation_id = ( |                     source_id = ( | ||||||
|                         doc_meta.get("source", None) |                         document_metadata.get("source", None) | ||||||
|                         or source.get("source", {}).get("id", None) |                         or source.get("source", {}).get("id", None) | ||||||
|                         or "N/A" |                         or "N/A" | ||||||
|                     ) |                     ) | ||||||
|                     if citation_id not in citation_idx: | 
 | ||||||
|                         citation_idx[citation_id] = len(citation_idx) + 1 |                     if source_id not in citation_idx_map: | ||||||
|  |                         citation_idx_map[source_id] = len(citation_idx_map) + 1 | ||||||
|  | 
 | ||||||
|                     context_string += ( |                     context_string += ( | ||||||
|                         f'<source id="{citation_idx[citation_id]}"' |                         f'<source id="{citation_idx_map[source_id]}"' | ||||||
|                         + (f' name="{source_name}"' if source_name else "") |                         + (f' name="{source_name}"' if source_name else "") | ||||||
|                         + f">{doc_context}</source>\n" |                         + f">{document_text}</source>\n" | ||||||
|                     ) |                     ) | ||||||
| 
 | 
 | ||||||
|         context_string = context_string.strip() |         context_string = context_string.strip() | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue