Fix: Prevent RAG queries when all files are in full context

This commit fixes an issue where Retrieval-Augmented Generation (RAG)
queries were still being generated even when all attached files were set
to 'full context' mode. This was inefficient as the full content of the
files was already available to the model.

The `chat_completion_files_handler` in `backend/open_webui/utils/middleware.py`
has been updated to:
- Check if all attached files have the `context: 'full'` property.
- Skip the `generate_queries` step, and the status event guarded by the
  same condition, if all files are in full context mode.
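- Pass `full_context=all_full_context or RAG_FULL_CONTEXT` to the
  `get_sources_from_items` function, so that the flag is true whenever
  every attached file is in full context mode (or the global
  `RAG_FULL_CONTEXT` setting is enabled), to ensure it fetches the entire
  document content instead of performing a vector search.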

This change ensures that RAG queries are only generated when necessary,
improving the efficiency of the system.
This commit is contained in:
google-labs-jules[bot] 2025-09-25 15:54:58 +00:00
parent b55a38ee97
commit 4a7e1b93e5
1 changed file with 40 additions and 34 deletions

View File

@@ -631,7 +631,11 @@ async def chat_completion_files_handler(
sources = []
if files := body.get("metadata", {}).get("files", None):
# Check if all files are in full context mode
all_full_context = all(item.get("context") == "full" for item in files)
queries = []
if not all_full_context:
try:
queries_response = await generate_queries(
request,
@@ -663,6 +667,7 @@ async def chat_completion_files_handler(
if len(queries) == 0:
queries = [get_last_user_message(body["messages"])]
if not all_full_context:
await __event_emitter__(
{
"type": "status",
@@ -701,7 +706,8 @@ async def chat_completion_files_handler(
r=request.app.state.config.RELEVANCE_THRESHOLD,
hybrid_bm25_weight=request.app.state.config.HYBRID_BM25_WEIGHT,
hybrid_search=request.app.state.config.ENABLE_RAG_HYBRID_SEARCH,
full_context=request.app.state.config.RAG_FULL_CONTEXT,
full_context=all_full_context
or request.app.state.config.RAG_FULL_CONTEXT,
user=user,
),
)