diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3c5e3b35a3..4761a997f6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,81 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [0.6.23] - 2025-08-21
+
+### Added
+
+- ⚡ **Asynchronous Chat Payload Processing**: Refactored the chat completion pipeline to return a response immediately for streaming requests involving web search or tool calls. This enables users to stop ongoing generations promptly and prevents network timeouts during lengthy preprocessing phases, significantly improving user experience and responsiveness.
+- 📁 **Asynchronous File Upload with Polling**: Implemented an asynchronous file upload process with frontend polling to resolve gateway timeouts and improve reliability when uploading large files. This ensures that even lengthy file processing, such as embedding or transcription, does not block the user interface or lead to connection timeouts, providing a smoother experience for all file operations.
+- 📈 **Database Performance Indexes and Migration Script**: Introduced new database indexes on the "chat", "tag", and "function" tables to significantly enhance query performance for SQLite and PostgreSQL installations. For existing deployments, a new Alembic migration script is included to seamlessly apply these indexes, ensuring faster filtering and sorting operations across the platform.
+- ✨ **Enhanced Database Performance Options**: Introduced new configurable options to significantly improve database performance, especially for SQLite. This includes "DATABASE_ENABLE_SQLITE_WAL" to enable SQLite WAL (Write-Ahead Logging) mode for concurrent operations, and "DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL" which, in conjunction with a new deduplication mechanism, reduces redundant updates to "user.last_active_at", minimizing write conflicts across all database types.
+- 💾 **Save Temporary Chats Button**: Introduced a new 'Save Chat' button for conversations initiated in temporary mode. This allows users to permanently save valuable temporary conversations to their chat history, providing greater flexibility and ensuring important discussions are not lost.
+- 📂 **Chat Movement Options in Menu**: Added the ability to move chats directly to folders from the chat menu. This enhances chat organization and allows users to manage their conversations more efficiently by relocating them between folders with ease.
+- 💬 **Language-Aware Follow-Up Suggestions**: Enhanced the AI's follow-up question generation to dynamically adapt to the primary language of the current chat. Follow-up prompts will now be suggested in the same language the user and AI are conversing in, ensuring more natural and contextually relevant interactions.
+- 👤 **Expanded User Profile Details**: Introduced new user profile fields including username, bio, gender, and date of birth, allowing for more comprehensive user customization and information management. This enhancement includes corresponding updates to the database schema, API, and user interface for seamless integration.
+- 👥 **Direct Navigation to User Groups from User Edit**: Enhanced the user edit modal to include a direct link to the associated user group. This allows administrators to quickly navigate from a user's profile to their group settings, streamlining user and group management workflows.
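The deduplication mechanism referenced by "DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL" surfaces later in this diff as a `@throttle(...)` decorator on `Users.update_user_last_active_by_id`. The decorator itself lives in `open_webui/utils/misc.py` and is not part of this diff, so the following is only a hedged sketch of the described behavior; every implementation detail here is an assumption:

```python
# Hypothetical sketch; the real helper in open_webui/utils/misc.py is not
# shown in this diff. It models the documented behavior: skip repeat calls
# with the same arguments until `interval` seconds have elapsed, and do
# nothing special when the interval is unset (None or 0).
import time
from functools import wraps


def throttle(interval):
    def decorator(func):
        last_called = {}

        @wraps(func)
        def wrapper(*args, **kwargs):
            if not interval:
                return func(*args, **kwargs)
            key = (args, tuple(sorted(kwargs.items())))
            now = time.time()
            if now - last_called.get(key, 0.0) < interval:
                return None  # Drop the redundant write
            last_called[key] = now
            return func(*args, **kwargs)

        return wrapper

    return decorator
```

Under this reading, setting the interval to e.g. `300` collapses bursts of "user is active" writes into at most one database update per user every five minutes, which is where the reduced write contention comes from.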
+- 🔧 **Enhanced External Tool Server Compatibility**: Improved handling of responses from external tool servers, allowing both the backend and frontend to process plain text content in addition to JSON, ensuring greater flexibility and integration with diverse tool outputs.
+- 🗣️ **Enhanced Audio Transcription Language Fallback and Deepgram Support**: Implemented a robust language fallback mechanism for both OpenAI and Deepgram Speech-to-Text (STT) API calls. If a specified language parameter is not supported by the model or provider, the system will now intelligently retry the transcription without the language parameter or with a default, ensuring greater reliability and preventing failed API calls. This also specifically adds and refines support for the audio language parameter in Deepgram API integrations.
+- ⚡ **Optimized Hybrid Search Performance for BM25 Weight Configuration**: Enhanced hybrid search to significantly improve performance when the BM25 weight is set to 0 or less. This optimization intelligently disables unnecessary collection retrieval and BM25 ranking calculations, leading to faster search results without impacting accuracy for configurations that do not utilize lexical search contributions.
+- 🔒 **Configurable Code Interpreter Module Blocklist**: Introduced the "CODE_INTERPRETER_BLOCKED_MODULES" environment variable, allowing administrators to specify Python modules that are forbidden from being imported or executed within the code interpreter. This significantly enhances the security posture by mitigating risks associated with arbitrary code execution, such as unauthorized data access, system manipulation, or outbound connections.
+- 🔐 **Enhanced OAuth Role Claim Handling**: Improved compatibility with diverse OAuth providers by allowing role claims to be supplied as single strings or integers, in addition to arrays. The system now automatically normalizes these single-value claims into arrays for consistent processing, streamlining integration with identity providers that format role data differently.
+- ⚙️ **Configurable Tool Call Timeout**: Introduced the "AIOHTTP_CLIENT_TIMEOUT" environment variable, allowing administrators to specify custom timeout durations for external tool calls, which is crucial for integrations with tools that have varying or extended response times.
+- 🛠️ **Improved Tool Callable Generation for Google genai SDK**: Enhanced the creation of tool callables to directly support native function calling within the Google 'genai' SDK. This refactoring ensures proper signature inference and removes extraneous parameters, enabling seamless integration for advanced AI workflows using Google's generative AI models.
+- ✨ **Dynamic Loading of 'kokoro-js'**: Implemented dynamic loading for the 'kokoro-js' library, preventing failures and improving compatibility on older iOS browsers that may not support direct imports or certain modern JavaScript APIs like 'DecompressionStream'.
+- 🖥️ **Improved Command List Visibility on Small Screens**: Resolved an issue where the top items in command lists (e.g., Knowledge Base, Models, Prompts) were hidden or overlapped by the header on smaller screen sizes or specific browser zoom levels. The command option lists now dynamically adjust their height, ensuring all items are fully visible and accessible with proper scrolling.
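The OAuth role-claim normalization described above amounts to wrapping scalar claims in a list before the existing array-based logic runs. The project's actual handler is not part of this diff; the helper below is a minimal illustrative sketch, and its name is an assumption:

```python
# Illustrative only: normalize a role claim that may arrive from the
# identity provider as a single string, an integer, or a list, into a
# list of strings for uniform downstream processing.
def normalize_role_claim(claim):
    if claim is None:
        return []
    if isinstance(claim, (str, int)):
        return [str(claim)]
    return [str(role) for role in claim]


assert normalize_role_claim("admin") == ["admin"]
assert normalize_role_claim(1000) == ["1000"]
assert normalize_role_claim(["user", "admin"]) == ["user", "admin"]
```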
+- 📦 **Improved Docker Image Compatibility for Arbitrary UIDs**: Fixed issues preventing the Open WebUI container from running in environments with arbitrary User IDs (UIDs), such as OpenShift's restricted Security Context Constraints (SCC). The Dockerfile has been updated to correctly set file system permissions for "/app" and "/root" directories, ensuring they are writable by processes running with a supplemental GID 0, thus resolving permission errors for Python libraries and application caches.
+- ♿ **Accessibility Enhancements**: Significantly improved the semantic structure of chat messages by using "section", "h2", "ul", and "li" HTML tags, and enhanced screen reader compatibility by explicitly hiding decorative images with "aria-hidden" attributes. This refactoring provides clearer structural context and improves overall accessibility and web standards compliance for the conversation flow.
+- 🌐 **Localization & Internationalization Improvements**: Significantly expanded internationalization support throughout the user interface, translating numerous user-facing strings in toast messages, placeholders, and other UI elements. This, alongside continuous refinement and expansion of translations for languages including Brazilian Portuguese, Kabyle (Taqbaylit), Czech, Finnish, Chinese (Simplified), Chinese (Traditional), and German, and general fixes for several other translation files, further enhances linguistic coverage and user experience.
+
+### Fixed
+
+- 🛡️ **Resolved Critical OIDC SSO Login Failure**: Fixed a critical issue where OIDC Single Sign-On (SSO) logins failed due to an error in setting the authentication token as a cookie during the redirect process. This ensures reliable and seamless authentication for users utilizing OIDC providers, restoring full login functionality that was impacted by previous security hardening.
+- ⚡ **Prevented UI Blocking by Unreachable Webhooks**: Resolved a critical performance and user experience issue where synchronous webhook calls to unreachable or slow endpoints would block the entire user interface for all users. Webhook requests are now processed asynchronously using "aiohttp", ensuring that the UI remains responsive and functional even if webhook delivery encounters delays or failures.
+- 🔒 **Password Change Option Hidden for Externally Authenticated Users**: Resolved an issue where the password change dialog was visible to users authenticated via external methods (e.g., LDAP, OIDC, Trusted Header). The option to change a password in user settings is now correctly hidden for these users, as their passwords are managed externally, streamlining the user interface and preventing confusion.
+- 💬 **Resolved Temporary Chat and Permission Enforcement Issues**: Fixed a bug where temporary chats (identified by "chat_id = local") incorrectly triggered database checks, leading to 404 errors. This also resolves the issue where the 'USER_PERMISSIONS_CHAT_TEMPORARY_ENFORCED' setting was not functioning as intended, ensuring temporary chat mode now works correctly for user roles.
+- 🔐 **Admin Model Visibility for Administrators**: Resolved an issue where private models remained visible and usable for administrators in the chat model selector, even when the intended privacy setting ("ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS" - now renamed to "BYPASS_ADMIN_ACCESS_CONTROL") was disabled. This ensures consistent enforcement of model access controls and adherence to the principle of least privilege.
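The webhook fix listed above moves delivery onto "aiohttp" so a dead or slow endpoint cannot stall the event loop. A minimal sketch of that non-blocking pattern follows; the function name, signature, and timeout default are assumptions, not the project's actual helper (which is not shown in this diff):

```python
# Hedged sketch: deliver a webhook without blocking request handlers.
# Callers would schedule this with asyncio.create_task(post_webhook(...))
# rather than awaiting it inline.
import aiohttp


async def post_webhook(url: str, payload: dict, timeout_s: float = 10.0) -> bool:
    try:
        timeout = aiohttp.ClientTimeout(total=timeout_s)
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(url, json=payload) as response:
                return response.status < 400
    except Exception:
        # Delivery failures are swallowed here on purpose: a webhook
        # outage must never propagate into the user-facing request path.
        return False
```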
+- 🔍 **Clarified Web Search Engine Label for DDGS**: Addressed user confusion and inaccurate labeling by renaming "duckduckgo" to "DDGS" (Dux Distributed Global Search) in the web search engine selector. This clarifies that the system utilizes DDGS, a metasearch library that aggregates results from various search providers, accurately reflecting its underlying functionality rather than implying exclusive use of DuckDuckGo's search engine.
+- 🛠️ **Improved Settings UI Reactivity and Visibility**: Resolved an issue where settings tabs for 'Connections' and 'Tools' did not dynamically update their visibility based on global administrative feature flags (e.g., 'enable_direct_connections'). The UI now reactively shows or hides these sections, ensuring a consistent and clear experience when administrators control feature availability.
+- 🎚️ **Restored Model and Banner Reordering Functionality**: Fixed a bug that prevented administrators from reordering models in the Admin Panel's 'Models' settings and banners in the 'Interface' settings via drag-and-drop. The sortable functionality has been restored, allowing for proper customization of display order.
+- 📝 **Restored Custom Pending User Overlay Visibility**: Fixed an issue where the custom title and description configured for pending users were not visible. The application now correctly exposes these UI configuration settings to pending users, ensuring that the custom onboarding messages are displayed as intended.
+- 📥 **Fixed Community Function Import Compatibility**: Resolved an issue that prevented the successful import of function files downloaded from openwebui.com due to schema differences. The system now correctly processes these files, allowing for seamless integration of community-contributed functions.
+- 📦 **Fixed Stale Ollama Version in Docker Images**: Resolved an issue where the Ollama installation within Docker images could become stale due to caching during the build process. The Dockerfile now includes a mechanism to invalidate the build cache for the Ollama installation step, ensuring that the latest version of Ollama is always installed.
+- 🗄️ **Improved Milvus Query Handling for Large Datasets**: Fixed a "MilvusException" that occurred when attempting to query more than 16384 entries from a Milvus collection. The query logic has been refactored to use "query_iterator()", enabling efficient fetching of larger result sets in batches and resolving the previous limitation on the number of entries that could be retrieved.
+- 🐛 **Restored Message Toolbar Icons for Empty Messages with Files**: Fixed an issue where the edit, copy, and delete icons were not displayed on user messages that contained an attached file but no text content. This ensures full interaction capabilities for all message types, allowing users to manage their messages consistently.
+- 💬 **Resolved Streaming Interruption for Kimi-Dev Models**: Fixed an issue where streaming responses from Kimi-Dev models would halt prematurely upon encountering specific 'thinking' tokens (◁think▷, ◁/think▷). The system now correctly processes these tokens, ensuring uninterrupted streaming and proper handling of hidden or collapsible thinking sections.
+- 🔍 **Enhanced Knowledge Base Search Functionality**: Improved the search capability within the 'Knowledge' section of the Workspace. Previously, searching for knowledge bases required exact term matches or starting with the first letter. Now, the search algorithm has been refined to allow broader, less exact matches, making it easier and more intuitive to find relevant knowledge bases.
+- 📝 **Resolved Chinese Input 'Enter' Key Issue (macOS & iOS Safari)**: Fixed a bug where pressing the 'Enter' key during text composition with Input Method Editors (IMEs) on macOS and iOS Safari browsers would prematurely send the message. The system now robustly handles the composition state by addressing a 'compositionend' event bug specific to Safari, ensuring a smooth and expected typing experience for users of various languages, including Chinese and Korean.
+- 🔐 **Resolved OAUTH_GROUPS_CLAIM Configuration Issue**: Fixed a bug where the "OAUTH_GROUPS_CLAIM" environment variable was not correctly parsed due to a typo in the configuration file. This ensures that OAuth group management features, including automatic group creation, now correctly utilize the specified claim from the identity provider, allowing for seamless integration with external user directories like Keycloak.
+- 🗄️ **Resolved Azure PostgreSQL pgvector Extension Permissions**: Fixed an issue preventing the creation of "pgvector" and "pgcrypto" extensions on Azure PostgreSQL Flexible Servers due to permission limitations (e.g., 'Only members of "azure_pg_admin" are allowed to use "CREATE EXTENSION"'). The extension creation process now includes a conditional check, ensuring seamless deployment and compatibility with Azure PostgreSQL environments even with restricted database user permissions.
+- 🛠️ **Improved Backend Path Resolution and Alembic Stability**: Fixed issues causing Alembic database migrations to fail due to incorrect path resolution within the application. By implementing canonical path resolution for core directories and refining Alembic configuration, the robustness and correctness of internal pathing have been significantly enhanced, ensuring reliable database operations.
+- 📊 **Resolved Arena Model Identification in Feedback History**: Fixed an issue where the model used for feedback in arena settings was incorrectly reported as 'arena-model' in the evaluation history. The system now correctly logs and displays the actual model ID that received the feedback, restoring clarity and enabling proper analysis of model performance in arena environments.
+- 🎨 **Resolved Icon Overlap in 'Her' Theme**: Fixed a visual glitch in the 'Her' theme where icons would overlap on the loading screen and certain icons appeared incongruous. The display has been corrected to ensure proper visual presentation and theme consistency.
+- 🛠️ **Resolved Model Sorting TypeError with Null Names**: Fixed a "TypeError" that occurred in the "/api/models" endpoint when sorting models with null or missing names. The model sorting logic has been improved to gracefully handle such edge cases by ensuring that model IDs and names are treated as empty strings if their values are null or undefined, preventing comparison errors and improving API stability.
+- 💬 **Resolved Silently Dropped Streaming Response Chunks**: Fixed an issue where the final partial chunks of streaming chat responses could be silently dropped, leading to incomplete message delivery. The system now reliably flushes any pending delta data upon stream termination, early breaks (e.g., code interpreter tags), or connection closure, ensuring complete and accurate response delivery.
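The flush-on-termination fix above follows a standard buffered-generator pattern: accumulate deltas, emit on a boundary, and always emit the remainder once the stream ends. The sketch below is illustrative only, not the project's streaming code; the delimiter-based flush condition is an assumption:

```python
# Illustrative pattern: never silently drop the final partial chunk.
async def iterate_deltas(stream, delimiter="\n"):
    buffer = ""
    async for delta in stream:
        buffer += delta
        if buffer.endswith(delimiter):  # assumed flush condition
            yield buffer
            buffer = ""
    # Flush whatever is still pending when the stream terminates,
    # instead of discarding the trailing partial delta.
    if buffer:
        yield buffer
```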
+- 📱 **Disabled Overscroll for iOS Frontend**: Fixed an issue where overscrolling was enabled on iOS devices, causing unexpected scrolling behavior over fixed or sticky elements within the PWA. Overscroll has now been disabled, providing a more native application-like experience for iOS users.
+- 📝 **Resolved Code Block Input Issue with Shift+Enter**: Fixed a bug where typing three backticks followed by a language and then pressing Shift+Enter would cause the code block prefix to disappear, preventing proper code formatting. The system now correctly preserves the code block syntax, ensuring consistent behavior for multi-line code input.
+- 🛠️ **Improved OpenAI Model List Handling for Null Names**: Fixed an edge case where some OpenAI-compatible API providers might return models with a null value for their 'name' field. This could lead to issues like broken model list sorting. The system now gracefully handles these instances by removing the null 'name' key, ensuring stable model retrieval and display.
+- 🔍 **Resolved DDGS Concurrent Request Configuration**: Fixed an issue where the configured number of concurrent requests was not being honored for the DDGS (Dux Distributed Global Search) metasearch engine. The system now correctly applies the specified concurrency setting, improving efficiency for web searches.
+- 🛠️ **Improved Tool List Synchronization in Multi-Replica Deployments**: Resolved an issue where tool updates were not consistently reflected across all instances in multi-replica environments, leading to stale tool lists for users on other replicas. The tool list in the message input menu is now automatically refreshed each time it is accessed, ensuring all users always see the most current set of available tools.
+- 🛠️ **Resolved Duplicate Tool Name Collision**: Fixed an issue where tools with identical names from different external servers were silently removed, preventing their simultaneous use. The system now correctly handles tool name collisions by internally prefixing tools with their server identifier, allowing multiple instances of similarly named tools from different servers to be active and usable by LLMs.
+- 🖼️ **Resolved Image Generation API Size Parameter Issue**: Fixed a bug where the "/api/v1/images/generations" API endpoint did not correctly apply the 'size' parameter specified in the request payload for image generation. The system now properly honors the requested image dimensions (e.g., '1980x1080'), ensuring that generated images match the user's explicit size preference rather than defaulting to settings.
+- 🗄️ **Resolved S3 Vector Upload Limitations**: Fixed an issue that prevented uploading more than 500 vectors to S3 Vector buckets due to API limitations, which resulted in a "ValidationException". S3 vector uploads are now batched in groups of 500, ensuring successful processing of larger datasets.
+- 🛠️ **Fixed Tool Installation Error During Startup**: Resolved a "NoneType" error that occurred during tool installation at startup when 'tool.user' was unexpectedly null. The system now includes a check to ensure 'tool.user' exists before attempting to access its properties, preventing crashes and ensuring robust tool initialization.
+- 🛠️ **Improved Azure OpenAI GPT-5 Parameter Handling**: Fixed an issue with Azure OpenAI SDK parameter handling to correctly support GPT-5 models. The 'max_tokens' parameter is now appropriately converted to 'max_completion_tokens' for GPT-5 models, ensuring consistent behavior and proper function execution similar to existing o-series models.
+- 🐛 **Resolved Exception with Missing Group Permissions**: Fixed an exception that occurred in the access control logic when group permission objects were missing or null. The system now correctly handles cases where groups may not have explicit permission definitions, ensuring that 'None' checks prevent errors and maintain application stability when processing user permissions.
+- 🛠️ **Improved OpenAI API Base URL Handling**: Fixed an issue where a trailing slash in the 'OPENAI_API_BASE_URL' configuration could lead to models not being detected or the endpoint failing. The system now automatically removes trailing slashes from the configured URL, ensuring robust and consistent connections to OpenAI-compatible APIs.
+- 🖼️ **Resolved S3-Compatible Storage Upload Failures**: Fixed an issue where uploads to S3-compatible storage providers would fail with an "XAmzContentSHA256Mismatch" error. The system now correctly handles checksum calculations, ensuring reliable file and image uploads to S3-compatible services.
+- 🌐 **Corrected 'Releases' Link**: Fixed an issue where the 'Releases' button in the user menu directed to an incorrect URL, now correctly linking to the Open WebUI GitHub releases page.
+- 🛠️ **Resolved Model Sorting Errors with Null or Undefined Names**: Fixed multiple "TypeError" instances that occurred when attempting to sort model lists where model names were null or undefined. The sorting logic across various UI components (including Ollama model selection, leaderboard, and admin model settings) has been made more robust by gracefully handling absent model names, preventing crashes and ensuring consistent alphabetical sorting based on available name or ID.
+- 🎨 **Resolved Banner Dismissal Issue with Iteration IDs**: Fixed a bug where dismissing banners could lead to unintended multiple banner dismissals or other incorrect behavior, especially when banners lacked unique iteration IDs. Unique IDs are now assigned during banner iteration, ensuring proper individual dismissal and consistent display behavior.
+
+### Changed
+
+- 🛂 **Environment Variable for Admin Access Control**: The environment variable "ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS" has been renamed to "BYPASS_ADMIN_ACCESS_CONTROL". This new name more accurately reflects its function as a control to allow administrators to bypass model access restrictions. Users are encouraged to update their configurations to use the new variable name; existing configurations using the old name will still be honored for backward compatibility.
+- 🗂️ **Core Directory Path Resolution Updated**: The internal mechanism for resolving core application directory paths ("OPEN_WEBUI_DIR", "BACKEND_DIR", "BASE_DIR") has been updated to use canonical resolution via "Path().resolve()". This change improves path reliability but may require adjustments for any external scripts or configurations that previously relied on specific non-canonical path interpretations.
+- 🗃️ **Database Performance Options**: New database performance options, "DATABASE_ENABLE_SQLITE_WAL" and "DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL", are now available. If "DATABASE_ENABLE_SQLITE_WAL" is enabled, SQLite will operate in WAL mode, which may alter SQLite's file locking behavior. If "DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL" is set to a non-zero value, the "user.last_active_at" timestamp will be updated less frequently, leading to slightly less real-time accuracy for this specific field but significantly reducing database write conflicts and improving overall performance. Both options are disabled by default.
+- 🌐 **Renamed Web Search Concurrency Setting**: The environment variable "WEB_SEARCH_CONCURRENT_REQUESTS" has been renamed to "WEB_LOADER_CONCURRENT_REQUESTS". This change clarifies its scope, explicitly applying to the concurrency of the web loader component (which fetches content from search results) rather than the initial search engine query. Users relying on the old environment variable name for configuring web search concurrency must update their configurations to use "WEB_LOADER_CONCURRENT_REQUESTS".
+
 ## [0.6.22] - 2025-08-11
 
 ### Added
diff --git a/Dockerfile b/Dockerfile
index 5747680e86..83a74365f0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -108,29 +108,13 @@ RUN echo -n 00000000-0000-0000-0000-000000000000 > $HOME/.cache/chroma/telemetry
 # Make sure the user has access to the app and root directory
 RUN chown -R $UID:$GID /app $HOME
 
-RUN if [ "$USE_OLLAMA" = "true" ]; then \
-    apt-get update && \
-    # Install pandoc and netcat
-    apt-get install -y --no-install-recommends git build-essential pandoc netcat-openbsd curl && \
-    apt-get install -y --no-install-recommends gcc python3-dev && \
-    # for RAG OCR
-    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
-    # install helper tools
-    apt-get install -y --no-install-recommends curl jq && \
-    # install ollama
-    curl -fsSL https://ollama.com/install.sh | sh && \
-    # cleanup
-    rm -rf /var/lib/apt/lists/*; \
-    else \
-    apt-get update && \
-    # Install pandoc, netcat and gcc
-    apt-get install -y --no-install-recommends git build-essential pandoc gcc netcat-openbsd curl jq && \
-    apt-get install -y --no-install-recommends gcc python3-dev && \
-    # for RAG OCR
-    apt-get install -y --no-install-recommends ffmpeg libsm6 libxext6 && \
-    # cleanup
-    rm -rf /var/lib/apt/lists/*; \
-    fi
+# Install common system dependencies
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    git build-essential pandoc gcc netcat-openbsd curl jq \
+    python3-dev \
+    ffmpeg libsm6 libxext6 \
+    && rm -rf /var/lib/apt/lists/*
 
 # install python dependencies
 COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
@@ -152,7 +136,13 @@ RUN pip3 install --no-cache-dir uv && \
     fi; \
     chown -R $UID:$GID /app/backend/data/
 
-
+# Install Ollama if requested
+RUN if [ "$USE_OLLAMA" = "true" ]; then \
+    date +%s > /tmp/ollama_build_hash && \
+    echo "Cache broken at timestamp: `cat /tmp/ollama_build_hash`" && \
+    curl -fsSL https://ollama.com/install.sh | sh && \
+    rm -rf /var/lib/apt/lists/*; \
+    fi
 
 # copy embedding weight from build
 # RUN mkdir -p /root/.cache/chroma/onnx_models/all-MiniLM-L6-v2
@@ -170,6 +160,15 @@ EXPOSE 8080
 
 HEALTHCHECK CMD curl --silent --fail http://localhost:${PORT:-8080}/health | jq -ne 'input.status == true' || exit 1
 
+# Minimal, atomic permission hardening for OpenShift (arbitrary UID):
+# - Group 0 owns /app and /root
+# - Directories are group-writable and have SGID so new files inherit GID 0
+RUN set -eux; \
+    chgrp -R 0 /app /root || true; \
+    chmod -R g+rwX /app /root || true; \
+    find /app -type d -exec chmod g+s {} + || true; \
+    find /root -type d -exec chmod g+s {} + || true
+
 USER $UID:$GID
 
 ARG BUILD_HASH
diff --git a/backend/open_webui/alembic.ini b/backend/open_webui/alembic.ini
index 4eff85f0c6..dccd8a3c12 100644
--- a/backend/open_webui/alembic.ini
+++ b/backend/open_webui/alembic.ini
@@ -10,7 +10,7 @@ script_location = migrations
 # sys.path path, will be prepended to sys.path if present.
 # defaults to the current working directory.
-prepend_sys_path = .
+prepend_sys_path = ..
 
 # timezone to use when rendering the date within the migration file
 # as well as the filename.
diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py
index 43c864ef22..d3b7c9314c 100644
--- a/backend/open_webui/config.py
+++ b/backend/open_webui/config.py
@@ -510,7 +510,7 @@ OAUTH_EMAIL_CLAIM = PersistentConfig(
 OAUTH_GROUPS_CLAIM = PersistentConfig(
     "OAUTH_GROUPS_CLAIM",
     "oauth.oidc.group_claim",
-    os.environ.get("OAUTH_GROUP_CLAIM", "groups"),
+    os.environ.get("OAUTH_GROUPS_CLAIM", os.environ.get("OAUTH_GROUP_CLAIM", "groups")),
 )
 
 ENABLE_OAUTH_ROLE_MANAGEMENT = PersistentConfig(
@@ -953,6 +953,9 @@ GEMINI_API_BASE_URL = os.environ.get("GEMINI_API_BASE_URL", "")
 
 if OPENAI_API_BASE_URL == "":
     OPENAI_API_BASE_URL = "https://api.openai.com/v1"
+else:
+    if OPENAI_API_BASE_URL.endswith("/"):
+        OPENAI_API_BASE_URL = OPENAI_API_BASE_URL[:-1]
 
 OPENAI_API_KEYS = os.environ.get("OPENAI_API_KEYS", "")
 OPENAI_API_KEYS = OPENAI_API_KEYS if OPENAI_API_KEYS != "" else OPENAI_API_KEY
@@ -1355,6 +1358,14 @@ ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS = (
     os.environ.get("ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS", "True").lower() == "true"
 )
 
+BYPASS_ADMIN_ACCESS_CONTROL = (
+    os.environ.get(
+        "BYPASS_ADMIN_ACCESS_CONTROL",
+        os.environ.get("ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS", "True"),
+    ).lower()
+    == "true"
+)
+
 ENABLE_ADMIN_CHAT_ACCESS = (
     os.environ.get("ENABLE_ADMIN_CHAT_ACCESS", "True").lower() == "true"
 )
@@ -1565,7 +1576,7 @@ FOLLOW_UP_GENERATION_PROMPT_TEMPLATE = PersistentConfig(
 )
 
 DEFAULT_FOLLOW_UP_GENERATION_PROMPT_TEMPLATE = """### Task:
-Suggest 3-5 relevant follow-up questions or prompts that the user might naturally ask next in this conversation as a **user**, based on the chat history, to help continue or deepen the discussion.
+Suggest 3-5 relevant follow-up questions or prompts in the chat's primary language that the user might naturally ask next in this conversation as a **user**, based on the chat history, to help continue or deepen the discussion.
 ### Guidelines:
 - Write all follow-up questions from the user’s point of view, directed to the assistant.
 - Make questions concise, clear, and directly related to the discussed topic(s).
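Two config normalizations in the hunks above are worth reading together: the trailing-slash strip on OPENAI_API_BASE_URL, and the new-name-first environment fallback used for both BYPASS_ADMIN_ACCESS_CONTROL and OAUTH_GROUPS_CLAIM. A quick self-contained illustration of the resulting behavior, drawn directly from those hunks (the asserts express the intended semantics, nothing more):

```python
import os

# Trailing slash is stripped so downstream URL joins stay consistent.
base_url = "https://example.com/v1/"
if base_url.endswith("/"):
    base_url = base_url[:-1]
assert base_url == "https://example.com/v1"

# The new variable wins; the legacy name is still honored when unset.
os.environ["ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS"] = "false"
bypass = (
    os.environ.get(
        "BYPASS_ADMIN_ACCESS_CONTROL",
        os.environ.get("ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS", "True"),
    ).lower()
    == "true"
)
assert bypass is False
```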
@@ -1857,6 +1868,11 @@ CODE_INTERPRETER_JUPYTER_TIMEOUT = PersistentConfig(
     ),
 )
 
+CODE_INTERPRETER_BLOCKED_MODULES = [
+    library.strip()
+    for library in os.environ.get("CODE_INTERPRETER_BLOCKED_MODULES", "").split(",")
+    if library.strip()
+]
 
 DEFAULT_CODE_INTERPRETER_PROMPT = """
 #### Tools Available
@@ -2611,6 +2627,14 @@ WEB_LOADER_ENGINE = PersistentConfig(
     os.environ.get("WEB_LOADER_ENGINE", ""),
 )
 
+
+WEB_LOADER_CONCURRENT_REQUESTS = PersistentConfig(
+    "WEB_LOADER_CONCURRENT_REQUESTS",
+    "rag.web.loader.concurrent_requests",
+    int(os.getenv("WEB_LOADER_CONCURRENT_REQUESTS", "10")),
+)
+
+
 ENABLE_WEB_LOADER_SSL_VERIFICATION = PersistentConfig(
     "ENABLE_WEB_LOADER_SSL_VERIFICATION",
     "rag.web.loader.ssl_verification",
diff --git a/backend/open_webui/env.py b/backend/open_webui/env.py
index e561036408..83625031ea 100644
--- a/backend/open_webui/env.py
+++ b/backend/open_webui/env.py
@@ -17,14 +17,17 @@ from open_webui.constants import ERROR_MESSAGES
 # Load .env file
 ####################################
 
-OPEN_WEBUI_DIR = Path(__file__).parent  # the path containing this file
-print(OPEN_WEBUI_DIR)
+# Use .resolve() to get the canonical path, removing any '..' or '.' components
+ENV_FILE_PATH = Path(__file__).resolve()
 
-BACKEND_DIR = OPEN_WEBUI_DIR.parent  # the path containing this file
-BASE_DIR = BACKEND_DIR.parent  # the path containing the backend/
+# OPEN_WEBUI_DIR should be the directory where env.py resides (open_webui/)
+OPEN_WEBUI_DIR = ENV_FILE_PATH.parent
 
-print(BACKEND_DIR)
-print(BASE_DIR)
+# BACKEND_DIR is the parent of OPEN_WEBUI_DIR (backend/)
+BACKEND_DIR = OPEN_WEBUI_DIR.parent
+
+# BASE_DIR is the parent of BACKEND_DIR (open-webui-dev/)
+BASE_DIR = BACKEND_DIR.parent
 
 try:
     from dotenv import find_dotenv, load_dotenv
@@ -336,6 +339,21 @@ else:
     except Exception:
         DATABASE_POOL_RECYCLE = 3600
 
+DATABASE_ENABLE_SQLITE_WAL = (
+    os.environ.get("DATABASE_ENABLE_SQLITE_WAL", "False").lower() == "true"
+)
+
+DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL = os.environ.get(
+    "DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL", None
+)
+if DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL is not None:
+    try:
+        DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL = float(
+            DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL
+        )
+    except Exception:
+        DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL = 0.0
+
 RESET_CONFIG_ON_START = (
     os.environ.get("RESET_CONFIG_ON_START", "False").lower() == "true"
 )
@@ -677,6 +695,7 @@ AUDIT_EXCLUDED_PATHS = [path.lstrip("/") for path in AUDIT_EXCLUDED_PATHS]
 ####################################
 
 ENABLE_OTEL = os.environ.get("ENABLE_OTEL", "False").lower() == "true"
+ENABLE_OTEL_TRACES = os.environ.get("ENABLE_OTEL_TRACES", "False").lower() == "true"
 ENABLE_OTEL_METRICS = os.environ.get("ENABLE_OTEL_METRICS", "False").lower() == "true"
 ENABLE_OTEL_LOGS = os.environ.get("ENABLE_OTEL_LOGS", "False").lower() == "true"
 
diff --git a/backend/open_webui/functions.py b/backend/open_webui/functions.py
index 6eb5c1bbdb..d8f2a61257 100644
--- a/backend/open_webui/functions.py
+++ b/backend/open_webui/functions.py
@@ -47,7 +47,7 @@ from open_webui.utils.misc import (
 )
 from open_webui.utils.payload import (
     apply_model_params_to_body_openai,
-    apply_model_system_prompt_to_body,
+    apply_system_prompt_to_body,
 )
 
@@ -253,9 +253,7 @@ async def generate_function_chat_completion(
     if params:
         system = params.pop("system", None)
         form_data = apply_model_params_to_body_openai(params, form_data)
-        form_data = apply_model_system_prompt_to_body(
-            system, form_data, metadata, user
-        )
+        form_data = apply_system_prompt_to_body(system, form_data, metadata, user)
 
     pipe_id = get_pipe_id(form_data)
     function_module = get_function_module_by_id(request, pipe_id)
diff --git a/backend/open_webui/internal/db.py b/backend/open_webui/internal/db.py
index d7a200ff20..b6913d87b0 100644
--- a/backend/open_webui/internal/db.py
+++ b/backend/open_webui/internal/db.py
@@ -14,9 +14,10 @@ from open_webui.env import (
     DATABASE_POOL_RECYCLE,
     DATABASE_POOL_SIZE,
     DATABASE_POOL_TIMEOUT,
+    DATABASE_ENABLE_SQLITE_WAL,
 )
 from peewee_migrate import Router
-from sqlalchemy import Dialect, create_engine, MetaData, types
+from sqlalchemy import Dialect, create_engine, MetaData, event, types
 from sqlalchemy.ext.declarative import declarative_base
 from sqlalchemy.orm import scoped_session, sessionmaker
 from sqlalchemy.pool import QueuePool, NullPool
@@ -114,6 +115,16 @@ elif "sqlite" in SQLALCHEMY_DATABASE_URL:
     engine = create_engine(
         SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False}
     )
+
+    def on_connect(dbapi_connection, connection_record):
+        cursor = dbapi_connection.cursor()
+        if DATABASE_ENABLE_SQLITE_WAL:
+            cursor.execute("PRAGMA journal_mode=WAL")
+        else:
+            cursor.execute("PRAGMA journal_mode=DELETE")
+        cursor.close()
+
+    event.listen(engine, "connect", on_connect)
 else:
     if isinstance(DATABASE_POOL_SIZE, int):
         if DATABASE_POOL_SIZE > 0:
diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py
index 076d4c486d..cbaefe1f3e 100644
--- a/backend/open_webui/main.py
+++ b/backend/open_webui/main.py
@@ -57,6 +57,7 @@ from open_webui.utils.logger import start_logger
 from open_webui.socket.main import (
     app as socket_app,
     periodic_usage_pool_cleanup,
+    get_event_emitter,
     get_models_in_use,
     get_active_user_ids,
 )
@@ -185,6 +186,7 @@ from open_webui.config import (
     FIRECRAWL_API_BASE_URL,
     FIRECRAWL_API_KEY,
     WEB_LOADER_ENGINE,
+    WEB_LOADER_CONCURRENT_REQUESTS,
     WHISPER_MODEL,
     WHISPER_VAD_FILTER,
     WHISPER_LANGUAGE,
@@ -327,6 +329,7 @@ from open_webui.config import (
     ENABLE_MESSAGE_RATING,
     ENABLE_USER_WEBHOOKS,
     ENABLE_EVALUATION_ARENA_MODELS,
+    BYPASS_ADMIN_ACCESS_CONTROL,
     USER_PERMISSIONS,
     DEFAULT_USER_ROLE,
     PENDING_USER_OVERLAY_CONTENT,
@@ -375,6 +378,7 @@ from open_webui.config import (
     RESPONSE_WATERMARK,
     # Admin
     ENABLE_ADMIN_CHAT_ACCESS,
+    BYPASS_ADMIN_ACCESS_CONTROL,
     ENABLE_ADMIN_EXPORT,
     # Tasks
     TASK_MODEL,
@@ -463,6 +467,7 @@ from open_webui.utils.redis import get_redis_connection
 from open_webui.tasks import (
     redis_task_command_listener,
     list_task_ids_by_item_id,
+    create_task,
     stop_task,
     list_tasks,
 )  # Import from tasks.py
@@ -853,7 +858,10 @@ app.state.config.WEB_SEARCH_ENGINE = WEB_SEARCH_ENGINE
 app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = WEB_SEARCH_DOMAIN_FILTER_LIST
 app.state.config.WEB_SEARCH_RESULT_COUNT = WEB_SEARCH_RESULT_COUNT
 app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = WEB_SEARCH_CONCURRENT_REQUESTS
+
 app.state.config.WEB_LOADER_ENGINE = WEB_LOADER_ENGINE
+app.state.config.WEB_LOADER_CONCURRENT_REQUESTS = WEB_LOADER_CONCURRENT_REQUESTS
+
 app.state.config.WEB_SEARCH_TRUST_ENV = WEB_SEARCH_TRUST_ENV
 app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL = (
     BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL
@@ -916,14 +924,19 @@ try:
         app.state.config.RAG_EMBEDDING_MODEL,
         RAG_EMBEDDING_MODEL_AUTO_UPDATE,
     )
-
-    app.state.rf = get_rf(
-        app.state.config.RAG_RERANKING_ENGINE,
-        app.state.config.RAG_RERANKING_MODEL,
-        app.state.config.RAG_EXTERNAL_RERANKER_URL,
-        app.state.config.RAG_EXTERNAL_RERANKER_API_KEY,
-        RAG_RERANKING_MODEL_AUTO_UPDATE,
-    )
+    if (
+        app.state.config.ENABLE_RAG_HYBRID_SEARCH
+        and not app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL
+    ):
+        app.state.rf = get_rf(
+            app.state.config.RAG_RERANKING_ENGINE,
+            app.state.config.RAG_RERANKING_MODEL,
+            app.state.config.RAG_EXTERNAL_RERANKER_URL,
+            app.state.config.RAG_EXTERNAL_RERANKER_API_KEY,
+            RAG_RERANKING_MODEL_AUTO_UPDATE,
+        )
+    else:
+        app.state.rf = None
 except Exception as e:
     log.error(f"Error updating models: {e}")
     pass
@@ -1281,8 +1294,12 @@ async def get_models(
         model_info = Models.get_model_by_id(model["id"])
 
         if model_info:
-            if user.id == model_info.user_id or has_access(
-                user.id, type="read", access_control=model_info.access_control
+            if (
+                (user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL)
+                or user.id == model_info.user_id
+                or has_access(
+                    user.id, type="read", access_control=model_info.access_control
+                )
             ):
                 filtered_models.append(model)
 
@@ -1317,11 +1334,17 @@ async def get_models(
     model_order_dict = {model_id: i for i, model_id in enumerate(model_order_list)}
     # Sort models by order list priority, with fallback for those not in the list
     models.sort(
-        key=lambda x: (model_order_dict.get(x["id"], float("inf")), x["name"])
+        key=lambda model: (
+            model_order_dict.get(model.get("id", ""), float("inf")),
+            (model.get("name", "") or ""),
+        )
     )
 
     # Filter out models that the user does not have access to
-    if user.role == "user" and not BYPASS_MODEL_ACCESS_CONTROL:
+    if (
+        user.role == "user"
+        or (user.role == "admin" and not BYPASS_ADMIN_ACCESS_CONTROL)
+    ) and not BYPASS_MODEL_ACCESS_CONTROL:
         models = get_filtered_models(models, user)
 
     log.debug(
@@ -1392,7 +1415,9 @@ async def chat_completion(
             model_info = Models.get_model_by_id(model_id)
 
             # Check if user has access to the model
-            if not BYPASS_MODEL_ACCESS_CONTROL and user.role == "user":
+            if not BYPASS_MODEL_ACCESS_CONTROL and (
+                user.role != "admin" or not BYPASS_ADMIN_ACCESS_CONTROL
+            ):
                 try:
                     check_model_access(user, model)
                 except Exception as e:
@@ -1444,66 +1469,89 @@ async def chat_completion(
         }
 
         if metadata.get("chat_id") and (user and user.role != "admin"):
-            chat = Chats.get_chat_by_id_and_user_id(metadata["chat_id"], user.id)
-            if chat is None:
-                raise HTTPException(
-                    status_code=status.HTTP_404_NOT_FOUND,
-                    detail=ERROR_MESSAGES.DEFAULT(),
-                )
+            if metadata["chat_id"] != "local":
+                chat = Chats.get_chat_by_id_and_user_id(metadata["chat_id"], user.id)
+                if chat is None:
+                    raise HTTPException(
+                        status_code=status.HTTP_404_NOT_FOUND,
+                        detail=ERROR_MESSAGES.DEFAULT(),
+                    )
 
         request.state.metadata = metadata
         form_data["metadata"] = metadata
 
-        form_data, metadata, events = await process_chat_payload(
-            request, form_data, user, metadata, model
-        )
     except Exception as e:
-        log.debug(f"Error processing chat payload: {e}")
-        if metadata.get("chat_id") and metadata.get("message_id"):
-            # Update the chat message with the error
-            Chats.upsert_message_to_chat_by_id_and_message_id(
-                metadata["chat_id"],
-                metadata["message_id"],
-                {
-                    "error": {"content": str(e)},
-                },
-            )
-
+        log.debug(f"Error processing chat metadata: {e}")
         raise HTTPException(
             status_code=status.HTTP_400_BAD_REQUEST,
             detail=str(e),
         )
 
-    try:
-        response = await chat_completion_handler(request, form_data, user)
-        if metadata.get("chat_id") and metadata.get("message_id"):
-            Chats.upsert_message_to_chat_by_id_and_message_id(
-                metadata["chat_id"],
-                metadata["message_id"],
-                {
-                    "model": model_id,
-                },
+    async def process_chat(request, form_data, user, metadata, model):
+        try:
+            form_data, metadata, events = await process_chat_payload(
+                request, form_data, user, metadata, model
             )
+            response = await chat_completion_handler(request, form_data, user)
+            if metadata.get("chat_id") and metadata.get("message_id"):
+                try:
+                    Chats.upsert_message_to_chat_by_id_and_message_id(
+                        metadata["chat_id"],
+                        metadata["message_id"],
+                        {
+                            "model": model_id,
+                        },
+                    )
+                except:
+                    pass
+
+            return await process_chat_response(
+                request, response, form_data, user, metadata, model, events, tasks
+            )
+        except asyncio.CancelledError:
+            log.info("Chat processing was cancelled")
+            try:
+                event_emitter = get_event_emitter(metadata)
+                await event_emitter(
+                    {"type": "task-cancelled"},
+                )
+            except Exception as e:
+                pass
+        except Exception as e:
+            log.debug(f"Error processing chat payload: {e}")
+            if metadata.get("chat_id") and metadata.get("message_id"):
+                # Update the chat message with the error
+                try:
+                    Chats.upsert_message_to_chat_by_id_and_message_id(
+                        metadata["chat_id"],
+                        metadata["message_id"],
+                        {
+                            "error": {"content": str(e)},
+                        },
+                    )
+                except:
+                    pass
+
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=str(e),
             )
 
-        raise HTTPException(
-            status_code=status.HTTP_400_BAD_REQUEST,
-            detail=str(e),
+    if (
+        metadata.get("session_id")
+        and metadata.get("chat_id")
+        and metadata.get("message_id")
+    ):
+        # Asynchronous Chat Processing
+        task_id, _ = await create_task(
+            request.app.state.redis,
+            process_chat(request, form_data, user, metadata, model),
+            id=metadata["chat_id"],
         )
+        return {"status": True, "task_id": task_id}
+    else:
+        return await process_chat(request, form_data, user, metadata, model)
 
 
 # Alias for chat_completion (Legacy)
@@ -1704,6 +1752,16 @@ async def get_app_config(request: Request):
         }
         if user is not None and (user.role in ["admin", "user"])
         else {
+            **(
+                {
+                    "ui": {
+                        "pending_user_overlay_title": app.state.config.PENDING_USER_OVERLAY_TITLE,
+                        "pending_user_overlay_content": app.state.config.PENDING_USER_OVERLAY_CONTENT,
+                    }
+                }
+                if user and user.role == "pending"
+                else {}
+            ),
             **(
                 {
                     "metadata": {
@@ -1717,7 +1775,7 @@ async def get_app_config(request: Request):
                 }
                 if app.state.LICENSE_METADATA
                 else {}
-            )
+            ),
         }
     ),
 }
diff --git a/backend/open_webui/migrations/versions/018012973d35_add_indexes.py b/backend/open_webui/migrations/versions/018012973d35_add_indexes.py
new file mode 100644
index 0000000000..29af427108
--- /dev/null
+++ b/backend/open_webui/migrations/versions/018012973d35_add_indexes.py
@@ -0,0 +1,46 @@
+"""Add indexes
+
+Revision ID: 018012973d35
+Revises: d31026856c01
+Create Date: 2025-08-13 03:00:00.000000
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+
+revision = "018012973d35"
+down_revision = "d31026856c01"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # Chat table indexes
+    op.create_index("folder_id_idx", "chat", ["folder_id"])
+    op.create_index("user_id_pinned_idx", "chat", ["user_id", "pinned"])
+    op.create_index("user_id_archived_idx", "chat", ["user_id", "archived"])
+    op.create_index("updated_at_user_id_idx", "chat", ["updated_at", "user_id"])
+    op.create_index("folder_id_user_id_idx", "chat", ["folder_id", "user_id"])
+
+    # Tag table index
+    op.create_index("user_id_idx", "tag", ["user_id"])
+
+    # Function table index
+    op.create_index("is_global_idx", "function", ["is_global"])
+
+
+def downgrade():
+    # Chat table indexes
+    op.drop_index("folder_id_idx", table_name="chat")
+    op.drop_index("user_id_pinned_idx", table_name="chat")
+    op.drop_index("user_id_archived_idx", table_name="chat")
+    op.drop_index("updated_at_user_id_idx", table_name="chat")
+    op.drop_index("folder_id_user_id_idx", table_name="chat")
+
+    # Tag table index
+    op.drop_index("user_id_idx", table_name="tag")
+
+    # Function table index
+    op.drop_index("is_global_idx", table_name="function")
diff --git a/backend/open_webui/migrations/versions/3af16a1c9fb6_update_user_table.py b/backend/open_webui/migrations/versions/3af16a1c9fb6_update_user_table.py
new file mode 100644
index 0000000000..ab980f27ce
--- /dev/null
+++ b/backend/open_webui/migrations/versions/3af16a1c9fb6_update_user_table.py
@@ -0,0 +1,32 @@
+"""update user table
+
+Revision ID: 3af16a1c9fb6
+Revises: 018012973d35
+Create Date: 2025-08-21 02:07:18.078283
+
+"""
+
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+# revision identifiers, used by Alembic.
+revision: str = "3af16a1c9fb6"
+down_revision: Union[str, None] = "018012973d35"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    op.add_column("user", sa.Column("username", sa.String(length=50), nullable=True))
+    op.add_column("user", sa.Column("bio", sa.Text(), nullable=True))
+    op.add_column("user", sa.Column("gender", sa.Text(), nullable=True))
+    op.add_column("user", sa.Column("date_of_birth", sa.Date(), nullable=True))
+
+
+def downgrade() -> None:
+    op.drop_column("user", "username")
+    op.drop_column("user", "bio")
+    op.drop_column("user", "gender")
+    op.drop_column("user", "date_of_birth")
diff --git a/backend/open_webui/models/auths.py b/backend/open_webui/models/auths.py
index 3ad88bc119..6517e21345 100644
--- a/backend/open_webui/models/auths.py
+++ b/backend/open_webui/models/auths.py
@@ -73,11 +73,6 @@ class ProfileImageUrlForm(BaseModel):
     profile_image_url: str
 
 
-class UpdateProfileForm(BaseModel):
-    profile_image_url: str
-    name: str
-
-
 class UpdatePasswordForm(BaseModel):
     password: str
     new_password: str
diff --git a/backend/open_webui/models/chats.py b/backend/open_webui/models/chats.py
index a70af898d4..56f992806a 100644
--- a/backend/open_webui/models/chats.py
+++ b/backend/open_webui/models/chats.py
@@ -10,7 +10,7 @@ from open_webui.models.folders import Folders
 from open_webui.env import SRC_LOG_LEVELS
 
 from pydantic import BaseModel, ConfigDict
-from sqlalchemy import BigInteger, Boolean, Column, String, Text, JSON
+from sqlalchemy import BigInteger, Boolean, Column, String, Text, JSON, Index
 from sqlalchemy import or_, func, select, and_, text
 from sqlalchemy.sql import exists
 from sqlalchemy.sql.expression import bindparam
@@ -41,6 +41,20 @@ class Chat(Base):
     meta = Column(JSON, server_default="{}")
     folder_id = Column(Text, nullable=True)
 
+    __table_args__ = (
+        # Performance indexes for common queries
+        # WHERE folder_id = ...
+        Index("folder_id_idx", "folder_id"),
+        # WHERE user_id = ... AND pinned = ...
+        Index("user_id_pinned_idx", "user_id", "pinned"),
+        # WHERE user_id = ... AND archived = ...
+        Index("user_id_archived_idx", "user_id", "archived"),
+        # WHERE user_id = ... ORDER BY updated_at DESC
+        Index("updated_at_user_id_idx", "updated_at", "user_id"),
+        # WHERE folder_id = ... AND user_id = ...
+        Index("folder_id_user_id_idx", "folder_id", "user_id"),
+    )
+
 
 class ChatModel(BaseModel):
     model_config = ConfigDict(from_attributes=True)
diff --git a/backend/open_webui/models/functions.py b/backend/open_webui/models/functions.py
index e98771fa02..7530573e79 100644
--- a/backend/open_webui/models/functions.py
+++ b/backend/open_webui/models/functions.py
@@ -6,7 +6,7 @@ from open_webui.internal.db import Base, JSONField, get_db
 from open_webui.models.users import Users
 from open_webui.env import SRC_LOG_LEVELS
 from pydantic import BaseModel, ConfigDict
-from sqlalchemy import BigInteger, Boolean, Column, String, Text
+from sqlalchemy import BigInteger, Boolean, Column, String, Text, Index
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["MODELS"])
@@ -31,6 +31,8 @@ class Function(Base):
     updated_at = Column(BigInteger)
     created_at = Column(BigInteger)
 
+    __table_args__ = (Index("is_global_idx", "is_global"),)
+
 
 class FunctionMeta(BaseModel):
     description: Optional[str] = None
@@ -250,9 +252,7 @@ class FunctionsTable:
 
                 return user_settings["functions"]["valves"].get(id, {})
         except Exception as e:
-            log.exception(
-                f"Error getting user values by id {id} and user id {user_id}: {e}"
-            )
+            log.exception(f"Error getting user values by id {id} and user id {user_id}")
             return None
 
     def update_user_valves_by_id_and_user_id(
diff --git a/backend/open_webui/models/tags.py b/backend/open_webui/models/tags.py
index 279dc624d5..e1cbb68a0b 100644
--- a/backend/open_webui/models/tags.py
+++ b/backend/open_webui/models/tags.py
@@ -8,7 +8,7 @@ from open_webui.internal.db import Base, get_db
 from open_webui.env import SRC_LOG_LEVELS
 
 from pydantic import BaseModel, ConfigDict
-from sqlalchemy import BigInteger, Column, String, JSON, PrimaryKeyConstraint
+from sqlalchemy import BigInteger, Column, String, JSON, PrimaryKeyConstraint, Index
 
 log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["MODELS"])
@@ -24,6 +24,9 @@ class Tag(Base):
     user_id = Column(String)
     meta = Column(JSON, nullable=True)
 
     # Unique constraint ensuring (id, user_id) is unique, not just the `id` column
-    __table_args__ = (PrimaryKeyConstraint("id", "user_id", name="pk_id_user_id"),)
+    __table_args__ = (
+        PrimaryKeyConstraint("id", "user_id", name="pk_id_user_id"),
+        Index("user_id_idx", "user_id"),
+    )
diff --git a/backend/open_webui/models/tools.py b/backend/open_webui/models/tools.py
index 68a83ea42c..7f1409a900 100644
--- a/backend/open_webui/models/tools.py
+++ b/backend/open_webui/models/tools.py
@@ -175,7 +175,7 @@ class ToolsTable:
                 tool = db.get(Tool, id)
                 return tool.valves if tool.valves else {}
         except Exception as e:
-            log.exception(f"Error getting tool valves by id {id}: {e}")
+            log.exception(f"Error getting tool valves by id {id}")
             return None
 
     def update_tool_valves_by_id(self, id: str, valves: dict) -> Optional[ToolValves]:
diff --git a/backend/open_webui/models/users.py b/backend/open_webui/models/users.py
index 60b6ad0c10..620a746eed 100644
--- a/backend/open_webui/models/users.py
+++ b/backend/open_webui/models/users.py
@@ -4,14 +4,17 @@ from typing import Optional
 
 from open_webui.internal.db import Base, JSONField, get_db
+from open_webui.env import DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL
 from open_webui.models.chats import Chats
 from open_webui.models.groups import Groups
+from open_webui.utils.misc import throttle
 
 from pydantic import BaseModel, ConfigDict
-from sqlalchemy import BigInteger, Column, String, Text
+from sqlalchemy import BigInteger, Column, String, Text, Date
 from sqlalchemy import or_
 
+import datetime
 
 ####################
 # User DB Schema
@@ -23,20 +26,28 @@ class User(Base):
 
     id = Column(String, primary_key=True)
     name = Column(String)
+
     email = Column(String)
+    username = Column(String(50), nullable=True)
+
     role = Column(String)
     profile_image_url = Column(Text)
 
-    last_active_at = Column(BigInteger)
-    updated_at = Column(BigInteger)
-    created_at = Column(BigInteger)
+    bio = Column(Text, nullable=True)
+    gender = Column(Text, nullable=True)
+    date_of_birth = Column(Date, nullable=True)
+
+    info = Column(JSONField, nullable=True)
+    settings = Column(JSONField, nullable=True)
 
     api_key = Column(String, nullable=True, unique=True)
-    settings = Column(JSONField, nullable=True)
-    info = Column(JSONField, nullable=True)
-
     oauth_sub = Column(Text, unique=True)
 
+    last_active_at = Column(BigInteger)
+
+    updated_at = Column(BigInteger)
+    created_at = Column(BigInteger)
+
 
 class UserSettings(BaseModel):
     ui: Optional[dict] = {}
@@ -47,20 +58,27 @@ class UserSettings(BaseModel):
 class UserModel(BaseModel):
     id: str
     name: str
+
     email: str
+    username: Optional[str] = None
+
     role: str = "pending"
     profile_image_url: str
 
+    bio: Optional[str] = None
+    gender: Optional[str] = None
+    date_of_birth: Optional[datetime.date] = None
+
+    info: Optional[dict] = None
+    settings: Optional[UserSettings] = None
+
+    api_key: Optional[str] = None
+    oauth_sub: Optional[str] = None
+
     last_active_at: int  # timestamp in epoch
     updated_at: int  # timestamp in epoch
     created_at: int  # timestamp in epoch
 
-    api_key: Optional[str] = None
-    settings: Optional[UserSettings] = None
-    info: Optional[dict] = None
-
-    oauth_sub: Optional[str] = None
-
     model_config = ConfigDict(from_attributes=True)
 
 
@@ -69,6 +87,14 @@ class UserModel(BaseModel):
 ####################
 
 
+class UpdateProfileForm(BaseModel):
+    profile_image_url: str
+    name: str
+    bio: Optional[str] = None
+    gender: Optional[str] = None
+    date_of_birth: Optional[datetime.date] = None
+
+
 class UserListResponse(BaseModel):
     users: list[UserModel]
     total: int
@@ -311,6 +337,7 @@ class UsersTable:
         except Exception:
             return None
 
+    @throttle(DATABASE_USER_ACTIVE_STATUS_UPDATE_INTERVAL)
    def update_user_last_active_by_id(self, id: str) -> Optional[UserModel]:
         try:
             with get_db() as db:
@@ -346,7 +373,8 @@ class UsersTable:
                 user = db.query(User).filter_by(id=id).first()
                 return UserModel.model_validate(user)
                 # return UserModel(**user.dict())
-        except Exception:
+        except Exception as e:
+            print(e)
             return None
 
     def update_user_settings_by_id(self, id: str, updated: dict) -> Optional[UserModel]:
diff --git a/backend/open_webui/retrieval/utils.py b/backend/open_webui/retrieval/utils.py
index 539adda329..aeaa741c94 100644
--- a/backend/open_webui/retrieval/utils.py
+++ b/backend/open_webui/retrieval/utils.py
@@ -124,12 +124,14 @@ def query_doc_with_hybrid_search(
     hybrid_bm25_weight: float,
 ) -> dict:
     try:
-        log.debug(f"query_doc_with_hybrid_search:doc {collection_name}")
-        bm25_retriever = BM25Retriever.from_texts(
-            texts=collection_result.documents[0],
-            metadatas=collection_result.metadatas[0],
-        )
-        bm25_retriever.k = k
+        # BM_25 required only if weight is greater than 0
+        if hybrid_bm25_weight > 0:
+            log.debug(f"query_doc_with_hybrid_search:doc {collection_name}")
+            bm25_retriever = BM25Retriever.from_texts(
+                texts=collection_result.documents[0],
+                metadatas=collection_result.metadatas[0],
+            )
+            bm25_retriever.k = k
 
         vector_search_retriever = VectorSearchRetriever(
             collection_name=collection_name,
@@ -337,18 +339,22 @@ def query_collection_with_hybrid_search(
     # Fetch collection data once per collection sequentially
     # Avoid fetching the same data multiple times later
     collection_results = {}
-    for collection_name in collection_names:
-        try:
-            log.debug(
-                f"query_collection_with_hybrid_search:VECTOR_DB_CLIENT.get:collection {collection_name}"
-            )
-            collection_results[collection_name] = VECTOR_DB_CLIENT.get(
-                collection_name=collection_name
-            )
-        except Exception as e:
-            log.exception(f"Failed to fetch collection {collection_name}: {e}")
-            collection_results[collection_name] = None
-
+    # Only retrieve entire collection if bm_25 calculation is required
+    if hybrid_bm25_weight > 0:
+        for collection_name in collection_names:
+            try:
+                log.debug(
+                    f"query_collection_with_hybrid_search:VECTOR_DB_CLIENT.get:collection {collection_name}"
+                )
+                collection_results[collection_name] = VECTOR_DB_CLIENT.get(
+                    collection_name=collection_name
+                )
+            except Exception as e:
+                log.exception(f"Failed to fetch collection {collection_name}: {e}")
+                collection_results[collection_name] = None
+    else:
+        for collection_name in collection_names:
+            collection_results[collection_name] = []
+
     log.info(
         f"Starting hybrid search for {len(queries)} queries in {len(collection_names)} collections..."
     )
@@ -946,6 +952,7 @@ class RerankCompressor(BaseDocumentCompressor):
     ) -> Sequence[Document]:
         reranking = self.reranking_function is not None
 
+        scores = None
         if reranking:
             scores = self.reranking_function(
                 [(query, doc.page_content) for doc in documents]
@@ -959,22 +966,31 @@ class RerankCompressor(BaseDocumentCompressor):
             )
             scores = util.cos_sim(query_embedding, document_embedding)[0]
 
-        docs_with_scores = list(
-            zip(documents, scores.tolist() if not isinstance(scores, list) else scores)
-        )
-        if self.r_score:
-            docs_with_scores = [
-                (d, s) for d, s in docs_with_scores if s >= self.r_score
-            ]
-
-        result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=True)
-        final_results = []
-        for doc, doc_score in result[: self.top_n]:
-            metadata = doc.metadata
-            metadata["score"] = doc_score
-            doc = Document(
-                page_content=doc.page_content,
-                metadata=metadata,
+        if scores is not None and len(scores) > 0:
+            docs_with_scores = list(
+                zip(
+                    documents,
+                    scores.tolist() if not isinstance(scores, list) else scores,
+                )
             )
-            final_results.append(doc)
-        return final_results
+            if self.r_score:
+                docs_with_scores = [
+                    (d, s) for d, s in docs_with_scores if s >= self.r_score
+                ]
+
+            result = sorted(docs_with_scores, key=operator.itemgetter(1), reverse=True)
+            final_results = []
+            for doc, doc_score in result[: self.top_n]:
+                metadata = doc.metadata
+                metadata["score"] = doc_score
+                doc = Document(
+                    page_content=doc.page_content,
+                    metadata=metadata,
+                )
+                final_results.append(doc)
+            return final_results
+        else:
+            log.warning(
+                "No valid scores found, check your reranking function. Returning original documents."
+            )
+            return documents
diff --git a/backend/open_webui/retrieval/vector/dbs/milvus.py b/backend/open_webui/retrieval/vector/dbs/milvus.py
index 6e07c28016..059ea43cc0 100644
--- a/backend/open_webui/retrieval/vector/dbs/milvus.py
+++ b/backend/open_webui/retrieval/vector/dbs/milvus.py
@@ -1,5 +1,7 @@
 from pymilvus import MilvusClient as Client
 from pymilvus import FieldSchema, DataType
+from pymilvus import connections, Collection
+
 import json
 import logging
 from typing import Optional
@@ -188,6 +190,8 @@ class MilvusClient(VectorDBBase):
         return self._result_to_search_result(result)
 
     def query(self, collection_name: str, filter: dict, limit: Optional[int] = None):
+        connections.connect(uri=MILVUS_URI, token=MILVUS_TOKEN, db_name=MILVUS_DB)
+
         # Construct the filter string for querying
         collection_name = collection_name.replace("-", "_")
         if not self.has_collection(collection_name):
@@ -201,72 +205,36 @@ class MilvusClient(VectorDBBase):
             for key, value in filter.items()
         ]
     )
-        max_limit = 16383  # The maximum number of records per request
-        all_results = []
 
-        if limit is None:
-            # Milvus default limit for query if not specified is 16384, but docs mention iteration.
-            # Let's set a practical high number if "all" is intended, or handle true pagination.
-            # For now, if limit is None, we'll fetch in batches up to a very large number.
-            # This part could be refined based on expected use cases for "get all".
-            # For this function signature, None implies "as many as possible" up to Milvus limits.
-            limit = (
-                16384 * 10
-            )  # A large number to signify fetching many, will be capped by actual data or max_limit per call.
-            log.info(
-                f"Limit not specified for query, fetching up to {limit} results in batches."
-            )
 
-        # Initialize offset and remaining to handle pagination
-        offset = 0
-        remaining = limit
+        collection = Collection(f"{self.collection_prefix}_{collection_name}")
+        collection.load()
 
+        all_results = []
         try:
             log.info(
                 f"Querying collection {self.collection_prefix}_{collection_name} with filter: '{filter_string}', limit: {limit}"
             )
-            # Loop until there are no more items to fetch or the desired limit is reached
-            while remaining > 0:
-                current_fetch = min(
-                    max_limit, remaining if isinstance(remaining, int) else max_limit
-                )
-                log.debug(
-                    f"Querying with offset: {offset}, current_fetch: {current_fetch}"
-                )
-                results = self.client.query(
-                    collection_name=f"{self.collection_prefix}_{collection_name}",
-                    filter=filter_string,
-                    output_fields=[
-                        "id",
-                        "data",
-                        "metadata",
-                    ],  # Explicitly list needed fields. Vector not usually needed in query.
-                    limit=current_fetch,
-                    offset=offset,
-                )
+            iterator = collection.query_iterator(
+                filter=filter_string,
+                output_fields=[
+                    "id",
+                    "data",
+                    "metadata",
+                ],
+                limit=limit,  # Pass the limit directly; None means no limit.
+            )
 
-                if not results:
-                    log.debug("No more results from query.")
-                    break
-
-                all_results.extend(results)
-                results_count = len(results)
-                log.debug(f"Fetched {results_count} results in this batch.")
-
-                if isinstance(remaining, int):
-                    remaining -= results_count
-
-                offset += results_count
-
-                # Break the loop if the results returned are less than the requested fetch count (means end of data)
-                if results_count < current_fetch:
-                    log.debug(
-                        "Fetched less than requested, assuming end of results for this query."
- ) + while True: + result = iterator.next() + if not result: + iterator.close() break + all_results += result log.info(f"Total results from query: {len(all_results)}") return self._result_to_get_result([all_results]) + except Exception as e: log.exception( f"Error querying collection {self.collection_prefix}_{collection_name} with filter '{filter_string}' and limit {limit}: {e}" diff --git a/backend/open_webui/retrieval/vector/dbs/pgvector.py b/backend/open_webui/retrieval/vector/dbs/pgvector.py index 9deb61f5a3..d978f0c824 100644 --- a/backend/open_webui/retrieval/vector/dbs/pgvector.py +++ b/backend/open_webui/retrieval/vector/dbs/pgvector.py @@ -111,11 +111,35 @@ class PgvectorClient(VectorDBBase): try: # Ensure the pgvector extension is available - self.session.execute(text("CREATE EXTENSION IF NOT EXISTS vector;")) + # Use a conditional check to avoid permission issues on Azure PostgreSQL + self.session.execute( + text( + """ + DO $$ + BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN + CREATE EXTENSION IF NOT EXISTS vector; + END IF; + END $$; + """ + ) + ) if PGVECTOR_PGCRYPTO: # Ensure the pgcrypto extension is available for encryption - self.session.execute(text("CREATE EXTENSION IF NOT EXISTS pgcrypto;")) + # Use a conditional check to avoid permission issues on Azure PostgreSQL + self.session.execute( + text( + """ + DO $$ + BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pgcrypto') THEN + CREATE EXTENSION IF NOT EXISTS pgcrypto; + END IF; + END $$; + """ + ) + ) if not PGVECTOR_PGCRYPTO_KEY: raise ValueError( diff --git a/backend/open_webui/retrieval/vector/dbs/s3vector.py b/backend/open_webui/retrieval/vector/dbs/s3vector.py index 8db1ecb778..2ac6911769 100644 --- a/backend/open_webui/retrieval/vector/dbs/s3vector.py +++ b/backend/open_webui/retrieval/vector/dbs/s3vector.py @@ -197,13 +197,23 @@ class S3VectorClient(VectorDBBase): "metadata": metadata, } ) - # Insert vectors - self.client.put_vectors( - vectorBucketName=self.bucket_name, - indexName=collection_name, - vectors=vectors, + + # Insert vectors in batches of 500 (S3 Vector API limit) + batch_size = 500 + for i in range(0, len(vectors), batch_size): + batch = vectors[i : i + batch_size] + self.client.put_vectors( + vectorBucketName=self.bucket_name, + indexName=collection_name, + vectors=batch, + ) + log.info( + f"Inserted batch {i//batch_size + 1}: {len(batch)} vectors into index '{collection_name}'." + ) + + log.info( + f"Completed insertion of {len(vectors)} vectors into index '{collection_name}'." ) - log.info(f"Inserted {len(vectors)} vectors into index '{collection_name}'.") except Exception as e: log.error(f"Error inserting vectors: {e}") raise @@ -258,16 +268,29 @@ class S3VectorClient(VectorDBBase): "metadata": metadata, } ) - # Upsert vectors (using put_vectors for upsert semantics) + + # Upsert vectors in batches of 500 (S3 Vector API limit) + batch_size = 500 + for i in range(0, len(vectors), batch_size): + batch = vectors[i : i + batch_size] + if i == 0: # Log sample info for first batch only + log.info( + f"Upserting batch 1: {len(batch)} vectors. First vector sample: key={batch[0]['key']}, data_type={type(batch[0]['data']['float32'])}, data_len={len(batch[0]['data']['float32'])}" + ) + else: + log.info( + f"Upserting batch {i//batch_size + 1}: {len(batch)} vectors." + ) + + self.client.put_vectors( + vectorBucketName=self.bucket_name, + indexName=collection_name, + vectors=batch, + ) + log.info( - f"Upserting {len(vectors)} vectors. 
First vector sample: key={vectors[0]['key']}, data_type={type(vectors[0]['data']['float32'])}, data_len={len(vectors[0]['data']['float32'])}" + f"Completed upsert of {len(vectors)} vectors into index '{collection_name}'." ) - self.client.put_vectors( - vectorBucketName=self.bucket_name, - indexName=collection_name, - vectors=vectors, - ) - log.info(f"Upserted {len(vectors)} vectors into index '{collection_name}'.") except Exception as e: log.error(f"Error upserting vectors: {e}") raise diff --git a/backend/open_webui/retrieval/web/duckduckgo.py b/backend/open_webui/retrieval/web/duckduckgo.py index a32fc358ed..e4cf9d00ec 100644 --- a/backend/open_webui/retrieval/web/duckduckgo.py +++ b/backend/open_webui/retrieval/web/duckduckgo.py @@ -11,7 +11,10 @@ log.setLevel(SRC_LOG_LEVELS["RAG"]) def search_duckduckgo( - query: str, count: int, filter_list: Optional[list[str]] = None + query: str, + count: int, + filter_list: Optional[list[str]] = None, + concurrent_requests: Optional[int] = None, ) -> list[SearchResult]: """ Search using DuckDuckGo's Search API and return the results as a list of SearchResult objects. @@ -25,6 +28,9 @@ def search_duckduckgo( # Use the DDGS context manager to create a DDGS object search_results = [] with DDGS() as ddgs: + if concurrent_requests: + ddgs.threads = concurrent_requests + # Use the ddgs.text() method to perform the search try: search_results = ddgs.text( diff --git a/backend/open_webui/retrieval/web/main.py b/backend/open_webui/retrieval/web/main.py index 28a749e7d2..dc1eafb331 100644 --- a/backend/open_webui/retrieval/web/main.py +++ b/backend/open_webui/retrieval/web/main.py @@ -11,7 +11,7 @@ def get_filtered_results(results, filter_list): return results filtered_results = [] for result in results: - url = result.get("url") or result.get("link", "") + url = result.get("url") or result.get("link", "") or result.get("href", "") if not validators.url(url): continue domain = urlparse(url).netloc diff --git a/backend/open_webui/routers/audio.py b/backend/open_webui/routers/audio.py index b1b715d44b..cc5711569d 100644 --- a/backend/open_webui/routers/audio.py +++ b/backend/open_webui/routers/audio.py @@ -550,6 +550,11 @@ def transcription_handler(request, file_path, metadata): metadata = metadata or {} + languages = [ + metadata.get("language", None) if WHISPER_LANGUAGE == "" else WHISPER_LANGUAGE, + None, # Always fallback to None in case transcription fails + ] + if request.app.state.config.STT_ENGINE == "": if request.app.state.faster_whisper_model is None: request.app.state.faster_whisper_model = set_faster_whisper_model( @@ -561,11 +566,7 @@ def transcription_handler(request, file_path, metadata): file_path, beam_size=5, vad_filter=request.app.state.config.WHISPER_VAD_FILTER, - language=( - metadata.get("language", None) - if WHISPER_LANGUAGE == "" - else WHISPER_LANGUAGE - ), + language=languages[0], ) log.info( "Detected language '%s' with probability %f" @@ -585,21 +586,26 @@ def transcription_handler(request, file_path, metadata): elif request.app.state.config.STT_ENGINE == "openai": r = None try: - r = requests.post( - url=f"{request.app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions", - headers={ - "Authorization": f"Bearer {request.app.state.config.STT_OPENAI_API_KEY}" - }, - files={"file": (filename, open(file_path, "rb"))}, - data={ + for language in languages: + payload = { "model": request.app.state.config.STT_MODEL, - **( - {"language": metadata.get("language")} - if metadata.get("language") - else {} - ), - }, - ) + } + + if 
language: + payload["language"] = language + + r = requests.post( + url=f"{request.app.state.config.STT_OPENAI_API_BASE_URL}/audio/transcriptions", + headers={ + "Authorization": f"Bearer {request.app.state.config.STT_OPENAI_API_KEY}" + }, + files={"file": (filename, open(file_path, "rb"))}, + data=payload, + ) + + if r.status_code == 200: + # Successful transcription + break r.raise_for_status() data = r.json() @@ -641,18 +647,26 @@ def transcription_handler(request, file_path, metadata): "Content-Type": mime, } - # Add model if specified - params = {} - if request.app.state.config.STT_MODEL: - params["model"] = request.app.state.config.STT_MODEL + for language in languages: + params = {} + if request.app.state.config.STT_MODEL: + params["model"] = request.app.state.config.STT_MODEL + + if language: + params["language"] = language + + # Make request to Deepgram API + r = requests.post( + "https://api.deepgram.com/v1/listen?smart_format=true", + headers=headers, + params=params, + data=file_data, + ) + + if r.status_code == 200: + # Successful transcription + break - # Make request to Deepgram API - r = requests.post( - "https://api.deepgram.com/v1/listen?smart_format=true", - headers=headers, - params=params, - data=file_data, - ) r.raise_for_status() response_data = r.json() diff --git a/backend/open_webui/routers/auths.py b/backend/open_webui/routers/auths.py index e157e5527d..11254ec78c 100644 --- a/backend/open_webui/routers/auths.py +++ b/backend/open_webui/routers/auths.py @@ -15,10 +15,9 @@ from open_webui.models.auths import ( SigninResponse, SignupForm, UpdatePasswordForm, - UpdateProfileForm, UserResponse, ) -from open_webui.models.users import Users +from open_webui.models.users import Users, UpdateProfileForm from open_webui.models.groups import Groups from open_webui.constants import ERROR_MESSAGES, WEBHOOK_MESSAGES @@ -73,7 +72,13 @@ class SessionUserResponse(Token, UserResponse): permissions: Optional[dict] = None -@router.get("/", response_model=SessionUserResponse) +class SessionUserInfoResponse(SessionUserResponse): + bio: Optional[str] = None + gender: Optional[str] = None + date_of_birth: Optional[datetime.date] = None + + +@router.get("/", response_model=SessionUserInfoResponse) async def get_session_user( request: Request, response: Response, user=Depends(get_current_user) ): @@ -121,6 +126,9 @@ async def get_session_user( "name": user.name, "role": user.role, "profile_image_url": user.profile_image_url, + "bio": user.bio, + "gender": user.gender, + "date_of_birth": user.date_of_birth, "permissions": user_permissions, } @@ -137,7 +145,7 @@ async def update_profile( if session_user: user = Users.update_user_by_id( session_user.id, - {"profile_image_url": form_data.profile_image_url, "name": form_data.name}, + form_data.model_dump(), ) if user: return user @@ -625,7 +633,7 @@ async def signup(request: Request, response: Response, form_data: SignupForm): ) if request.app.state.config.WEBHOOK_URL: - post_webhook( + await post_webhook( request.app.state.WEBUI_NAME, request.app.state.config.WEBHOOK_URL, WEBHOOK_MESSAGES.USER_SIGNUP(user.name), diff --git a/backend/open_webui/routers/channels.py b/backend/open_webui/routers/channels.py index e4390e23f6..cf3603c6ff 100644 --- a/backend/open_webui/routers/channels.py +++ b/backend/open_webui/routers/channels.py @@ -209,7 +209,7 @@ async def send_notification(name, webui_url, channel, message, active_user_ids): ) if webhook_url: - post_webhook( + await post_webhook( name, webhook_url, f"#{channel.name} - 
{webui_url}/channels/{channel.id}\n\n{message.content}", diff --git a/backend/open_webui/routers/chats.py b/backend/open_webui/routers/chats.py index ba16b506f7..6f853ab266 100644 --- a/backend/open_webui/routers/chats.py +++ b/backend/open_webui/routers/chats.py @@ -36,7 +36,7 @@ router = APIRouter() @router.get("/", response_model=list[ChatTitleIdResponse]) @router.get("/list", response_model=list[ChatTitleIdResponse]) -async def get_session_user_chat_list( +def get_session_user_chat_list( user=Depends(get_verified_user), page: Optional[int] = None ): try: diff --git a/backend/open_webui/routers/configs.py b/backend/open_webui/routers/configs.py index c8badfa112..8ce4e0d247 100644 --- a/backend/open_webui/routers/configs.py +++ b/backend/open_webui/routers/configs.py @@ -9,8 +9,8 @@ from open_webui.config import BannerModel from open_webui.utils.tools import ( get_tool_server_data, - get_tool_servers_data, get_tool_server_url, + set_tool_servers, ) @@ -114,10 +114,7 @@ async def set_tool_servers_config( request.app.state.config.TOOL_SERVER_CONNECTIONS = [ connection.model_dump() for connection in form_data.TOOL_SERVER_CONNECTIONS ] - - request.app.state.TOOL_SERVERS = await get_tool_servers_data( - request.app.state.config.TOOL_SERVER_CONNECTIONS - ) + await set_tool_servers(request) return { "TOOL_SERVER_CONNECTIONS": request.app.state.config.TOOL_SERVER_CONNECTIONS, diff --git a/backend/open_webui/routers/files.py b/backend/open_webui/routers/files.py index 0a2b4ac97f..67b3de193e 100644 --- a/backend/open_webui/routers/files.py +++ b/backend/open_webui/routers/files.py @@ -6,8 +6,10 @@ from fnmatch import fnmatch from pathlib import Path from typing import Optional from urllib.parse import quote +import asyncio from fastapi import ( + BackgroundTasks, APIRouter, Depends, File, @@ -18,6 +20,7 @@ from fastapi import ( status, Query, ) + from fastapi.responses import FileResponse, StreamingResponse from open_webui.constants import ERROR_MESSAGES from open_webui.env import SRC_LOG_LEVELS @@ -42,7 +45,6 @@ from pydantic import BaseModel log = logging.getLogger(__name__) log.setLevel(SRC_LOG_LEVELS["MODELS"]) - router = APIRouter() @@ -83,13 +85,64 @@ def has_access_to_file( ############################ +def process_uploaded_file(request, file, file_path, file_item, file_metadata, user): + try: + if file.content_type: + stt_supported_content_types = getattr( + request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", [] + ) + + if any( + fnmatch(file.content_type, content_type) + for content_type in ( + stt_supported_content_types + if stt_supported_content_types + and any(t.strip() for t in stt_supported_content_types) + else ["audio/*", "video/webm"] + ) + ): + file_path = Storage.get_file(file_path) + result = transcribe(request, file_path, file_metadata) + + process_file( + request, + ProcessFileForm( + file_id=file_item.id, content=result.get("text", "") + ), + user=user, + ) + elif (not file.content_type.startswith(("image/", "video/"))) or ( + request.app.state.config.CONTENT_EXTRACTION_ENGINE == "external" + ): + process_file(request, ProcessFileForm(file_id=file_item.id), user=user) + else: + log.info( + f"File type {file.content_type} is not provided, but trying to process anyway" + ) + process_file(request, ProcessFileForm(file_id=file_item.id), user=user) + + Files.update_file_data_by_id( + file_item.id, + {"status": "completed"}, + ) + except Exception as e: + log.error(f"Error processing file: {file_item.id}") + Files.update_file_data_by_id( + file_item.id, + { + "status": 
"failed", + "error": str(e.detail) if hasattr(e, "detail") else str(e), + }, + ) + + @router.post("/", response_model=FileModelResponse) def upload_file( request: Request, + background_tasks: BackgroundTasks, file: UploadFile = File(...), metadata: Optional[dict | str] = Form(None), process: bool = Query(True), - internal: bool = False, user=Depends(get_verified_user), ): log.info(f"file.content_type: {file.content_type}") @@ -112,7 +165,7 @@ def upload_file( # Remove the leading dot from the file extension file_extension = file_extension[1:] if file_extension else "" - if (not internal) and request.app.state.config.ALLOWED_FILE_EXTENSIONS: + if process and request.app.state.config.ALLOWED_FILE_EXTENSIONS: request.app.state.config.ALLOWED_FILE_EXTENSIONS = [ ext for ext in request.app.state.config.ALLOWED_FILE_EXTENSIONS if ext ] @@ -129,13 +182,16 @@ def upload_file( id = str(uuid.uuid4()) name = filename filename = f"{id}_{filename}" - tags = { - "OpenWebUI-User-Email": user.email, - "OpenWebUI-User-Id": user.id, - "OpenWebUI-User-Name": user.name, - "OpenWebUI-File-Id": id, - } - contents, file_path = Storage.upload_file(file.file, filename, tags) + contents, file_path = Storage.upload_file( + file.file, + filename, + { + "OpenWebUI-User-Email": user.email, + "OpenWebUI-User-Id": user.id, + "OpenWebUI-User-Name": user.name, + "OpenWebUI-File-Id": id, + }, + ) file_item = Files.insert_new_file( user.id, @@ -144,6 +200,9 @@ def upload_file( "id": id, "filename": name, "path": file_path, + "data": { + **({"status": "pending"} if process else {}), + }, "meta": { "name": name, "content_type": file.content_type, @@ -153,58 +212,26 @@ def upload_file( } ), ) + if process: - try: - if file.content_type: - stt_supported_content_types = getattr( - request.app.state.config, "STT_SUPPORTED_CONTENT_TYPES", [] - ) - - if any( - fnmatch(file.content_type, content_type) - for content_type in ( - stt_supported_content_types - if stt_supported_content_types - and any(t.strip() for t in stt_supported_content_types) - else ["audio/*", "video/webm"] - ) - ): - file_path = Storage.get_file(file_path) - result = transcribe(request, file_path, file_metadata) - - process_file( - request, - ProcessFileForm(file_id=id, content=result.get("text", "")), - user=user, - ) - elif (not file.content_type.startswith(("image/", "video/"))) or ( - request.app.state.config.CONTENT_EXTRACTION_ENGINE == "external" - ): - process_file(request, ProcessFileForm(file_id=id), user=user) - else: - log.info( - f"File type {file.content_type} is not provided, but trying to process anyway" - ) - process_file(request, ProcessFileForm(file_id=id), user=user) - - file_item = Files.get_file_by_id(id=id) - except Exception as e: - log.exception(e) - log.error(f"Error processing file: {file_item.id}") - file_item = FileModelResponse( - **{ - **file_item.model_dump(), - "error": str(e.detail) if hasattr(e, "detail") else str(e), - } - ) - - if file_item: - return file_item - else: - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=ERROR_MESSAGES.DEFAULT("Error uploading file"), + background_tasks.add_task( + process_uploaded_file, + request, + file, + file_path, + file_item, + file_metadata, + user, ) + return {"status": True, **file_item.model_dump()} + else: + if file_item: + return file_item + else: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ERROR_MESSAGES.DEFAULT("Error uploading file"), + ) except Exception as e: log.exception(e) @@ -331,6 +358,60 @@ async def get_file_by_id(id: 
str, user=Depends(get_verified_user)): ) +@router.get("/{id}/process/status") +async def get_file_process_status( + id: str, stream: bool = Query(False), user=Depends(get_verified_user) +): + file = Files.get_file_by_id(id) + + if not file: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + if ( + file.user_id == user.id + or user.role == "admin" + or has_access_to_file(id, "read", user) + ): + if stream: + MAX_FILE_PROCESSING_DURATION = 3600 * 2 + + async def event_stream(file_item): + for _ in range(MAX_FILE_PROCESSING_DURATION): + file_item = Files.get_file_by_id(file_item.id) + if file_item: + data = file_item.model_dump().get("data", {}) + status = data.get("status") + + if status: + event = {"status": status} + if status == "failed": + event["error"] = data.get("error") + + yield f"data: {json.dumps(event)}\n\n" + if status in ("completed", "failed"): + break + else: + # Legacy + break + + await asyncio.sleep(0.5) + + return StreamingResponse( + event_stream(file), + media_type="text/event-stream", + ) + else: + return {"status": file.data.get("status", "pending")} + else: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=ERROR_MESSAGES.NOT_FOUND, + ) + + ############################ # Get File Data Content By Id ############################ diff --git a/backend/open_webui/routers/images.py b/backend/open_webui/routers/images.py index 5cd07e3d54..b970bed664 100644 --- a/backend/open_webui/routers/images.py +++ b/backend/open_webui/routers/images.py @@ -469,7 +469,9 @@ def upload_image(request, image_data, content_type, metadata, user): "content-type": content_type, }, ) - file_item = upload_file(request, file, metadata=metadata, internal=True, user=user) + file_item = upload_file( + request, file=file, metadata=metadata, process=False, user=user + ) url = request.app.url_path_for("get_file_content_by_id", id=file_item.id) return url @@ -483,11 +485,15 @@ async def image_generations( # if IMAGE_SIZE = 'auto', default WidthxHeight to the 512x512 default # This is only relevant when the user has set IMAGE_SIZE to 'auto' with an # image model other than gpt-image-1, which is warned about on settings save - width, height = ( - tuple(map(int, request.app.state.config.IMAGE_SIZE.split("x"))) - if "x" in request.app.state.config.IMAGE_SIZE - else (512, 512) - ) + + size = "512x512" + if "x" in request.app.state.config.IMAGE_SIZE: + size = request.app.state.config.IMAGE_SIZE + + if "x" in form_data.size: + size = form_data.size + + width, height = tuple(map(int, size.split("x"))) r = None try: diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 69198816b3..e9ba9c39ad 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -25,7 +25,7 @@ from open_webui.utils.access_control import has_access, has_permission from open_webui.env import SRC_LOG_LEVELS -from open_webui.config import ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS +from open_webui.config import BYPASS_ADMIN_ACCESS_CONTROL from open_webui.models.models import Models, ModelForm @@ -43,7 +43,7 @@ router = APIRouter() async def get_knowledge(user=Depends(get_verified_user)): knowledge_bases = [] - if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: + if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: knowledge_bases = Knowledges.get_knowledge_bases() else: knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(user.id, "read") @@ -91,7 +91,7 @@ 
async def get_knowledge(user=Depends(get_verified_user)): async def get_knowledge_list(user=Depends(get_verified_user)): knowledge_bases = [] - if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: + if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: knowledge_bases = Knowledges.get_knowledge_bases() else: knowledge_bases = Knowledges.get_knowledge_bases_by_user_id(user.id, "write") diff --git a/backend/open_webui/routers/models.py b/backend/open_webui/routers/models.py index 3d5f6ccf96..a4d4e3668e 100644 --- a/backend/open_webui/routers/models.py +++ b/backend/open_webui/routers/models.py @@ -15,7 +15,7 @@ from fastapi import APIRouter, Depends, HTTPException, Request, status from open_webui.utils.auth import get_admin_user, get_verified_user from open_webui.utils.access_control import has_access, has_permission -from open_webui.config import ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS +from open_webui.config import BYPASS_ADMIN_ACCESS_CONTROL router = APIRouter() @@ -27,7 +27,7 @@ router = APIRouter() @router.get("/", response_model=list[ModelUserResponse]) async def get_models(id: Optional[str] = None, user=Depends(get_verified_user)): - if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: + if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: return Models.get_models() else: return Models.get_models_by_user_id(user.id) @@ -117,7 +117,7 @@ async def get_model_by_id(id: str, user=Depends(get_verified_user)): model = Models.get_model_by_id(id) if model: if ( - user.role == "admin" + (user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL) or model.user_id == user.id or has_access(user.id, "read", model.access_control) ): diff --git a/backend/open_webui/routers/ollama.py b/backend/open_webui/routers/ollama.py index 5894a72c35..11bf5b914f 100644 --- a/backend/open_webui/routers/ollama.py +++ b/backend/open_webui/routers/ollama.py @@ -47,7 +47,7 @@ from open_webui.utils.misc import ( from open_webui.utils.payload import ( apply_model_params_to_body_ollama, apply_model_params_to_body_openai, - apply_model_system_prompt_to_body, + apply_system_prompt_to_body, ) from open_webui.utils.auth import get_admin_user, get_verified_user from open_webui.utils.access_control import has_access @@ -415,15 +415,15 @@ async def get_all_models(request: Request, user: UserModel = None): try: loaded_models = await get_ollama_loaded_models(request, user=user) expires_map = { - m["name"]: m["expires_at"] + m["model"]: m["expires_at"] for m in loaded_models["models"] if "expires_at" in m } for m in models["models"]: - if m["name"] in expires_map: + if m["model"] in expires_map: # Parse ISO8601 datetime with offset, get unix timestamp as int - dt = datetime.fromisoformat(expires_map[m["name"]]) + dt = datetime.fromisoformat(expires_map[m["model"]]) m["expires_at"] = int(dt.timestamp()) except Exception as e: log.debug(f"Failed to get loaded models: {e}") @@ -1330,7 +1330,7 @@ async def generate_chat_completion( system = params.pop("system", None) payload = apply_model_params_to_body_ollama(params, payload) - payload = apply_model_system_prompt_to_body(system, payload, metadata, user) + payload = apply_system_prompt_to_body(system, payload, metadata, user) # Check if user has access to the model if not bypass_filter and user.role == "user": @@ -1519,7 +1519,7 @@ async def generate_openai_chat_completion( system = params.pop("system", None) payload = apply_model_params_to_body_openai(params, payload) - payload = apply_model_system_prompt_to_body(system, payload, metadata, user) + payload 
= apply_system_prompt_to_body(system, payload, metadata, user) # Check if user has access to the model if user.role == "user": diff --git a/backend/open_webui/routers/openai.py b/backend/open_webui/routers/openai.py index c8a3aebdd0..7ba0c5f68a 100644 --- a/backend/open_webui/routers/openai.py +++ b/backend/open_webui/routers/openai.py @@ -39,7 +39,7 @@ from open_webui.env import SRC_LOG_LEVELS from open_webui.utils.payload import ( apply_model_params_to_body_openai, - apply_model_system_prompt_to_body, + apply_system_prompt_to_body, ) from open_webui.utils.misc import ( convert_logit_bias_input_to_json, @@ -361,9 +361,18 @@ async def get_all_models_responses(request: Request, user: UserModel) -> list: prefix_id = api_config.get("prefix_id", None) tags = api_config.get("tags", []) - for model in ( + model_list = ( response if isinstance(response, list) else response.get("data", []) - ): + ) + if not isinstance(model_list, list): + # Catch non-list responses + model_list = [] + + for model in model_list: + # Remove name key if its value is None #16689 + if "name" in model and model["name"] is None: + del model["name"] + if prefix_id: model["id"] = ( f"{prefix_id}.{model.get('id', model.get('name', ''))}" @@ -693,6 +702,10 @@ def get_azure_allowed_params(api_version: str) -> set[str]: return allowed_params +def is_openai_reasoning_model(model: str) -> bool: + return model.lower().startswith(("o1", "o3", "o4", "gpt-5")) + + def convert_to_azure_payload(url, payload: dict, api_version: str): model = payload.get("model", "") @@ -700,7 +713,7 @@ def convert_to_azure_payload(url, payload: dict, api_version: str): allowed_params = get_azure_allowed_params(api_version) # Special handling for o-series models - if model.startswith("o") and model.endswith("-mini"): + if is_openai_reasoning_model(model): # Convert max_tokens to max_completion_tokens for o-series models if "max_tokens" in payload: payload["max_completion_tokens"] = payload["max_tokens"] @@ -750,7 +763,7 @@ async def generate_chat_completion( system = params.pop("system", None) payload = apply_model_params_to_body_openai(params, payload) - payload = apply_model_system_prompt_to_body(system, payload, metadata, user) + payload = apply_system_prompt_to_body(system, payload, metadata, user) # Check if user has access to the model if not bypass_filter and user.role == "user": @@ -806,10 +819,7 @@ async def generate_chat_completion( key = request.app.state.config.OPENAI_API_KEYS[idx] # Check if model is a reasoning model that needs special handling - is_reasoning_model = ( - payload["model"].lower().startswith(("o1", "o3", "o4", "gpt-5")) - ) - if is_reasoning_model: + if is_openai_reasoning_model(payload["model"]): payload = openai_reasoning_model_handler(payload) elif "api.openai.com" not in url: # Remove "max_completion_tokens" from the payload for backward compatibility diff --git a/backend/open_webui/routers/prompts.py b/backend/open_webui/routers/prompts.py index afc00951fd..5981f99f69 100644 --- a/backend/open_webui/routers/prompts.py +++ b/backend/open_webui/routers/prompts.py @@ -10,7 +10,7 @@ from open_webui.models.prompts import ( from open_webui.constants import ERROR_MESSAGES from open_webui.utils.auth import get_admin_user, get_verified_user from open_webui.utils.access_control import has_access, has_permission -from open_webui.config import ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS +from open_webui.config import BYPASS_ADMIN_ACCESS_CONTROL router = APIRouter() @@ -21,7 +21,7 @@ router = APIRouter() @router.get("/", 
response_model=list[PromptModel]) async def get_prompts(user=Depends(get_verified_user)): - if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: + if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: prompts = Prompts.get_prompts() else: prompts = Prompts.get_prompts_by_user_id(user.id, "read") @@ -31,7 +31,7 @@ async def get_prompts(user=Depends(get_verified_user)): @router.get("/list", response_model=list[PromptUserResponse]) async def get_prompt_list(user=Depends(get_verified_user)): - if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: + if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: prompts = Prompts.get_prompts() else: prompts = Prompts.get_prompts_by_user_id(user.id, "write") diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index c02b48e487..738f2d05fc 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -5,7 +5,6 @@ import os import shutil import asyncio - import uuid from datetime import datetime from pathlib import Path @@ -281,6 +280,18 @@ async def update_embedding_config( log.info( f"Updating embedding model: {request.app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}" ) + if request.app.state.config.RAG_EMBEDDING_ENGINE == "": + # unloads current internal embedding model and clears VRAM cache + request.app.state.ef = None + request.app.state.EMBEDDING_FUNCTION = None + import gc + + gc.collect() + if DEVICE_TYPE == "cuda": + import torch + + if torch.cuda.is_available(): + torch.cuda.empty_cache() try: request.app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine request.app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model @@ -449,6 +460,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + "WEB_LOADER_CONCURRENT_REQUESTS": request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER, @@ -504,6 +516,7 @@ class WebConfig(BaseModel): WEB_SEARCH_TRUST_ENV: Optional[bool] = None WEB_SEARCH_RESULT_COUNT: Optional[int] = None WEB_SEARCH_CONCURRENT_REQUESTS: Optional[int] = None + WEB_LOADER_CONCURRENT_REQUESTS: Optional[int] = None WEB_SEARCH_DOMAIN_FILTER_LIST: Optional[List[str]] = [] BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: Optional[bool] = None BYPASS_WEB_SEARCH_WEB_LOADER: Optional[bool] = None @@ -651,9 +664,6 @@ async def update_rag_config( if form_data.ENABLE_RAG_HYBRID_SEARCH is not None else request.app.state.config.ENABLE_RAG_HYBRID_SEARCH ) - # Free up memory if hybrid search is disabled - if not request.app.state.config.ENABLE_RAG_HYBRID_SEARCH: - request.app.state.rf = None request.app.state.config.TOP_K_RERANKER = ( form_data.TOP_K_RERANKER @@ -807,6 +817,18 @@ async def update_rag_config( ) # Reranking settings + if request.app.state.config.RAG_RERANKING_ENGINE == "": + # Unloading the internal reranker and clear VRAM memory + request.app.state.rf = None + request.app.state.RERANKING_FUNCTION = None + import gc + + gc.collect() + if DEVICE_TYPE == 
"cuda": + import torch + + if torch.cuda.is_available(): + torch.cuda.empty_cache() request.app.state.config.RAG_RERANKING_ENGINE = ( form_data.RAG_RERANKING_ENGINE if form_data.RAG_RERANKING_ENGINE is not None @@ -836,19 +858,23 @@ async def update_rag_config( ) try: - request.app.state.rf = get_rf( - request.app.state.config.RAG_RERANKING_ENGINE, - request.app.state.config.RAG_RERANKING_MODEL, - request.app.state.config.RAG_EXTERNAL_RERANKER_URL, - request.app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, - True, - ) + if ( + request.app.state.config.ENABLE_RAG_HYBRID_SEARCH + and not request.app.state.config.BYPASS_EMBEDDING_AND_RETRIEVAL + ): + request.app.state.rf = get_rf( + request.app.state.config.RAG_RERANKING_ENGINE, + request.app.state.config.RAG_RERANKING_MODEL, + request.app.state.config.RAG_EXTERNAL_RERANKER_URL, + request.app.state.config.RAG_EXTERNAL_RERANKER_API_KEY, + True, + ) - request.app.state.RERANKING_FUNCTION = get_reranking_function( - request.app.state.config.RAG_RERANKING_ENGINE, - request.app.state.config.RAG_RERANKING_MODEL, - request.app.state.rf, - ) + request.app.state.RERANKING_FUNCTION = get_reranking_function( + request.app.state.config.RAG_RERANKING_ENGINE, + request.app.state.config.RAG_RERANKING_MODEL, + request.app.state.rf, + ) except Exception as e: log.error(f"Error loading reranking model: {e}") request.app.state.config.ENABLE_RAG_HYBRID_SEARCH = False @@ -916,6 +942,9 @@ async def update_rag_config( request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS = ( form_data.web.WEB_SEARCH_CONCURRENT_REQUESTS ) + request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS = ( + form_data.web.WEB_LOADER_CONCURRENT_REQUESTS + ) request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST = ( form_data.web.WEB_SEARCH_DOMAIN_FILTER_LIST ) @@ -1067,6 +1096,7 @@ async def update_rag_config( "WEB_SEARCH_TRUST_ENV": request.app.state.config.WEB_SEARCH_TRUST_ENV, "WEB_SEARCH_RESULT_COUNT": request.app.state.config.WEB_SEARCH_RESULT_COUNT, "WEB_SEARCH_CONCURRENT_REQUESTS": request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + "WEB_LOADER_CONCURRENT_REQUESTS": request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, "WEB_SEARCH_DOMAIN_FILTER_LIST": request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, "BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL": request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL, "BYPASS_WEB_SEARCH_WEB_LOADER": request.app.state.config.BYPASS_WEB_SEARCH_WEB_LOADER, @@ -1470,7 +1500,7 @@ def process_file( log.debug(f"text_content: {text_content}") Files.update_file_data_by_id( file.id, - {"content": text_content}, + {"status": "completed", "content": text_content}, ) hash = calculate_sha256_string(text_content) @@ -1624,7 +1654,7 @@ def process_web( loader = get_web_loader( form_data.url, verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, ) docs = loader.load() content = " ".join([doc.page_content for doc in docs]) @@ -1798,6 +1828,7 @@ def search_web(request: Request, engine: str, query: str) -> list[SearchResult]: query, request.app.state.config.WEB_SEARCH_RESULT_COUNT, request.app.state.config.WEB_SEARCH_DOMAIN_FILTER_LIST, + concurrent_requests=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, ) elif engine == "tavily": if request.app.state.config.TAVILY_API_KEY: @@ -1971,7 +2002,7 @@ async def process_web_search( loader = get_web_loader( urls, 
verify_ssl=request.app.state.config.ENABLE_WEB_LOADER_SSL_VERIFICATION, - requests_per_second=request.app.state.config.WEB_SEARCH_CONCURRENT_REQUESTS, + requests_per_second=request.app.state.config.WEB_LOADER_CONCURRENT_REQUESTS, trust_env=request.app.state.config.WEB_SEARCH_TRUST_ENV, ) docs = await loader.aload() diff --git a/backend/open_webui/routers/tasks.py b/backend/open_webui/routers/tasks.py index 2dec218d92..e49602094f 100644 --- a/backend/open_webui/routers/tasks.py +++ b/backend/open_webui/routers/tasks.py @@ -198,14 +198,7 @@ async def generate_title( else: template = DEFAULT_TITLE_GENERATION_PROMPT_TEMPLATE - content = title_generation_template( - template, - form_data["messages"], - { - "name": user.name, - "location": user.info.get("location") if user.info else None, - }, - ) + content = title_generation_template(template, form_data["messages"], user) max_tokens = ( models[task_model_id].get("info", {}).get("params", {}).get("max_tokens", 1000) @@ -289,14 +282,7 @@ async def generate_follow_ups( else: template = DEFAULT_FOLLOW_UP_GENERATION_PROMPT_TEMPLATE - content = follow_up_generation_template( - template, - form_data["messages"], - { - "name": user.name, - "location": user.info.get("location") if user.info else None, - }, - ) + content = follow_up_generation_template(template, form_data["messages"], user) payload = { "model": task_model_id, @@ -369,9 +355,7 @@ async def generate_chat_tags( else: template = DEFAULT_TAGS_GENERATION_PROMPT_TEMPLATE - content = tags_generation_template( - template, form_data["messages"], {"name": user.name} - ) + content = tags_generation_template(template, form_data["messages"], user) payload = { "model": task_model_id, @@ -437,13 +421,7 @@ async def generate_image_prompt( else: template = DEFAULT_IMAGE_PROMPT_GENERATION_PROMPT_TEMPLATE - content = image_prompt_generation_template( - template, - form_data["messages"], - user={ - "name": user.name, - }, - ) + content = image_prompt_generation_template(template, form_data["messages"], user) payload = { "model": task_model_id, @@ -524,9 +502,7 @@ async def generate_queries( else: template = DEFAULT_QUERY_GENERATION_PROMPT_TEMPLATE - content = query_generation_template( - template, form_data["messages"], {"name": user.name} - ) + content = query_generation_template(template, form_data["messages"], user) payload = { "model": task_model_id, @@ -611,9 +587,7 @@ async def generate_autocompletion( else: template = DEFAULT_AUTOCOMPLETE_GENERATION_PROMPT_TEMPLATE - content = autocomplete_generation_template( - template, prompt, messages, type, {"name": user.name} - ) + content = autocomplete_generation_template(template, prompt, messages, type, user) payload = { "model": task_model_id, @@ -675,14 +649,7 @@ async def generate_emoji( template = DEFAULT_EMOJI_GENERATION_PROMPT_TEMPLATE - content = emoji_generation_template( - template, - form_data["prompt"], - { - "name": user.name, - "location": user.info.get("location") if user.info else None, - }, - ) + content = emoji_generation_template(template, form_data["prompt"], user) payload = { "model": task_model_id, diff --git a/backend/open_webui/routers/tools.py b/backend/open_webui/routers/tools.py index 3c3e06a985..c017233765 100644 --- a/backend/open_webui/routers/tools.py +++ b/backend/open_webui/routers/tools.py @@ -19,10 +19,10 @@ from open_webui.utils.plugin import load_tool_module_by_id, replace_imports from open_webui.utils.tools import get_tool_specs from open_webui.utils.auth import get_admin_user, get_verified_user from 
open_webui.utils.access_control import has_access, has_permission -from open_webui.utils.tools import get_tool_servers_data +from open_webui.utils.tools import get_tool_servers from open_webui.env import SRC_LOG_LEVELS -from open_webui.config import CACHE_DIR, ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS +from open_webui.config import CACHE_DIR, BYPASS_ADMIN_ACCESS_CONTROL from open_webui.constants import ERROR_MESSAGES @@ -32,6 +32,7 @@ log.setLevel(SRC_LOG_LEVELS["MAIN"]) router = APIRouter() + ############################ # GetTools ############################ @@ -39,23 +40,14 @@ router = APIRouter() @router.get("/", response_model=list[ToolUserResponse]) async def get_tools(request: Request, user=Depends(get_verified_user)): - - if not request.app.state.TOOL_SERVERS: - # If the tool servers are not set, we need to set them - # This is done only once when the server starts - # This is done to avoid loading the tool servers every time - - request.app.state.TOOL_SERVERS = await get_tool_servers_data( - request.app.state.config.TOOL_SERVER_CONNECTIONS - ) - tools = Tools.get_tools() - for server in request.app.state.TOOL_SERVERS: + tools = Tools.get_tools() + + for server in await get_tool_servers(request): tools.append( ToolUserResponse( **{ - "id": f"server:{server['idx']}", - "user_id": f"server:{server['idx']}", + "id": f"server:{server.get('id')}", + "user_id": f"server:{server.get('id')}", "name": server.get("openapi", {}) .get("info", {}) .get("title", "Tool Server"), @@ -65,7 +57,7 @@ async def get_tools(request: Request, user=Depends(get_verified_user)): .get("description", ""), }, "access_control": request.app.state.config.TOOL_SERVER_CONNECTIONS[ - server["idx"] + server.get("idx", 0) ] .get("config", {}) .get("access_control", None), @@ -75,7 +67,7 @@ ) ) - if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: + if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: # Admin can see all tools return tools else: @@ -95,7 +87,7 @@ async def get_tools(request: Request, user=Depends(get_verified_user)): @router.get("/list", response_model=list[ToolUserResponse]) async def get_tool_list(user=Depends(get_verified_user)): - if user.role == "admin" and ENABLE_ADMIN_WORKSPACE_CONTENT_ACCESS: + if user.role == "admin" and BYPASS_ADMIN_ACCESS_CONTROL: tools = Tools.get_tools() else: tools = Tools.get_tools_by_user_id(user.id, "write") diff --git a/backend/open_webui/storage/provider.py b/backend/open_webui/storage/provider.py index 41a92fafe9..4292e53827 100644 --- a/backend/open_webui/storage/provider.py +++ b/backend/open_webui/storage/provider.py @@ -112,6 +112,9 @@ class S3StorageProvider(StorageProvider): "use_accelerate_endpoint": S3_USE_ACCELERATE_ENDPOINT, "addressing_style": S3_ADDRESSING_STYLE, }, + # KIT change - see https://github.com/boto/boto3/issues/4400#issuecomment-2600742103 + request_checksum_calculation="when_required", + response_checksum_validation="when_required", ) # If access key and secret are provided, use them for authentication diff --git a/backend/open_webui/utils/access_control.py b/backend/open_webui/utils/access_control.py index c93574527f..c36d861ad6 100644 --- a/backend/open_webui/utils/access_control.py +++ b/backend/open_webui/utils/access_control.py @@ -60,8 +60,7 @@ def get_permissions( # Combine permissions from all user groups for group in user_groups: - group_permissions = group.permissions or {} - permissions = combine_permissions(permissions, group_permissions) + permissions =
combine_permissions(permissions, group.permissions or {}) # Ensure all fields from default_permissions are present and filled in permissions = fill_missing_permissions(permissions, default_permissions) @@ -96,8 +95,7 @@ def has_permission( user_groups = Groups.get_groups_by_member_id(user_id) for group in user_groups: - group_permissions = group.permissions - if get_permission(group_permissions, permission_hierarchy): + if get_permission(group.permissions or {}, permission_hierarchy): return True # Check default permissions afterward if the group permissions don't allow it diff --git a/backend/open_webui/utils/middleware.py b/backend/open_webui/utils/middleware.py index b459f211b1..d9bcce9272 100644 --- a/backend/open_webui/utils/middleware.py +++ b/backend/open_webui/utils/middleware.py @@ -3,6 +3,7 @@ import logging import sys import os import base64 +import textwrap import asyncio from aiocache import cached @@ -73,6 +74,7 @@ from open_webui.utils.misc import ( add_or_update_user_message, get_last_user_message, get_last_assistant_message, + get_system_message, prepend_to_first_user_message_content, convert_logit_bias_input_to_json, ) @@ -83,14 +85,14 @@ from open_webui.utils.filter import ( process_filter_functions, ) from open_webui.utils.code_interpreter import execute_code_jupyter -from open_webui.utils.payload import apply_model_system_prompt_to_body +from open_webui.utils.payload import apply_system_prompt_to_body -from open_webui.tasks import create_task from open_webui.config import ( CACHE_DIR, DEFAULT_TOOLS_FUNCTION_CALLING_PROMPT_TEMPLATE, DEFAULT_CODE_INTERPRETER_PROMPT, + CODE_INTERPRETER_BLOCKED_MODULES, ) from open_webui.env import ( SRC_LOG_LEVELS, @@ -736,6 +738,12 @@ async def process_chat_payload(request, form_data, user, metadata, model): form_data = apply_params_to_form_data(form_data, model) log.debug(f"form_data: {form_data}") + system_message = get_system_message(form_data.get("messages", [])) + if system_message: + form_data = apply_system_prompt_to_body( + system_message.get("content"), form_data, metadata, user + ) + event_emitter = get_event_emitter(metadata) event_call = get_event_call(metadata) @@ -777,7 +785,7 @@ async def process_chat_payload(request, form_data, user, metadata, model): if folder and folder.data: if "system_prompt" in folder.data: - form_data = apply_model_system_prompt_to_body( + form_data = apply_system_prompt_to_body( folder.data["system_prompt"], form_data, metadata, user ) if "files" in folder.data: @@ -908,7 +916,7 @@ async def process_chat_payload(request, form_data, user, metadata, model): tools_dict = {} if tool_ids: - tools_dict = get_tools( + tools_dict = await get_tools( request, tool_ids, user, @@ -989,25 +997,24 @@ async def process_chat_payload(request, form_data, user, metadata, model): if prompt is None: raise Exception("No user message found") - if context_string == "": - if request.app.state.config.RELEVANCE_THRESHOLD == 0: - log.debug( - f"With a 0 relevancy threshold for RAG, the context cannot be empty" - ) - else: + if context_string != "": # Workaround for Ollama 2.0+ system prompt issue # TODO: replace with add_or_update_system_message if model.get("owned_by") == "ollama": form_data["messages"] = prepend_to_first_user_message_content( rag_template( - request.app.state.config.RAG_TEMPLATE, context_string, prompt + request.app.state.config.RAG_TEMPLATE, + context_string, + prompt, ), form_data["messages"], ) else: form_data["messages"] = add_or_update_system_message( rag_template( - 
request.app.state.config.RAG_TEMPLATE, context_string, prompt + request.app.state.config.RAG_TEMPLATE, + context_string, + prompt, ), form_data["messages"], ) @@ -1324,7 +1331,7 @@ async def process_chat_response( if not get_active_status_by_user_id(user.id): webhook_url = Users.get_user_webhook_url_by_id(user.id) if webhook_url: - post_webhook( + await post_webhook( request.app.state.WEBUI_NAME, webhook_url, f"{title} - {request.app.state.config.WEBUI_URL}/c/{metadata['chat_id']}\n\n{content}", @@ -1620,9 +1627,13 @@ async def process_chat_response( match = re.search(start_tag_pattern, content) if match: - attr_content = ( - match.group(1) if match.group(1) else "" - ) # Ensure it's not None + try: + attr_content = ( + match.group(1) if match.group(1) else "" + ) # Ensure it's not None + except: + attr_content = "" + attributes = extract_attributes( attr_content ) # Extract attributes safely @@ -1846,6 +1857,21 @@ async def process_chat_response( or 1 ), ) + last_delta_data = None + + async def flush_pending_delta_data(threshold: int = 0): + nonlocal delta_count + nonlocal last_delta_data + + if delta_count >= threshold and last_delta_data: + await event_emitter( + { + "type": "chat:completion", + "data": last_delta_data, + } + ) + delta_count = 0 + last_delta_data = None async for line in response.body_iterator: line = line.decode("utf-8") if isinstance(line, bytes) else line @@ -1886,6 +1912,12 @@ async def process_chat_response( "selectedModelId": model_id, }, ) + await event_emitter( + { + "type": "chat:completion", + "data": data, + } + ) else: choices = data.get("choices", []) if not choices: @@ -2096,14 +2128,9 @@ async def process_chat_response( if delta: delta_count += 1 + last_delta_data = data if delta_count >= delta_chunk_size: - await event_emitter( - { - "type": "chat:completion", - "data": data, - } - ) - delta_count = 0 + await flush_pending_delta_data(delta_chunk_size) else: await event_emitter( { @@ -2118,6 +2145,7 @@ async def process_chat_response( else: log.debug(f"Error: {e}") continue + await flush_pending_delta_data() if content_blocks: # Clean up the last text block @@ -2355,6 +2383,27 @@ async def process_chat_response( try: if content_blocks[-1]["attributes"].get("type") == "code": code = content_blocks[-1]["content"] + if CODE_INTERPRETER_BLOCKED_MODULES: + blocking_code = textwrap.dedent( + f""" + import builtins + + BLOCKED_MODULES = {CODE_INTERPRETER_BLOCKED_MODULES} + + _real_import = builtins.__import__ + def restricted_import(name, globals=None, locals=None, fromlist=(), level=0): + if name.split('.')[0] in BLOCKED_MODULES: + importer_name = globals.get('__name__') if globals else None + if importer_name == '__main__': + raise ImportError( + f"Direct import of module {{name}} is restricted." 
+ ) + return _real_import(name, globals, locals, fromlist, level) + + builtins.__import__ = restricted_import + """ + ) + code = blocking_code + "\n" + code if ( request.app.state.config.CODE_INTERPRETER_ENGINE @@ -2520,7 +2569,7 @@ async def process_chat_response( if not get_active_status_by_user_id(user.id): webhook_url = Users.get_user_webhook_url_by_id(user.id) if webhook_url: - post_webhook( + await post_webhook( request.app.state.WEBUI_NAME, webhook_url, f"{title} - {request.app.state.config.WEBUI_URL}/c/{metadata['chat_id']}\n\n{content}", @@ -2557,13 +2606,7 @@ async def process_chat_response( if response.background is not None: await response.background() - # background_tasks.add_task(response_handler, response, events) - task_id, _ = await create_task( - request.app.state.redis, - response_handler(response, events), - id=metadata["chat_id"], - ) - return {"status": True, "task_id": task_id} + return await response_handler(response, events) else: # Fallback to the original response diff --git a/backend/open_webui/utils/misc.py b/backend/open_webui/utils/misc.py index 2a780209a7..82729c34e0 100644 --- a/backend/open_webui/utils/misc.py +++ b/backend/open_webui/utils/misc.py @@ -1,5 +1,6 @@ import hashlib import re +import threading import time import uuid import logging @@ -478,3 +479,46 @@ def convert_logit_bias_input_to_json(user_input): bias = 100 if bias > 100 else -100 if bias < -100 else bias logit_bias_json[token] = bias return json.dumps(logit_bias_json) + + +def freeze(value): + """ + Freeze a value to make it hashable. + """ + if isinstance(value, dict): + return frozenset((k, freeze(v)) for k, v in value.items()) + elif isinstance(value, list): + return tuple(freeze(v) for v in value) + return value + + +def throttle(interval: float = 10.0): + """ + Decorator to prevent a function from being called more than once within a specified duration. + If the function is called again within the duration, it returns None. To avoid returning + different types, the return type of the function should be Optional[T]. + + :param interval: Duration in seconds to wait before allowing the function to be called again. 
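+
+    Example (illustrative): repeated calls with the same arguments inside
+    the interval are deduplicated and return None:
+
+        @throttle(10.0)
+        def ping():
+            return "pong"
+
+        ping()  # -> "pong"
+        ping()  # -> None (same key, called again within 10 seconds)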
+ """ + + def decorator(func): + last_calls = {} + lock = threading.Lock() + + def wrapper(*args, **kwargs): + if interval is None: + return func(*args, **kwargs) + + key = (args, freeze(kwargs)) + now = time.time() + if now - last_calls.get(key, 0) < interval: + return None + with lock: + if now - last_calls.get(key, 0) < interval: + return None + last_calls[key] = now + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/backend/open_webui/utils/oauth.py b/backend/open_webui/utils/oauth.py index 131c35800e..5ac189d48d 100644 --- a/backend/open_webui/utils/oauth.py +++ b/backend/open_webui/utils/oauth.py @@ -115,7 +115,13 @@ class OAuthManager: nested_claims = oauth_claim.split(".") for nested_claim in nested_claims: claim_data = claim_data.get(nested_claim, {}) - oauth_roles = claim_data if isinstance(claim_data, list) else [] + + oauth_roles = [] + + if isinstance(claim_data, list): + oauth_roles = claim_data + if isinstance(claim_data, str) or isinstance(claim_data, int): + oauth_roles = [str(claim_data)] log.debug(f"Oauth Roles claim: {oauth_claim}") log.debug(f"User roles from oauth: {oauth_roles}") @@ -355,7 +361,11 @@ class OAuthManager: log.warning(f"OAuth callback error: {e}") raise HTTPException(400, detail=ERROR_MESSAGES.INVALID_CRED) user_data: UserInfo = token.get("userinfo") - if not user_data or auth_manager_config.OAUTH_EMAIL_CLAIM not in user_data: + if ( + (not user_data) + or (auth_manager_config.OAUTH_EMAIL_CLAIM not in user_data) + or (auth_manager_config.OAUTH_USERNAME_CLAIM not in user_data) + ): user_data: UserInfo = await client.userinfo(token=token) if not user_data: log.warning(f"OAuth callback failed, user data is missing: {token}") @@ -498,7 +508,7 @@ class OAuthManager: ) if auth_manager_config.WEBHOOK_URL: - post_webhook( + await post_webhook( WEBUI_NAME, auth_manager_config.WEBHOOK_URL, WEBHOOK_MESSAGES.USER_SIGNUP(user.name), @@ -525,7 +535,15 @@ class OAuthManager: default_permissions=request.app.state.config.USER_PERMISSIONS, ) + redirect_base_url = str(request.app.state.config.WEBUI_URL or request.base_url) + if redirect_base_url.endswith("/"): + redirect_base_url = redirect_base_url[:-1] + redirect_url = f"{redirect_base_url}/auth" + + response = RedirectResponse(url=redirect_url, headers=response.headers) + # Set the cookie token + # Redirect back to the frontend with the JWT token response.set_cookie( key="token", value=jwt_token, @@ -543,11 +561,4 @@ class OAuthManager: samesite=WEBUI_AUTH_COOKIE_SAME_SITE, secure=WEBUI_AUTH_COOKIE_SECURE, ) - # Redirect back to the frontend with the JWT token - - redirect_base_url = str(request.app.state.config.WEBUI_URL or request.base_url) - if redirect_base_url.endswith("/"): - redirect_base_url = redirect_base_url[:-1] - redirect_url = f"{redirect_base_url}/auth" - - return RedirectResponse(url=redirect_url, headers=response.headers) + return response diff --git a/backend/open_webui/utils/payload.py b/backend/open_webui/utils/payload.py index 316e61c34c..811ba75c9f 100644 --- a/backend/open_webui/utils/payload.py +++ b/backend/open_webui/utils/payload.py @@ -9,7 +9,7 @@ import json # inplace function: form_data is modified -def apply_model_system_prompt_to_body( +def apply_system_prompt_to_body( system: Optional[str], form_data: dict, metadata: Optional[dict] = None, user=None ) -> dict: if not system: @@ -22,15 +22,7 @@ def apply_model_system_prompt_to_body( system = prompt_variables_template(system, variables) # Legacy (API Usage) - if user: - template_params = { - "user_name": 
-            "user_location": user.info.get("location") if user.info else None,
-        }
-    else:
-        template_params = {}
-
-    system = prompt_template(system, **template_params)
+    system = prompt_template(system, user)
 
     form_data["messages"] = add_or_update_system_message(
         system, form_data.get("messages", [])
diff --git a/backend/open_webui/utils/plugin.py b/backend/open_webui/utils/plugin.py
index 9d539f4840..8d9729bae2 100644
--- a/backend/open_webui/utils/plugin.py
+++ b/backend/open_webui/utils/plugin.py
@@ -260,7 +260,7 @@ def install_tool_and_function_dependencies():
                 all_dependencies += f"{dependencies}, "
     for tool in tool_list:
         # Only install requirements for admin tools
-        if tool.user.role == "admin":
+        if tool.user and tool.user.role == "admin":
             frontmatter = extract_frontmatter(replace_imports(tool.content))
             if dependencies := frontmatter.get("requirements"):
                 all_dependencies += f"{dependencies}, "
diff --git a/backend/open_webui/utils/task.py b/backend/open_webui/utils/task.py
index 42b44d5167..84c9308dc1 100644
--- a/backend/open_webui/utils/task.py
+++ b/backend/open_webui/utils/task.py
@@ -2,7 +2,7 @@ import logging
 import math
 import re
 from datetime import datetime
-from typing import Optional
+from typing import Optional, Any
 
 import uuid
@@ -38,9 +38,46 @@ def prompt_variables_template(template: str, variables: dict[str, str]) -> str:
     return template
 
 
-def prompt_template(
-    template: str, user_name: Optional[str] = None, user_location: Optional[str] = None
-) -> str:
+def prompt_template(template: str, user: Optional[Any] = None) -> str:
+
+    USER_VARIABLES = {}
+
+    if user:
+        if hasattr(user, "model_dump"):
+            user = user.model_dump()
+
+        if isinstance(user, dict):
+            user_info = user.get("info", {}) or {}
+            birth_date = user.get("date_of_birth")
+            age = None
+
+            if birth_date:
+                try:
+                    # If birth_date is str, convert to datetime
+                    if isinstance(birth_date, str):
+                        birth_date = datetime.strptime(birth_date, "%Y-%m-%d")
+
+                    today = datetime.now()
+                    age = (
+                        today.year
+                        - birth_date.year
+                        - (
+                            (today.month, today.day)
+                            < (birth_date.month, birth_date.day)
+                        )
+                    )
+                except Exception:
+                    pass
+
+            USER_VARIABLES = {
+                "name": str(user.get("name") or "Unknown"),
+                "location": str(user_info.get("location") or "Unknown"),
+                "bio": str(user.get("bio") or "Unknown"),
+                "gender": str(user.get("gender") or "Unknown"),
+                "birth_date": str(birth_date or "Unknown"),
+                "age": str(age) if age is not None else "Unknown",
+            }
+
     # Get the current date
     current_date = datetime.now()
 
@@ -56,19 +93,20 @@ def prompt_template(
     )
     template = template.replace("{{CURRENT_WEEKDAY}}", formatted_weekday)
 
-    if user_name:
-        # Replace {{USER_NAME}} in the template with the user's name
-        template = template.replace("{{USER_NAME}}", user_name)
-    else:
-        # Replace {{USER_NAME}} in the template with "Unknown"
-        template = template.replace("{{USER_NAME}}", "Unknown")
-
-    if user_location:
-        # Replace {{USER_LOCATION}} in the template with the current location
-        template = template.replace("{{USER_LOCATION}}", user_location)
-    else:
-        # Replace {{USER_LOCATION}} in the template with "Unknown"
-        template = template.replace("{{USER_LOCATION}}", "Unknown")
+    template = template.replace("{{USER_NAME}}", USER_VARIABLES.get("name", "Unknown"))
+    template = template.replace("{{USER_BIO}}", USER_VARIABLES.get("bio", "Unknown"))
+    template = template.replace(
+        "{{USER_GENDER}}", USER_VARIABLES.get("gender", "Unknown")
+    )
+    template = template.replace(
+        "{{USER_BIRTH_DATE}}", USER_VARIABLES.get("birth_date", "Unknown")
+    )
+    template = template.replace(
+        "{{USER_AGE}}", str(USER_VARIABLES.get("age", "Unknown"))
+    )
+    template = template.replace(
+        "{{USER_LOCATION}}", USER_VARIABLES.get("location", "Unknown")
+    )
 
     return template
 
@@ -189,90 +227,56 @@ def rag_template(template: str, context: str, query: str):
 
 
 def title_generation_template(
-    template: str, messages: list[dict], user: Optional[dict] = None
+    template: str, messages: list[dict], user: Optional[Any] = None
 ) -> str:
+
     prompt = get_last_user_message(messages)
     template = replace_prompt_variable(template, prompt)
     template = replace_messages_variable(template, messages)
 
-    template = prompt_template(
-        template,
-        **(
-            {"user_name": user.get("name"), "user_location": user.get("location")}
-            if user
-            else {}
-        ),
-    )
+    template = prompt_template(template, user)
 
     return template
 
 
 def follow_up_generation_template(
-    template: str, messages: list[dict], user: Optional[dict] = None
+    template: str, messages: list[dict], user: Optional[Any] = None
 ) -> str:
     prompt = get_last_user_message(messages)
     template = replace_prompt_variable(template, prompt)
     template = replace_messages_variable(template, messages)
 
-    template = prompt_template(
-        template,
-        **(
-            {"user_name": user.get("name"), "user_location": user.get("location")}
-            if user
-            else {}
-        ),
-    )
+    template = prompt_template(template, user)
 
    return template
 
 
 def tags_generation_template(
-    template: str, messages: list[dict], user: Optional[dict] = None
+    template: str, messages: list[dict], user: Optional[Any] = None
 ) -> str:
     prompt = get_last_user_message(messages)
     template = replace_prompt_variable(template, prompt)
     template = replace_messages_variable(template, messages)
 
-    template = prompt_template(
-        template,
-        **(
-            {"user_name": user.get("name"), "user_location": user.get("location")}
-            if user
-            else {}
-        ),
-    )
+    template = prompt_template(template, user)
 
     return template
 
 
 def image_prompt_generation_template(
-    template: str, messages: list[dict], user: Optional[dict] = None
+    template: str, messages: list[dict], user: Optional[Any] = None
 ) -> str:
     prompt = get_last_user_message(messages)
     template = replace_prompt_variable(template, prompt)
     template = replace_messages_variable(template, messages)
 
-    template = prompt_template(
-        template,
-        **(
-            {"user_name": user.get("name"), "user_location": user.get("location")}
-            if user
-            else {}
-        ),
-    )
+    template = prompt_template(template, user)
 
     return template
 
 
 def emoji_generation_template(
-    template: str, prompt: str, user: Optional[dict] = None
+    template: str, prompt: str, user: Optional[Any] = None
 ) -> str:
     template = replace_prompt_variable(template, prompt)
 
-    template = prompt_template(
-        template,
-        **(
-            {"user_name": user.get("name"), "user_location": user.get("location")}
-            if user
-            else {}
-        ),
-    )
+    template = prompt_template(template, user)
 
     return template
 
@@ -282,38 +286,24 @@ def autocomplete_generation_template(
     prompt: str,
     messages: Optional[list[dict]] = None,
     type: Optional[str] = None,
-    user: Optional[dict] = None,
+    user: Optional[Any] = None,
 ) -> str:
     template = template.replace("{{TYPE}}", type if type else "")
     template = replace_prompt_variable(template, prompt)
     template = replace_messages_variable(template, messages)
 
-    template = prompt_template(
-        template,
-        **(
-            {"user_name": user.get("name"), "user_location": user.get("location")}
-            if user
-            else {}
-        ),
-    )
+    template = prompt_template(template, user)
 
     return template
 
 
 def query_generation_template(
-    template: str, messages: list[dict], user: Optional[dict] = None
+    template: str, messages: list[dict], user: Optional[Any] = None
 ) -> str:
     prompt = get_last_user_message(messages)
     template = replace_prompt_variable(template, prompt)
     template = replace_messages_variable(template, messages)
 
-    template = prompt_template(
-        template,
-        **(
-            {"user_name": user.get("name"), "user_location": user.get("location")}
-            if user
-            else {}
-        ),
-    )
+    template = prompt_template(template, user)
 
     return template
diff --git a/backend/open_webui/utils/telemetry/setup.py b/backend/open_webui/utils/telemetry/setup.py
index cd1f45ea6a..36294b4e56 100644
--- a/backend/open_webui/utils/telemetry/setup.py
+++ b/backend/open_webui/utils/telemetry/setup.py
@@ -17,6 +17,7 @@ from open_webui.env import (
     OTEL_SERVICE_NAME,
     OTEL_EXPORTER_OTLP_ENDPOINT,
     OTEL_EXPORTER_OTLP_INSECURE,
+    ENABLE_OTEL_TRACES,
     ENABLE_OTEL_METRICS,
     OTEL_BASIC_AUTH_USERNAME,
     OTEL_BASIC_AUTH_PASSWORD,
@@ -27,29 +28,30 @@ from open_webui.env import (
 def setup(app: FastAPI, db_engine: Engine):
     # set up trace
     resource = Resource.create(attributes={SERVICE_NAME: OTEL_SERVICE_NAME})
-    trace.set_tracer_provider(TracerProvider(resource=resource))
+    if ENABLE_OTEL_TRACES:
+        trace.set_tracer_provider(TracerProvider(resource=resource))
 
-    # Add basic auth header only if both username and password are not empty
-    headers = []
-    if OTEL_BASIC_AUTH_USERNAME and OTEL_BASIC_AUTH_PASSWORD:
-        auth_string = f"{OTEL_BASIC_AUTH_USERNAME}:{OTEL_BASIC_AUTH_PASSWORD}"
-        auth_header = b64encode(auth_string.encode()).decode()
-        headers = [("authorization", f"Basic {auth_header}")]
+        # Add basic auth header only if both username and password are not empty
+        headers = []
+        if OTEL_BASIC_AUTH_USERNAME and OTEL_BASIC_AUTH_PASSWORD:
+            auth_string = f"{OTEL_BASIC_AUTH_USERNAME}:{OTEL_BASIC_AUTH_PASSWORD}"
+            auth_header = b64encode(auth_string.encode()).decode()
+            headers = [("authorization", f"Basic {auth_header}")]
 
-    # otlp export
-    if OTEL_OTLP_SPAN_EXPORTER == "http":
-        exporter = HttpOTLPSpanExporter(
-            endpoint=OTEL_EXPORTER_OTLP_ENDPOINT,
-            headers=headers,
-        )
-    else:
-        exporter = OTLPSpanExporter(
-            endpoint=OTEL_EXPORTER_OTLP_ENDPOINT,
-            insecure=OTEL_EXPORTER_OTLP_INSECURE,
-            headers=headers,
-        )
-    trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(exporter))
-    Instrumentor(app=app, db_engine=db_engine).instrument()
+        # otlp export
+        if OTEL_OTLP_SPAN_EXPORTER == "http":
+            exporter = HttpOTLPSpanExporter(
+                endpoint=OTEL_EXPORTER_OTLP_ENDPOINT,
+                headers=headers,
+            )
+        else:
+            exporter = OTLPSpanExporter(
+                endpoint=OTEL_EXPORTER_OTLP_ENDPOINT,
+                insecure=OTEL_EXPORTER_OTLP_INSECURE,
+                headers=headers,
+            )
+        trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(exporter))
+        Instrumentor(app=app, db_engine=db_engine).instrument()
 
     # set up metrics only if enabled
     if ENABLE_OTEL_METRICS:
diff --git a/backend/open_webui/utils/tools.py b/backend/open_webui/utils/tools.py
index 3727bb1ad9..428b534865 100644
--- a/backend/open_webui/utils/tools.py
+++ b/backend/open_webui/utils/tools.py
@@ -5,6 +5,7 @@ import inspect
 import aiohttp
 import asyncio
 import yaml
+import json
 
 from pydantic import BaseModel
 from pydantic.fields import FieldInfo
@@ -38,6 +39,7 @@ from open_webui.models.users import UserModel
 from open_webui.utils.plugin import load_tool_module_by_id
 from open_webui.env import (
     SRC_LOG_LEVELS,
+    AIOHTTP_CLIENT_TIMEOUT,
     AIOHTTP_CLIENT_TIMEOUT_TOOL_SERVER_DATA,
     AIOHTTP_CLIENT_SESSION_TOOL_SERVER_SSL,
 )
@@ -55,19 +57,38 @@ def get_async_tool_function_and_apply_extra_params(
     extra_params = {k: v for k, v in extra_params.items() if k in sig.parameters}
     partial_func = partial(function, **extra_params)
 
+    # Remove the 'frozen' keyword arguments from the signature
+    # python-genai uses the signature to infer the tool properties for native function calling
+    parameters = []
+    for name, parameter in sig.parameters.items():
+        # Exclude keyword arguments that are frozen
+        if name in extra_params:
+            continue
+        # Keep remaining parameters
+        parameters.append(parameter)
+
+    new_sig = inspect.Signature(
+        parameters=parameters, return_annotation=sig.return_annotation
+    )
+
     if inspect.iscoroutinefunction(function):
-        update_wrapper(partial_func, function)
-        return partial_func
+        # wrap the functools.partial as python-genai has trouble with it
+        # https://github.com/googleapis/python-genai/issues/907
+        async def new_function(*args, **kwargs):
+            return await partial_func(*args, **kwargs)
+
     else:
-        # Make it a coroutine function
+        # Make it a coroutine function when it is not already
         async def new_function(*args, **kwargs):
             return partial_func(*args, **kwargs)
 
-        update_wrapper(new_function, function)
-        return new_function
+    update_wrapper(new_function, function)
+    new_function.__signature__ = new_sig
+
+    return new_function
 
 
-def get_tools(
+async def get_tools(
     request: Request, tool_ids: list[str], user: UserModel, extra_params: dict
 ) -> dict[str, dict]:
     tools_dict = {}
@@ -76,18 +97,24 @@ def get_tools(
         tool = Tools.get_tool_by_id(tool_id)
         if tool is None:
             if tool_id.startswith("server:"):
-                server_idx = int(tool_id.split(":")[1])
-                tool_server_connection = (
-                    request.app.state.config.TOOL_SERVER_CONNECTIONS[server_idx]
-                )
+                server_id = tool_id.split(":")[1]
+                tool_server_data = None
 
-                for server in request.app.state.TOOL_SERVERS:
-                    if server["idx"] == server_idx:
+                for server in await get_tool_servers(request):
+                    if server["id"] == server_id:
                         tool_server_data = server
                         break
 
-                assert tool_server_data is not None
-                specs = tool_server_data.get("specs", [])
+                if tool_server_data is None:
+                    log.warning(f"Tool server data not found for {server_id}")
+                    continue
+
+                tool_server_idx = tool_server_data.get("idx", 0)
+                tool_server_connection = (
+                    request.app.state.config.TOOL_SERVER_CONNECTIONS[tool_server_idx]
+                )
+
+                specs = tool_server_data.get("specs", [])
 
                 for spec in specs:
                     function_name = spec["name"]
@@ -126,14 +153,15 @@ def get_tools(
                         "spec": spec,
                     }
 
-                    # TODO: if collision, prepend toolkit name
-                    if function_name in tools_dict:
+                    # Handle function name collisions
+                    while function_name in tools_dict:
                         log.warning(
                             f"Tool {function_name} already exists in another tools!"
                         )
-                        log.warning(f"Discarding {tool_id}.{function_name}")
-                    else:
-                        tools_dict[function_name] = tool_dict
+                        # Prepend server ID to function name
+                        function_name = f"{server_id}_{function_name}"
+
+                    tools_dict[function_name] = tool_dict
             else:
                 continue
         else:
@@ -193,14 +221,15 @@ def get_tools(
                 },
             }
 
-            # TODO: if collision, prepend toolkit name
-            if function_name in tools_dict:
+            # Handle function name collisions
+            while function_name in tools_dict:
                 log.warning(
                     f"Tool {function_name} already exists in another tools!"
                 )
-                log.warning(f"Discarding {tool_id}.{function_name}")
-            else:
-                tools_dict[function_name] = tool_dict
+                # Prepend tool ID to function name
+                function_name = f"{tool_id}_{function_name}"
+
+            tools_dict[function_name] = tool_dict
 
     return tools_dict
@@ -283,15 +312,15 @@ def convert_function_to_pydantic_model(func: Callable) -> type[BaseModel]:
 
     field_defs = {}
     for name, param in parameters.items():
-
         type_hint = type_hints.get(name, Any)
         default_value = param.default if param.default is not param.empty else ...
         param_description = function_param_descriptions.get(name, None)
 
         if param_description:
-            field_defs[name] = type_hint, Field(
-                default_value, description=param_description
-            )
+            field_defs[name] = (
+                type_hint,
+                Field(default_value, description=param_description),
+            )
         else:
             field_defs[name] = type_hint, default_value
@@ -442,6 +471,34 @@ def convert_openapi_to_tool_payload(openapi_spec):
     return tool_payload
 
 
+async def set_tool_servers(request: Request):
+    request.app.state.TOOL_SERVERS = await get_tool_servers_data(
+        request.app.state.config.TOOL_SERVER_CONNECTIONS
+    )
+
+    if request.app.state.redis is not None:
+        await request.app.state.redis.set(
+            "tool_servers", json.dumps(request.app.state.TOOL_SERVERS)
+        )
+
+    return request.app.state.TOOL_SERVERS
+
+
+async def get_tool_servers(request: Request):
+    tool_servers = []
+    if request.app.state.redis is not None:
+        try:
+            tool_servers = json.loads(await request.app.state.redis.get("tool_servers"))
+        except Exception as e:
+            log.error(f"Error fetching tool_servers from Redis: {e}")
+
+    if not tool_servers:
+        tool_servers = await set_tool_servers(request)
+
+    request.app.state.TOOL_SERVERS = tool_servers
+    return request.app.state.TOOL_SERVERS
+
+
 async def get_tool_server_data(token: str, url: str) -> Dict[str, Any]:
     headers = {
         "Accept": "application/json",
@@ -505,11 +562,16 @@ async def get_tool_servers_data(
                 token = server.get("key", "")
             elif auth_type == "session":
                 token = session_token
-        server_entries.append((idx, server, full_url, info, token))
+
+        id = info.get("id")
+        if not id:
+            id = str(idx)
+
+        server_entries.append((id, idx, server, full_url, info, token))
 
     # Create async tasks to fetch data
     tasks = [
-        get_tool_server_data(token, url) for (_, _, url, _, token) in server_entries
+        get_tool_server_data(token, url) for (_, _, _, url, _, token) in server_entries
     ]
 
     # Execute tasks concurrently
@@ -517,7 +579,7 @@ async def get_tool_servers_data(
 
     # Build final results with index and server metadata
     results = []
-    for (idx, server, url, info, _), response in zip(server_entries, responses):
+    for (id, idx, server, url, info, _), response in zip(server_entries, responses):
         if isinstance(response, Exception):
             log.error(f"Failed to connect to {url} OpenAPI tool server")
             continue
@@ -535,6 +597,7 @@ async def get_tool_servers_data(
 
         results.append(
             {
+                "id": str(id),
                 "idx": idx,
                 "url": server.get("url"),
                 "openapi": openapi_data,
@@ -613,7 +676,9 @@ async def execute_tool_server(
         if token:
             headers["Authorization"] = f"Bearer {token}"
 
-        async with aiohttp.ClientSession(trust_env=True) as session:
+        async with aiohttp.ClientSession(
+            trust_env=True, timeout=aiohttp.ClientTimeout(total=AIOHTTP_CLIENT_TIMEOUT)
+        ) as session:
             request_method = getattr(session, http_method.lower())
 
             if http_method in ["post", "put", "patch"]:
@@ -626,7 +691,13 @@ async def execute_tool_server(
                     if response.status >= 400:
                         text = await response.text()
                         raise Exception(f"HTTP error {response.status}: {text}")
-                    return await response.json()
+
+                    try:
+                        response_data = await response.json()
+                    except Exception:
+                        response_data = await response.text()
+
+                    return response_data
             else:
                 async with request_method(
                     final_url,
                     headers=headers,
                 ) as response:
                     if response.status >= 400:
                         text = await response.text()
                         raise Exception(f"HTTP error {response.status}: {text}")
-                    return await response.json()
+
+                    try:
+                        response_data = await response.json()
+                    except Exception:
+                        response_data = await response.text()
+
+                    return response_data
 
     except Exception as err:
         error = str(err)
diff --git a/backend/open_webui/utils/webhook.py b/backend/open_webui/utils/webhook.py
index bf0b334d82..7ea29f3988 100644
--- a/backend/open_webui/utils/webhook.py
+++ b/backend/open_webui/utils/webhook.py
@@ -1,7 +1,7 @@
 import json
 import logging
+import aiohttp
 
-import requests
 from open_webui.config import WEBUI_FAVICON_URL
 from open_webui.env import SRC_LOG_LEVELS, VERSION
 
@@ -9,7 +9,7 @@ log = logging.getLogger(__name__)
 log.setLevel(SRC_LOG_LEVELS["WEBHOOK"])
 
 
-def post_webhook(name: str, url: str, message: str, event_data: dict) -> bool:
+async def post_webhook(name: str, url: str, message: str, event_data: dict) -> bool:
     try:
         log.debug(f"post_webhook: {url}, {message}, {event_data}")
         payload = {}
@@ -51,9 +51,12 @@ def post_webhook(name: str, url: str, message: str, event_data: dict) -> bool:
             payload = {**event_data}
 
         log.debug(f"payload: {payload}")
-        r = requests.post(url, json=payload)
-        r.raise_for_status()
-        log.debug(f"r.text: {r.text}")
+        async with aiohttp.ClientSession() as session:
+            async with session.post(url, json=payload) as r:
+                r_text = await r.text()
+                r.raise_for_status()
+                log.debug(f"r.text: {r_text}")
+
         return True
     except Exception as e:
         log.exception(e)
diff --git a/package-lock.json b/package-lock.json
index 58d7858226..e1753472d9 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
 	"name": "open-webui",
-	"version": "0.6.22",
+	"version": "0.6.23",
 	"lockfileVersion": 3,
 	"requires": true,
 	"packages": {
 		"": {
 			"name": "open-webui",
-			"version": "0.6.22",
+			"version": "0.6.23",
 			"dependencies": {
 				"@azure/msal-browser": "^4.5.0",
 				"@codemirror/lang-javascript": "^6.2.2",
diff --git a/package.json b/package.json
index 7e2d59074c..87152d3258 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
 	"name": "open-webui",
-	"version": "0.6.22",
+	"version": "0.6.23",
 	"private": true,
 	"scripts": {
 		"dev": "npm run pyodide:fetch && vite dev --host",
diff --git a/pyproject.toml b/pyproject.toml
index faf0ec64e2..abedf3fe89 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ authors = [
 license = { file = "LICENSE" }
 dependencies = [
     "fastapi==0.115.7",
-    "uvicorn[standard]==0.34.2",
+    "uvicorn[standard]==0.35.0",
 
     "pydantic==2.11.7",
     "python-multipart==0.0.20",
diff --git a/src/app.html b/src/app.html
index 30a0ecc067..be2cc0f4ad 100644
--- a/src/app.html
+++ b/src/app.html
@@ -86,6 +86,10 @@
 			document.addEventListener('DOMContentLoaded', function () {
 				const splash = document.getElementById('splash-screen');
 
+				if (document.documentElement.classList.contains('her')) {
+					return;
+				}
+
 				if (splash) splash.prepend(logo);
 			});
 		})();
@@ -167,6 +171,7 @@
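
Note: the sketch below is illustrative and not part of the diff. It shows how the new `throttle` decorator added to `backend/open_webui/utils/misc.py` above can be used; `update_last_active` is a hypothetical stand-in for a frequent write such as the `user.last_active_at` deduplication this release introduces. Calls with the same arguments inside the interval return `None` instead of executing.

```python
import time

from open_webui.utils.misc import throttle


@throttle(interval=10.0)  # suppress repeat calls with identical args for 10s
def update_last_active(user_id: str):
    # Hypothetical frequent write, e.g. persisting user.last_active_at.
    return f"updated last_active_at for {user_id}"


print(update_last_active("u1"))  # executes and returns the string
print(update_last_active("u1"))  # same key within the interval -> None
print(update_last_active("u2"))  # different arguments -> executes
time.sleep(10)
print(update_last_active("u1"))  # interval elapsed -> executes again
```

Because the throttle key is `(args, freeze(kwargs))`, distinct users (or any distinct argument combinations) are throttled independently, which is what makes the pattern safe for per-user write deduplication.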