open-webui/backend/open_webui/retrieval/web/duckduckgo.py

53 lines
1.5 KiB
Python
Raw Normal View History

import logging
2024-08-14 20:46:31 +08:00
from typing import Optional
2024-08-28 06:10:27 +08:00
2024-12-12 10:05:42 +08:00
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
2025-07-07 19:56:05 +08:00
from ddgs import DDGS
from ddgs.exceptions import RatelimitException
from open_webui.env import SRC_LOG_LEVELS
# Module-level logger; its level comes from the "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
2024-06-18 05:32:23 +08:00
def search_duckduckgo(
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
    concurrent_requests: Optional[int] = None,
) -> list[SearchResult]:
    """
    Run a DuckDuckGo text search and wrap every hit in a SearchResult.

    Args:
        query (str): The text to search for.
        count (int): Forwarded to the client as ``max_results``.
        filter_list (Optional[list[str]]): When truthy, the raw hits are
            passed through ``get_filtered_results`` together with this list
            before they are converted.
        concurrent_requests (Optional[int]): When truthy, assigned to the
            client's ``threads`` attribute before the search is issued.

    Returns:
        list[SearchResult]: One SearchResult per remaining hit. If the client
        raises RatelimitException the error is logged and the search is
        treated as having produced no hits.
    """
    # Start from "no hits" so a rate-limited search falls through cleanly.
    raw_hits = []

    with DDGS() as client:
        if concurrent_requests:
            client.threads = concurrent_requests

        try:
            raw_hits = client.text(
                query,
                safesearch="moderate",
                max_results=count,
                backend="lite",
            )
        except RatelimitException as e:
            log.error(f"RatelimitException: {e}")

    if filter_list:
        raw_hits = get_filtered_results(raw_hits, filter_list)

    # "href" is required on every hit; "title" and "body" may be absent.
    converted = []
    for hit in raw_hits:
        converted.append(
            SearchResult(
                link=hit["href"],
                title=hit.get("title"),
                snippet=hit.get("body"),
            )
        )
    return converted