2024-06-11 22:19:08 +08:00
|
|
|
import logging
|
2024-08-14 20:46:31 +08:00
|
|
|
from typing import Optional
|
2024-08-28 06:10:27 +08:00
|
|
|
|
2024-12-12 10:05:42 +08:00
|
|
|
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
|
2025-07-07 19:56:05 +08:00
|
|
|
from ddgs import DDGS
|
|
|
|
|
from ddgs.exceptions import RatelimitException
|
2024-09-04 22:54:48 +08:00
|
|
|
from open_webui.env import SRC_LOG_LEVELS
|
2024-06-11 22:19:08 +08:00
|
|
|
|
|
|
|
|
# Module-level logger; its level is taken from the "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])
|
|
|
|
|
|
|
|
|
|
|
2024-06-18 05:32:23 +08:00
|
|
|
def search_duckduckgo(
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
    concurrent_requests: Optional[int] = None,
) -> list[SearchResult]:
    """
    Run a DuckDuckGo text search and convert the hits into SearchResult objects.

    Args:
        query (str): The query to search for
        count (int): Passed to the search client as ``max_results``
        filter_list (Optional[list[str]]): When truthy, the raw hits are passed
            through ``get_filtered_results`` together with this list before
            conversion (presumably a list of allowed domains; verify against
            ``get_filtered_results``)
        concurrent_requests (Optional[int]): When truthy, assigned to the
            ``threads`` attribute of the DDGS client before searching

    Returns:
        list[SearchResult]: One entry per remaining hit, with ``link`` taken
        from the hit's ``"href"`` key (required) and ``title`` / ``snippet``
        taken from its ``"title"`` / ``"body"`` keys (``None`` when absent)

    Note:
        A ``RatelimitException`` raised by the search call is logged and not
        re-raised; the search is then treated as having produced no hits.
    """
    # Default to "no hits" so a rate-limited search still falls through cleanly.
    raw_hits = []

    with DDGS() as client:
        if concurrent_requests:
            client.threads = concurrent_requests

        try:
            raw_hits = client.text(
                query,
                safesearch="moderate",
                max_results=count,
                backend="lite",
            )
        except RatelimitException as exc:
            log.error(f"RatelimitException: {exc}")

    if filter_list:
        raw_hits = get_filtered_results(raw_hits, filter_list)

    # Map each raw hit dict onto the project's SearchResult model.
    converted = []
    for hit in raw_hits:
        converted.append(
            SearchResult(
                link=hit["href"],
                title=hit.get("title"),
                snippet=hit.get("body"),
            )
        )
    return converted
|