open-webui/backend/open_webui/retrieval/vector/utils.py

29 lines
776 B
Python
Raw Normal View History

2025-07-31 21:45:06 +08:00
from datetime import datetime
from typing import Any
2025-09-29 09:17:27 +08:00
# Metadata keys stripped by filter_metadata() and process_metadata().
KEYS_TO_EXCLUDE = [
    "content",
    "pages",
    "tables",
    "paragraphs",
    "sections",
    "figures",
]
2025-07-31 21:45:06 +08:00
2025-09-29 09:17:27 +08:00
def filter_metadata(metadata: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``metadata`` without the keys in ``KEYS_TO_EXCLUDE``.

    The input mapping is left untouched; values are carried over unchanged.

    Args:
        metadata: Mapping of metadata field names to values.

    Returns:
        A new dict holding every entry whose key is not listed in
        ``KEYS_TO_EXCLUDE``.
    """
    return {
        key: value for key, value in metadata.items() if key not in KEYS_TO_EXCLUDE
    }
def process_metadata(
    metadata: dict[str, Any],
) -> dict[str, Any]:
    """Strip excluded fields from ``metadata`` and stringify complex values.

    Keys listed in ``KEYS_TO_EXCLUDE`` are removed. Any remaining value that
    is a ``datetime``, ``list`` or ``dict`` is replaced by ``str(value)``.

    Args:
        metadata: Metadata mapping to clean up. It is modified in place.

    Returns:
        The same ``metadata`` object that was passed in.
    """
    # Walk a snapshot of the items: deleting keys while iterating the live
    # ``items()`` view raises "dictionary changed size during iteration".
    for key, value in list(metadata.items()):
        # Remove large fields
        if key in KEYS_TO_EXCLUDE:
            del metadata[key]
            # Skip the conversion below so a removed list/dict value is not
            # written back under the same key.
            continue

        # Convert non-serializable fields to strings
        if isinstance(value, (datetime, list, dict)):
            metadata[key] = str(value)
    return metadata