mirror of
https://github.com/open-webui/open-webui.git
synced 2026-05-05 06:42:22 +02:00
fix: consolidate psql cleanup logic and fix web add with cleanup (#20072)
* sequential * consolidate logic and fix for web add * Update WebSearch.svelte * Update retrieval.py * Update retrieval.py * Update WebSearch.svelte
This commit is contained in:
@@ -373,6 +373,32 @@ def sanitize_filename(file_name):
|
||||
return final_file_name
|
||||
|
||||
|
||||
def sanitize_text_for_db(text: str) -> str:
    """Return *text* with null bytes and surrogate code points stripped.

    The result is intended for PostgreSQL storage, which cannot hold
    ``\\x00`` in text fields. Values that are not ``str`` instances are
    handed back untouched.
    """
    if not isinstance(text, str):
        return text

    # Drop null bytes first: PostgreSQL cannot store \x00 in text fields.
    cleaned = text.replace("\x00", "")

    # Surrogates (typically left behind by binary data or encoding mishaps)
    # are forced into bytes via "surrogatepass"; those bytes are not valid
    # UTF-8, so decoding with "ignore" silently discards them.
    try:
        raw = cleaned.encode("utf-8", errors="surrogatepass")
        return raw.decode("utf-8", errors="ignore")
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Best effort: fall back to the null-stripped text.
        return cleaned
|
||||
|
||||
|
||||
def sanitize_data_for_db(obj):
    """Walk *obj* and sanitize every string value for database storage.

    Dicts and lists are rebuilt as new containers with their values/items
    sanitized recursively; dict keys are kept as they are. Strings go
    through ``sanitize_text_for_db``. Any other object (numbers, ``None``,
    tuples, ...) is returned unchanged.
    """
    if isinstance(obj, dict):
        return {key: sanitize_data_for_db(value) for key, value in obj.items()}

    if isinstance(obj, list):
        return [sanitize_data_for_db(item) for item in obj]

    if isinstance(obj, str):
        return sanitize_text_for_db(obj)

    return obj
|
||||
|
||||
|
||||
def extract_folders_after_data_docs(path):
|
||||
# Convert the path to a Path object if it's not already
|
||||
path = Path(path)
|
||||
|
||||
Reference in New Issue
Block a user