mirror of
https://github.com/browser-use/browser-use
synced 2026-04-22 17:45:09 +02:00
fix sensitive_data redaction order to prevent substring leaks (#4660)
Resolves #4609
<!-- This is an auto-generated description by cubic. -->
---
## Summary by cubic
Fixes #4609 by preventing substring leaks during sensitive data
redaction. Redaction now replaces longer secrets first and uses shared
utils for consistent behavior.
- **Bug Fixes**
- Redact longest matches first to avoid partial/substring leaks.
- Support both legacy flat and domain-scoped `sensitive_data` formats.
- Apply consistent redaction across message manager and views.
- **Refactors**
- Added `collect_sensitive_data_values` and `redact_sensitive_string` in
`browser_use/utils.py`.
- Replaced inline redaction logic in
`browser_use/agent/message_manager/service.py` and
`browser_use/agent/views.py`.
<sup>Written for commit 65f87b7fca.
Summary will update on new commits.</sup>
<!-- End of auto-generated description by cubic. -->
This commit is contained in:
@@ -25,7 +25,12 @@ from browser_use.llm.messages import (
|
||||
UserMessage,
|
||||
)
|
||||
from browser_use.observability import observe_debug
|
||||
from browser_use.utils import match_url_with_domain_pattern, time_execution_sync
|
||||
from browser_use.utils import (
|
||||
collect_sensitive_data_values,
|
||||
match_url_with_domain_pattern,
|
||||
redact_sensitive_string,
|
||||
time_execution_sync,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -573,30 +578,14 @@ class MessageManager:
|
||||
if not self.sensitive_data:
|
||||
return value
|
||||
|
||||
# Collect all sensitive values, immediately converting old format to new format
|
||||
sensitive_values: dict[str, str] = {}
|
||||
|
||||
# Process all sensitive data entries
|
||||
for key_or_domain, content in self.sensitive_data.items():
|
||||
if isinstance(content, dict):
|
||||
# Already in new format: {domain: {key: value}}
|
||||
for key, val in content.items():
|
||||
if val: # Skip empty values
|
||||
sensitive_values[key] = val
|
||||
elif content: # Old format: {key: value} - convert to new format internally
|
||||
# We treat this as if it was {'http*://*': {key_or_domain: content}}
|
||||
sensitive_values[key_or_domain] = content
|
||||
sensitive_values = collect_sensitive_data_values(self.sensitive_data)
|
||||
|
||||
# If there are no valid sensitive data entries, just return the original value
|
||||
if not sensitive_values:
|
||||
logger.warning('No valid entries found in sensitive_data dictionary')
|
||||
return value
|
||||
|
||||
# Replace all valid sensitive data values with their placeholder tags
|
||||
for key, val in sensitive_values.items():
|
||||
value = value.replace(val, f'<secret>{key}</secret>')
|
||||
|
||||
return value
|
||||
return redact_sensitive_string(value, sensitive_values)
|
||||
|
||||
if isinstance(message.content, str):
|
||||
message.content = replace_sensitive(message.content)
|
||||
|
||||
@@ -27,6 +27,7 @@ from browser_use.filesystem.file_system import FileSystemState
|
||||
from browser_use.llm.base import BaseChatModel
|
||||
from browser_use.tokens.views import UsageSummary
|
||||
from browser_use.tools.registry.views import ActionModel
|
||||
from browser_use.utils import collect_sensitive_data_values, redact_sensitive_string
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -512,29 +513,13 @@ class AgentHistory(BaseModel):
|
||||
if not sensitive_data:
|
||||
return value
|
||||
|
||||
# Collect all sensitive values, immediately converting old format to new format
|
||||
sensitive_values: dict[str, str] = {}
|
||||
|
||||
# Process all sensitive data entries
|
||||
for key_or_domain, content in sensitive_data.items():
|
||||
if isinstance(content, dict):
|
||||
# Already in new format: {domain: {key: value}}
|
||||
for key, val in content.items():
|
||||
if val: # Skip empty values
|
||||
sensitive_values[key] = val
|
||||
elif content: # Old format: {key: value} - convert to new format internally
|
||||
# We treat this as if it was {'http*://*': {key_or_domain: content}}
|
||||
sensitive_values[key_or_domain] = content
|
||||
sensitive_values = collect_sensitive_data_values(sensitive_data)
|
||||
|
||||
# If there are no valid sensitive data entries, just return the original value
|
||||
if not sensitive_values:
|
||||
return value
|
||||
|
||||
# Replace all valid sensitive data values with their placeholder tags
|
||||
for key, val in sensitive_values.items():
|
||||
value = value.replace(val, f'<secret>{key}</secret>')
|
||||
|
||||
return value
|
||||
return redact_sensitive_string(value, sensitive_values)
|
||||
|
||||
def _filter_sensitive_data_from_dict(
|
||||
self, data: dict[str, Any], sensitive_data: dict[str, str | dict[str, str]] | None
|
||||
|
||||
@@ -31,6 +31,30 @@ _openai_bad_request_error: type | None = None
|
||||
_groq_bad_request_error: type | None = None
|
||||
|
||||
|
||||
def collect_sensitive_data_values(sensitive_data: dict[str, str | dict[str, str]] | None) -> dict[str, str]:
|
||||
"""Flatten legacy and domain-scoped sensitive data into placeholder -> value mappings."""
|
||||
if not sensitive_data:
|
||||
return {}
|
||||
|
||||
sensitive_values: dict[str, str] = {}
|
||||
for key_or_domain, content in sensitive_data.items():
|
||||
if isinstance(content, dict):
|
||||
for key, val in content.items():
|
||||
if val:
|
||||
sensitive_values[key] = val
|
||||
elif content:
|
||||
sensitive_values[key_or_domain] = content
|
||||
|
||||
return sensitive_values
|
||||
|
||||
|
||||
def redact_sensitive_string(value: str, sensitive_values: dict[str, str]) -> str:
	"""Replace every occurrence of each secret in *value* with ``<secret>key</secret>``.

	Secrets are applied longest-first so that a secret which is a substring
	of another secret cannot leave a partial leak behind (the motivation of
	this fix). Ties in length keep dict insertion order (stable sort).
	"""
	# Negating the length gives a descending-by-length, stable ordering,
	# equivalent to sorting with reverse=True on the plain length.
	ordered = sorted(sensitive_values.items(), key=lambda kv: -len(kv[1]))
	for placeholder, secret in ordered:
		value = value.replace(secret, f'<secret>{placeholder}</secret>')
	return value
|
||||
|
||||
|
||||
def _get_openai_bad_request_error() -> type | None:
|
||||
"""Lazy loader for OpenAI BadRequestError."""
|
||||
global _openai_bad_request_error
|
||||
|
||||
Reference in New Issue
Block a user