From 65f87b7fcae6c329cb1ccb3fd7154ec5990703a1 Mon Sep 17 00:00:00 2001
From: Laith Weinberger <70768382+laithrw@users.noreply.github.com>
Date: Sat, 11 Apr 2026 18:16:24 -0400
Subject: [PATCH] fix sensitive_data redaction order to prevent substring leaks
---
browser_use/agent/message_manager/service.py | 27 ++++++--------------
browser_use/agent/views.py | 21 +++------------
browser_use/utils.py | 24 +++++++++++++++++
3 files changed, 35 insertions(+), 37 deletions(-)
diff --git a/browser_use/agent/message_manager/service.py b/browser_use/agent/message_manager/service.py
index a2be2883c..6c7cae11a 100644
--- a/browser_use/agent/message_manager/service.py
+++ b/browser_use/agent/message_manager/service.py
@@ -25,7 +25,12 @@ from browser_use.llm.messages import (
UserMessage,
)
from browser_use.observability import observe_debug
-from browser_use.utils import match_url_with_domain_pattern, time_execution_sync
+from browser_use.utils import (
+ collect_sensitive_data_values,
+ match_url_with_domain_pattern,
+ redact_sensitive_string,
+ time_execution_sync,
+)
logger = logging.getLogger(__name__)
@@ -573,30 +578,14 @@ class MessageManager:
if not self.sensitive_data:
return value
- # Collect all sensitive values, immediately converting old format to new format
- sensitive_values: dict[str, str] = {}
-
- # Process all sensitive data entries
- for key_or_domain, content in self.sensitive_data.items():
- if isinstance(content, dict):
- # Already in new format: {domain: {key: value}}
- for key, val in content.items():
- if val: # Skip empty values
- sensitive_values[key] = val
- elif content: # Old format: {key: value} - convert to new format internally
- # We treat this as if it was {'http*://*': {key_or_domain: content}}
- sensitive_values[key_or_domain] = content
+ sensitive_values = collect_sensitive_data_values(self.sensitive_data)
# If there are no valid sensitive data entries, just return the original value
if not sensitive_values:
logger.warning('No valid entries found in sensitive_data dictionary')
return value
- # Replace all valid sensitive data values with their placeholder tags
- for key, val in sensitive_values.items():
- value = value.replace(val, f'{key}')
-
- return value
+ return redact_sensitive_string(value, sensitive_values)
if isinstance(message.content, str):
message.content = replace_sensitive(message.content)
diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py
index a7209378f..dbec9a534 100644
--- a/browser_use/agent/views.py
+++ b/browser_use/agent/views.py
@@ -27,6 +27,7 @@ from browser_use.filesystem.file_system import FileSystemState
from browser_use.llm.base import BaseChatModel
from browser_use.tokens.views import UsageSummary
from browser_use.tools.registry.views import ActionModel
+from browser_use.utils import collect_sensitive_data_values, redact_sensitive_string
logger = logging.getLogger(__name__)
@@ -512,29 +513,13 @@ class AgentHistory(BaseModel):
if not sensitive_data:
return value
- # Collect all sensitive values, immediately converting old format to new format
- sensitive_values: dict[str, str] = {}
-
- # Process all sensitive data entries
- for key_or_domain, content in sensitive_data.items():
- if isinstance(content, dict):
- # Already in new format: {domain: {key: value}}
- for key, val in content.items():
- if val: # Skip empty values
- sensitive_values[key] = val
- elif content: # Old format: {key: value} - convert to new format internally
- # We treat this as if it was {'http*://*': {key_or_domain: content}}
- sensitive_values[key_or_domain] = content
+ sensitive_values = collect_sensitive_data_values(sensitive_data)
# If there are no valid sensitive data entries, just return the original value
if not sensitive_values:
return value
- # Replace all valid sensitive data values with their placeholder tags
- for key, val in sensitive_values.items():
- value = value.replace(val, f'{key}')
-
- return value
+ return redact_sensitive_string(value, sensitive_values)
def _filter_sensitive_data_from_dict(
self, data: dict[str, Any], sensitive_data: dict[str, str | dict[str, str]] | None
diff --git a/browser_use/utils.py b/browser_use/utils.py
index 5661c9f34..a949aa77d 100644
--- a/browser_use/utils.py
+++ b/browser_use/utils.py
@@ -31,6 +31,30 @@ _openai_bad_request_error: type | None = None
_groq_bad_request_error: type | None = None
+def collect_sensitive_data_values(sensitive_data: dict[str, str | dict[str, str]] | None) -> dict[str, str]:
+    """Flatten legacy and domain-scoped sensitive data into one placeholder -> value mapping.
+
+    Args:
+        sensitive_data: Either the legacy flat form ``{placeholder: value}`` or the
+            domain-scoped form ``{domain: {placeholder: value}}``; both forms may be
+            mixed in one dict. ``None`` or an empty dict yields an empty result.
+
+    Returns:
+        A flat ``{placeholder: value}`` dict with empty values omitted. Domain keys
+        are discarded. When a placeholder name occurs more than once, the value
+        seen last replaces the earlier one.
+    """
+    flattened: dict[str, str] = {}
+    for name, entry in (sensitive_data or {}).items():
+        if not isinstance(entry, dict):
+            # Legacy entry: the outer key is itself the placeholder name.
+            if entry:
+                flattened[name] = entry
+            continue
+        # Domain-scoped entry: keep only the inner pairs that carry a value.
+        flattened.update({placeholder: secret for placeholder, secret in entry.items() if secret})
+    return flattened
+
+
+def redact_sensitive_string(value: str, sensitive_values: dict[str, str]) -> str:
+    """Replace every occurrence of each sensitive value in ``value`` with its placeholder.
+
+    Secrets are processed longest first, so a secret that is a substring of a
+    longer one cannot split the longer match and leak its remainder. Text that
+    has already been replaced by a placeholder is never scanned again, so a
+    short secret that happens to occur inside a placeholder name cannot corrupt it.
+
+    Args:
+        value: Text to redact.
+        sensitive_values: Flat ``{placeholder: secret}`` mapping. Empty secrets are ignored.
+
+    Returns:
+        ``value`` with all secrets replaced by their placeholder names.
+    """
+    # Stable sort: secrets of equal length keep the mapping's insertion order.
+    ordered = sorted(sensitive_values.items(), key=lambda item: len(item[1]), reverse=True)
+
+    # (text, is_placeholder) pieces; only pieces of the original text are searched.
+    segments: list[tuple[str, bool]] = [(value, False)]
+    for key, secret in ordered:
+        if not secret:
+            # An empty needle matches between every character with str.replace
+            # and raises ValueError with str.split, so skip it.
+            continue
+        placeholder = f'{key}'
+        remaining: list[tuple[str, bool]] = []
+        for text, is_placeholder in segments:
+            if is_placeholder:
+                remaining.append((text, True))
+                continue
+            # str.split scans left to right without overlap, like str.replace.
+            for index, chunk in enumerate(text.split(secret)):
+                if index:
+                    remaining.append((placeholder, True))
+                if chunk:
+                    remaining.append((chunk, False))
+        segments = remaining
+    return ''.join(text for text, _ in segments)
+
+
def _get_openai_bad_request_error() -> type | None:
"""Lazy loader for OpenAI BadRequestError."""
global _openai_bad_request_error