From 2e3550ebec025e9dceff2dbbd9a124fadb1dfea8 Mon Sep 17 00:00:00 2001 From: Nick Sweeting Date: Thu, 29 May 2025 17:58:47 -0700 Subject: [PATCH] auto-load storage_state into remote browsers as well, use downloads_dir for everything --- browser_use/browser/session.py | 172 ++++++++++++++++++++++++--------- browser_use/utils.py | 14 +++ 2 files changed, 143 insertions(+), 43 deletions(-) diff --git a/browser_use/browser/session.py b/browser_use/browser/session.py index f8941899e..30f552748 100644 --- a/browser_use/browser/session.py +++ b/browser_use/browser/session.py @@ -32,7 +32,7 @@ from browser_use.browser.views import ( from browser_use.dom.clickable_element_processor.service import ClickableElementProcessor from browser_use.dom.service import DomService from browser_use.dom.views import DOMElementNode, SelectorMap -from browser_use.utils import match_url_with_domain_pattern, time_execution_async, time_execution_sync +from browser_use.utils import match_url_with_domain_pattern, merge_dicts, time_execution_async, time_execution_sync # Check if running in Docker IN_DOCKER = os.environ.get('IN_DOCKER', 'false').lower()[0] in 'ty1' @@ -770,9 +770,8 @@ class BrowserSession(BaseModel): except Exception as e: logger.warning(f'⚠️ Failed to update browser geolocation {self.browser_profile.geolocation}: {type(e).__name__}: {e}') - # if self.storage_state: - # TODO: implement applying self.stroage_state to an existing browser_context, currently only works on browser.new_context() I think - # await self.browser_context.set_storage_state(self.storage_state) + if self.storage_state: + await self.load_storage_state() page = None @@ -988,7 +987,7 @@ class BrowserSession(BaseModel): async def perform_click(click_func): """Performs the actual click, handling both download and navigation scenarios.""" - if self.browser_profile.save_downloads_path: + if self.browser_profile.downloads_dir: try: # Try short-timeout expect_download to detect a file download has been been triggered async with page.expect_download(timeout=5000) as download_info: @@ -996,10 +995,8 @@ class BrowserSession(BaseModel): download = await download_info.value # Determine file path suggested_filename = download.suggested_filename - unique_filename = await self._get_unique_filename( - self.browser_profile.save_downloads_path, suggested_filename - ) - download_path = os.path.join(self.browser_profile.save_downloads_path, unique_filename) + unique_filename = await self._get_unique_filename(self.browser_profile.downloads_dir, suggested_filename) + download_path = os.path.join(self.browser_profile.downloads_dir, unique_filename) await download.save_as(download_path) logger.debug(f'⬇️ Download triggered. Saved file to: {download_path}') return download_path @@ -1092,53 +1089,142 @@ class BrowserSession(BaseModel): return await self.browser_context.cookies() return [] - async def save_cookies(self, path: Path | None = None) -> None: + async def save_cookies(self, *args, **kwargs) -> None: """ - Save cookies to the specified path or the default cookies_file in the downloads_dir. + Old name for the new save_storage_state() function. """ - if self.browser_context: - # old deprecated cookies_file method: - cookies = await self.browser_context.cookies() - out_path = path or self.browser_profile.cookies_file - if out_path: - # If out_path is not absolute, resolve relative to downloads_dir - out_path = Path(out_path) - if not out_path.is_absolute(): - out_path = Path(self.browser_profile.downloads_dir or '.') / out_path - out_path.parent.mkdir(parents=True, exist_ok=True) - out_path.write_text(json.dumps(cookies, indent=4)) # TODO: replace with anyio asyncio or anyio write + await self.save_storage_state(*args, **kwargs) - # new recommended storage_state method: - storage_state = await self.browser_context.storage_state() - storage_state_path = Path(self.browser_profile.downloads_dir or '.') / 'storage_state.json' - storage_state_path.write_text(json.dumps(storage_state, indent=4)) + @require_initialization + async def save_storage_state(self, path: Path | None = None) -> None: + """ + Save cookies to the specified path or the configured cookies_file and/or storage_state. + """ + storage_state = await self.browser_context.storage_state() + cookies = storage_state['cookies'] + if cookies and self.browser_profile.cookies_file: + # only show warning if they configured cookies_file (not if they passed in a path to this function as an arg) + logger.warning( + '⚠️ cookies_file is deprecated and will be removed in a future version. ' + 'Please use storage_state instead for loading cookies and other browser state. ' + 'See: https://playwright.dev/python/docs/api/class-browsercontext#browser-context-storage-state' + ) - async def load_cookies_from_file(self) -> None: - """ - Load cookies from the cookies_file if it exists and apply them to the browser context. - """ - if not self.browser_profile.cookies_file or not self.browser_context: + # save cookies_file if passed a cookies file path or if profile cookies_file is configured + path_is_storage_state = path and str(path).endswith('storage_state.json') + if (path and not path_is_storage_state) or self.browser_profile.cookies_file: + try: + cookies_file_path = Path(path or self.browser_profile.cookies_file).expanduser().resolve() + cookies_file_path.parent.mkdir(parents=True, exist_ok=True) + cookies_file_path.write_text(json.dumps(cookies, indent=4)) # TODO: convert to async + logger.info(f'🍪 Saved {len(cookies)} cookies to cookies_file={_log_pretty_path(cookies_file_path)}') + except Exception as e: + logger.warning( + f'❌ Failed to save cookies to cookies_file={_log_pretty_path(cookies_file_path)}: {type(e).__name__}: {e}' + ) + + if path: + # if they passed in a path to the old save_cookies function, + # also save a new storage_state.json next to it to encourage adoption of the new format + storage_state_path = Path(path).expanduser().resolve().parent / 'storage_state.json' + else: + # otherwise use configured storage_state path + storage_state_path = self.browser_profile.storage_state + + if storage_state_path is None: + return + elif not isinstance(storage_state_path, (str, Path)): + logger.warning('⚠️ storage_state must be a json file path to be able to update it, skipping...') return - # Show deprecation warning - logger.warning( - '⚠️ cookies_file is deprecated and will be removed in a future version. ' - 'Please use storage_state instead for loading cookies and other browser state. ' - 'See: https://playwright.dev/python/docs/api/class-browsercontext#browser-context-storage-state' - ) + try: + storage_state_path = Path(storage_state_path).expanduser().resolve() + storage_state_path.parent.mkdir(parents=True, exist_ok=True) + storage_state = await self.browser_context.storage_state() - cookies_path = Path(self.browser_profile.cookies_file) - if not cookies_path.is_absolute(): - cookies_path = Path(self.browser_profile.downloads_dir or '.') / cookies_path + # always merge storage states, never overwrite (so two browsers can share the same storage_state.json) + if storage_state_path.exists(): + try: + existing_storage_state = json.loads(storage_state_path.read_text()) # TODO: convert to async + merged_storage_state = merge_dicts(existing_storage_state, storage_state) + # in case another process races us and updates the file here, we will overwrite their changes + # if we really want to support real concurrency we need a sqlite database or something + storage_state_path.write_text(json.dumps(merged_storage_state, indent=4)) # TODO: convert to async + except Exception as e: + logger.warning( + f'❌ Failed to merge storage state with existing storage_state={_log_pretty_path(storage_state_path)}: {type(e).__name__}: {e}' + ) + return + + storage_state_path.write_text(json.dumps(storage_state, indent=4)) # TODO: convert to async + logger.info( + f'🍪 Saved {len(storage_state["cookies"])} cookies to storage_state={_log_pretty_path(storage_state_path)}' + ) + except Exception as e: + logger.warning( + f'❌ Failed to save storage state to storage_state={_log_pretty_path(storage_state_path)}: {type(e).__name__}: {e}' + ) + + @require_initialization + async def load_storage_state(self) -> None: + """ + Load cookies from the storage_state or cookies_file and apply them to the browser context. + """ + + if self.browser_profile.cookies_file: + # Show deprecation warning + logger.warning( + '⚠️ cookies_file is deprecated and will be removed in a future version. ' + 'Please use storage_state instead for loading cookies and other browser state. ' + 'See: https://playwright.dev/python/docs/api/class-browsercontext#browser-context-storage-state' + ) + + cookies_path = Path(self.browser_profile.cookies_file) + if not cookies_path.is_absolute(): + cookies_path = Path(self.browser_profile.downloads_dir or '.') / cookies_path - if cookies_path.exists(): try: cookies_data = json.loads(cookies_path.read_text()) if cookies_data: await self.browser_context.add_cookies(cookies_data) - logger.info(f'🍪 Loaded {len(cookies_data)} cookies from {_log_pretty_path(cookies_path)}') + logger.info(f'🍪 Loaded {len(cookies_data)} cookies from cookies_file={_log_pretty_path(cookies_path)}') except Exception as e: - logger.warning(f'❌ Failed to load cookies from {_log_pretty_path(cookies_path)}: {type(e).__name__}: {e}') + logger.warning( + f'❌ Failed to load cookies from cookies_file={_log_pretty_path(cookies_path)}: {type(e).__name__}: {e}' + ) + + if self.browser_profile.storage_state: + storage_state = self.browser_profile.storage_state + if isinstance(storage_state, (str, Path)): + try: + storage_state = json.loads(Path(storage_state).read_text()) + except Exception as e: + logger.warning( + f'❌ Failed to load cookies from storage_state={_log_pretty_path(self.browser_profile.storage_state)}: {type(e).__name__}: {e}' + ) + return + + try: + assert isinstance(storage_state, dict), f'Got unexpected type for storage_state: {type(storage_state)}' + await self.browser_context.add_cookies(storage_state['cookies']) + # TODO: also handle localStroage, IndexedDB, SessionStorage + # playwright doesn't provide an API for setting these before launch + # https://playwright.dev/python/docs/auth#session-storage + # await self.browser_context.add_local_storage(storage_state['localStorage']) + logger.info( + f'🍪 Loaded {len(storage_state["cookies"])} cookies from storage_state={_log_pretty_path(self.browser_profile.storage_state)}' + ) + except Exception as e: + logger.warning( + f'❌ Failed to load cookies from storage_state={_log_pretty_path(self.browser_profile.storage_state)}: {type(e).__name__}: {e}' + ) + return + + async def load_cookies_from_file(self, *args, **kwargs) -> None: + """ + Old name for the new load_storage_state() function. + """ + await self.load_storage_state(*args, **kwargs) # @property # def browser_extension_pages(self) -> list[Page]: diff --git a/browser_use/utils.py b/browser_use/utils.py index d559e0450..75b54ac06 100644 --- a/browser_use/utils.py +++ b/browser_use/utils.py @@ -477,3 +477,17 @@ def match_url_with_domain_pattern(url: str, domain_pattern: str, log_warnings: b logger = logging.getLogger(__name__) logger.error(f'⛔️ Error matching URL {url} with pattern {domain_pattern}: {type(e).__name__}: {e}') return False + + +def merge_dicts(a: dict, b: dict, path: tuple[str, ...] = ()): + for key in b: + if key in a: + if isinstance(a[key], dict) and isinstance(b[key], dict): + merge_dicts(a[key], b[key], path + [str(key)]) + elif isinstance(a[key], list) and isinstance(b[key], list): + a[key] = a[key] + b[key] + elif a[key] != b[key]: + raise Exception('Conflict at ' + '.'.join(path + (str(key),))) + else: + a[key] = b[key] + return a