From ad92564af4a7d833873f6ca5a99231a793d4beb7 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Fri, 4 Jul 2025 14:09:22 +0200 Subject: [PATCH 01/13] refactor: simplify email query parameter description and enhance action result messages - Updated the description of the `query` parameter in `GetRecentEmailsParams` for clarity. - Refined the action result messages for better readability and consistency, including specific query details in the long-term memory response. --- browser_use/integrations/gmail/actions.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/browser_use/integrations/gmail/actions.py b/browser_use/integrations/gmail/actions.py index d19fd6211..5efdcb3a4 100644 --- a/browser_use/integrations/gmail/actions.py +++ b/browser_use/integrations/gmail/actions.py @@ -22,9 +22,7 @@ _gmail_service: GmailService | None = None class GetRecentEmailsParams(BaseModel): """Parameters for getting recent emails""" - query: str = Field( - default='', description='Gmail search query (e.g., "from:noreply@example.com") - optional additional filter' - ) + query: str = Field(default='', description='Gmail search query (e.g., "from:noreply@example.com")') max_results: int = Field(default=10, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 10)') @@ -49,10 +47,7 @@ def register_gmail_actions( _gmail_service = GmailService() @controller.action( - description='📧 **Get recent emails** - to fetch recent emails from the past 5 minutes with full content. ' - 'Perfect for retrieving verification codes, OTP, 2FA tokens, or any recent email content. ' - 'This action accesses your Gmail inbox to read email messages and extract verification codes. ' - 'Returns complete email content so you can extract verification codes or analyze email details yourself.', + description='Get recent emails from the mailbox with a query to retrieve verification codes, OTP, 2FA tokens, or any recent email content.', param_model=GetRecentEmailsParams, ) async def get_recent_emails(params: GetRecentEmailsParams) -> ActionResult: @@ -88,9 +83,10 @@ def register_gmail_actions( if not emails: query_info = f" matching '{params.query}'" if params.query.strip() else '' + memory = f'No recent emails found from last {time_filter}{query_info}' return ActionResult( - extracted_content=f'No emails found from the last {time_filter}{query_info}', - long_term_memory=f'No recent emails found from last {time_filter}', + extracted_content=memory, + long_term_memory=memory, ) # Format with full email content for large display @@ -108,7 +104,7 @@ def register_gmail_actions( return ActionResult( extracted_content=content, include_extracted_content_only_once=True, - long_term_memory=f'Retrieved {len(emails)} recent emails from last {time_filter}', + long_term_memory=f'Retrieved {len(emails)} recent emails from last {time_filter} for query {query}.', ) except Exception as e: From d8a08f088e06f382c7ef49e0d90b66608e23174e Mon Sep 17 00:00:00 2001 From: mertunsall Date: Fri, 4 Jul 2025 14:42:32 +0200 Subject: [PATCH 02/13] fix error in not initializing controller correctly --- browser_use/integrations/gmail/actions.py | 6 ++++-- eval/service.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/browser_use/integrations/gmail/actions.py b/browser_use/integrations/gmail/actions.py index 5efdcb3a4..2366372bf 100644 --- a/browser_use/integrations/gmail/actions.py +++ b/browser_use/integrations/gmail/actions.py @@ -28,7 +28,7 @@ class GetRecentEmailsParams(BaseModel): def register_gmail_actions( controller: Controller, gmail_service: GmailService | None = None, access_token: str | None = None -) -> None: +) -> Controller: """ Register Gmail actions with the provided controller Args: @@ -46,7 +46,7 @@ def register_gmail_actions( else: _gmail_service = GmailService() - @controller.action( + @controller.registry.action( description='Get recent emails from the mailbox with a query to retrieve verification codes, OTP, 2FA tokens, or any recent email content.', param_model=GetRecentEmailsParams, ) @@ -113,3 +113,5 @@ def register_gmail_actions( error=f'Error getting recent emails: {str(e)}', long_term_memory='Failed to get recent emails due to error', ) + + return controller diff --git a/eval/service.py b/eval/service.py index 974ba2ad9..56dcf534e 100644 --- a/eval/service.py +++ b/eval/service.py @@ -923,7 +923,7 @@ def create_controller( from browser_use.integrations.gmail import register_gmail_actions # Register Gmail actions using the access token - register_gmail_actions(controller, access_token=access_token) + controller = register_gmail_actions(controller, access_token=access_token) logger.info(f'Gmail 2FA integration registered successfully for user {user_id}') else: logger.info(f'No Gmail 2FA token found for user {user_id}, running without Gmail integration') From a60a2a37f9feb69667dc3e0a249b595cf097a8e0 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Fri, 4 Jul 2025 16:43:47 +0200 Subject: [PATCH 03/13] simplify --- browser_use/integrations/gmail/actions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/browser_use/integrations/gmail/actions.py b/browser_use/integrations/gmail/actions.py index 2366372bf..153823eae 100644 --- a/browser_use/integrations/gmail/actions.py +++ b/browser_use/integrations/gmail/actions.py @@ -22,7 +22,7 @@ _gmail_service: GmailService | None = None class GetRecentEmailsParams(BaseModel): """Parameters for getting recent emails""" - query: str = Field(default='', description='Gmail search query (e.g., "from:noreply@example.com")') + query: str = Field(default='', description='Gmail search query, e.g., "github"') max_results: int = Field(default=10, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 10)') @@ -47,7 +47,7 @@ def register_gmail_actions( _gmail_service = GmailService() @controller.registry.action( - description='Get recent emails from the mailbox with a query to retrieve verification codes, OTP, 2FA tokens, or any recent email content.', + description='Get recent emails from the mailbox to retrieve verification codes, OTP, 2FA tokens, or any recent email content.', param_model=GetRecentEmailsParams, ) async def get_recent_emails(params: GetRecentEmailsParams) -> ActionResult: From 45dc441b5327720c05271b78f39a6192a0f045f6 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Fri, 4 Jul 2025 16:57:23 +0200 Subject: [PATCH 04/13] even simpler. --- browser_use/integrations/gmail/actions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/browser_use/integrations/gmail/actions.py b/browser_use/integrations/gmail/actions.py index 153823eae..f922f7043 100644 --- a/browser_use/integrations/gmail/actions.py +++ b/browser_use/integrations/gmail/actions.py @@ -22,7 +22,7 @@ _gmail_service: GmailService | None = None class GetRecentEmailsParams(BaseModel): """Parameters for getting recent emails""" - query: str = Field(default='', description='Gmail search query, e.g., "github"') + keyword: str = Field(default='', description='Search keyword') max_results: int = Field(default=10, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 10)') @@ -47,7 +47,7 @@ def register_gmail_actions( _gmail_service = GmailService() @controller.registry.action( - description='Get recent emails from the mailbox to retrieve verification codes, OTP, 2FA tokens, or any recent email content.', + description='Get recent emails from the mailbox with a keyword to retrieve verification codes, OTP, 2FA tokens, or any recent email content.', param_model=GetRecentEmailsParams, ) async def get_recent_emails(params: GetRecentEmailsParams) -> ActionResult: From bd0dfd8f11c92d68c10a186fe3c2ecdbf8507862 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Fri, 4 Jul 2025 17:16:34 +0200 Subject: [PATCH 05/13] fix --- browser_use/integrations/gmail/actions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/browser_use/integrations/gmail/actions.py b/browser_use/integrations/gmail/actions.py index f922f7043..ff9400919 100644 --- a/browser_use/integrations/gmail/actions.py +++ b/browser_use/integrations/gmail/actions.py @@ -72,8 +72,8 @@ def register_gmail_actions( # Build query with time filter and optional user query query_parts = [f'newer_than:{time_filter}'] - if params.query.strip(): - query_parts.append(params.query.strip()) + if params.keyword.strip(): + query_parts.append(params.keyword.strip()) query = ' '.join(query_parts) logger.info(f'🔍 Gmail search query: {query}') @@ -82,7 +82,7 @@ def register_gmail_actions( emails = await _gmail_service.get_recent_emails(max_results=max_results, query=query, time_filter=time_filter) if not emails: - query_info = f" matching '{params.query}'" if params.query.strip() else '' + query_info = f" matching '{params.keyword}'" if params.keyword.strip() else '' memory = f'No recent emails found from last {time_filter}{query_info}' return ActionResult( extracted_content=memory, From f80bf952603e7acf8595b909948db328c1bf9789 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Fri, 4 Jul 2025 17:21:18 +0200 Subject: [PATCH 06/13] add gmail connection only for tasks that have OTP --- eval/service.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/eval/service.py b/eval/service.py index 56dcf534e..57e2238a2 100644 --- a/eval/service.py +++ b/eval/service.py @@ -893,8 +893,8 @@ def create_controller( else: controller = Controller(output_model=output_model) - # Add Gmail 2FA support if tokens dict is available and task contains email - if gmail_tokens_dict and task: + # Add Gmail 2FA support if tokens dict is available and task has login_type OTP + if gmail_tokens_dict and task and hasattr(task, 'login_type') and task.login_type == 'OTP': try: # Extract username from task - check multiple possible sources username = None @@ -924,16 +924,22 @@ def create_controller( # Register Gmail actions using the access token controller = register_gmail_actions(controller, access_token=access_token) - logger.info(f'Gmail 2FA integration registered successfully for user {user_id}') + logger.info(f'Gmail 2FA integration registered successfully for user {user_id} (OTP task)') else: logger.info(f'No Gmail 2FA token found for user {user_id}, running without Gmail integration') else: - logger.info('No email found in task, running without Gmail integration') + logger.info('No email found in OTP task, running without Gmail integration') except Exception as e: logger.error(f'Failed to setup Gmail integration: {e}') else: - logger.info(f'No Gmail 2FA tokens provided, running without Gmail integration: {gmail_tokens_dict}, {task}') + if gmail_tokens_dict and task: + if not hasattr(task, 'login_type') or task.login_type != 'OTP': + logger.info(f'Task login_type is "{getattr(task, "login_type", "None")}", not OTP - skipping Gmail integration') + else: + logger.info('Gmail 2FA tokens provided but no task or task missing login_type') + else: + logger.info('No Gmail 2FA tokens provided or no task, running without Gmail integration') return controller From b9262aaebe3194af575343d42e1459c0ca159916 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Fri, 4 Jul 2025 17:29:22 +0200 Subject: [PATCH 07/13] make it MORE SIMPLE --- browser_use/integrations/gmail/actions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/browser_use/integrations/gmail/actions.py b/browser_use/integrations/gmail/actions.py index ff9400919..461ac5a8e 100644 --- a/browser_use/integrations/gmail/actions.py +++ b/browser_use/integrations/gmail/actions.py @@ -22,8 +22,8 @@ _gmail_service: GmailService | None = None class GetRecentEmailsParams(BaseModel): """Parameters for getting recent emails""" - keyword: str = Field(default='', description='Search keyword') - max_results: int = Field(default=10, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 10)') + keyword: str = Field(default='', description='A single keyword for search, e.g. github, airbnb, etc.') + max_results: int = Field(default=3, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 3)') def register_gmail_actions( @@ -47,7 +47,7 @@ def register_gmail_actions( _gmail_service = GmailService() @controller.registry.action( - description='Get recent emails from the mailbox with a keyword to retrieve verification codes, OTP, 2FA tokens, or any recent email content.', + description='Get recent emails from the mailbox with a keyword to retrieve verification codes, OTP, 2FA tokens, magic links, or any recent email content. Keep your query a single keyword.', param_model=GetRecentEmailsParams, ) async def get_recent_emails(params: GetRecentEmailsParams) -> ActionResult: From 550a2a25521613d88f2da3da372216475b88728d Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sat, 5 Jul 2025 01:36:04 +0200 Subject: [PATCH 08/13] feat: enhance file handling capabilities with JSON and CSV support - Added new file types: JsonFile and CsvFile to the file system. - Updated read_file method to handle external files with .json and .csv extensions. - Modified write_file action to allow .json and .csv file extensions in addition to .md and .txt. - Introduced pypdf dependency for PDF file handling. This update improves the flexibility of file operations within the application. --- browser_use/controller/service.py | 10 ++---- browser_use/filesystem/file_system.py | 49 ++++++++++++++++++++++++++- pyproject.toml | 1 + 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/browser_use/controller/service.py b/browser_use/controller/service.py index 919b61b60..8f14bfa7d 100644 --- a/browser_use/controller/service.py +++ b/browser_use/controller/service.py @@ -566,7 +566,7 @@ Explain the content of the page and that the requested information is not availa return ActionResult(error=msg, include_in_memory=True) # File System Actions - @self.registry.action('Write content to file_name in file system, use only .md or .txt extensions.') + @self.registry.action('Write content to file_name in file system. Allowed extensions are .md, .txt, .json, .csv.') async def write_file(file_name: str, content: str, file_system: FileSystem): result = await file_system.write_file(file_name, content) logger.info(f'💾 {result}') @@ -581,13 +581,9 @@ Explain the content of the page and that the requested information is not availa @self.registry.action('Read file_name from file system') async def read_file(file_name: str, available_file_paths: list[str], file_system: FileSystem): if available_file_paths and file_name in available_file_paths: - import anyio - - async with await anyio.open_file(file_name, 'r') as f: - content = await f.read() - result = f'Read from file {file_name}.\n\n{content}\n' + result = await file_system.read_file(file_name, external_file=True) else: - result = file_system.read_file(file_name) + result = await file_system.read_file(file_name) MAX_MEMORY_SIZE = 1000 if len(result) > MAX_MEMORY_SIZE: diff --git a/browser_use/filesystem/file_system.py b/browser_use/filesystem/file_system.py index eb823e9fd..6a72e5a6e 100644 --- a/browser_use/filesystem/file_system.py +++ b/browser_use/filesystem/file_system.py @@ -94,6 +94,22 @@ class TxtFile(BaseFile): return 'txt' +class JsonFile(BaseFile): + """JSON file implementation""" + + @property + def extension(self) -> str: + return 'json' + + +class CsvFile(BaseFile): + """CSV file implementation""" + + @property + def extension(self) -> str: + return 'csv' + + class FileSystemState(BaseModel): """Serializable state of the file system""" @@ -120,6 +136,8 @@ class FileSystem: self._file_types: dict[str, type[BaseFile]] = { 'md': MarkdownFile, 'txt': TxtFile, + 'json': JsonFile, + 'csv': CsvFile, } self.files = {} @@ -188,8 +206,37 @@ class FileSystem: return file_obj.read() - def read_file(self, full_filename: str) -> str: + async def read_file(self, full_filename: str, external_file: bool = False) -> str: """Read file content using file-specific read method and return appropriate message to LLM""" + if external_file: + try: + _, extension = self._parse_filename(full_filename) + if extension in ['md', 'txt', 'json', 'csv']: + import anyio + + async with await anyio.open_file(full_filename, 'r') as f: + content = await f.read() + return f'Read from file {full_filename}.\n\n{content}\n' + elif extension == 'pdf': + import pypdf + + reader = pypdf.PdfReader(full_filename) + num_pages = len(reader.pages) + MAX_PDF_PAGES = 5 + extra_pages = num_pages - MAX_PDF_PAGES + extracted_text = '' + for page in reader.pages[:MAX_PDF_PAGES]: + extracted_text += page.extract_text() + return f'Read from file {full_filename}.\n\n{extracted_text}\n{extra_pages} more pages...' + else: + return f'Error: Cannot read file {full_filename} as {extension} extension is not supported.' + except FileNotFoundError: + return f"Error: File '{full_filename}' not found." + except PermissionError: + return f"Error: Permission denied to read file '{full_filename}'." + except Exception as e: + return f"Error: Could not read external file '{full_filename}'. {str(e)}" + if not self._is_valid_filename(full_filename): return INVALID_FILENAME_ERROR_MESSAGE diff --git a/pyproject.toml b/pyproject.toml index a3dd52848..e86f881eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ dependencies = [ "google-api-python-client>=2.174.0", "google-auth>=2.40.3", "google-auth-oauthlib>=1.2.2", + "pypdf>=5.7.0", ] # google-api-core: only used for Google LLM APIs # pyperclip: only used for examples that use copy/paste From f3e1a596429cdc33cc916ad2128db31d185efbd9 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sat, 5 Jul 2025 01:51:00 +0200 Subject: [PATCH 09/13] improve --- browser_use/agent/system_prompt.md | 1 + browser_use/agent/system_prompt_no_thinking.md | 1 + browser_use/filesystem/file_system.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md index 411fca56f..0c0b4c4ed 100644 --- a/browser_use/agent/system_prompt.md +++ b/browser_use/agent/system_prompt.md @@ -91,6 +91,7 @@ Strictly follow these rules while using the browser and navigating the web: - You have access to a persistent file system which you can use to track progress, store results, and manage long tasks. - Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Update it to mark completed items and track what remains. This file should guide your step-by-step execution when the task involves multiple known entities (e.g., a list of links or items to visit). ALWAYS use `write_file` to rewrite entire `todo.md` when you want to update your progress. NEVER use `append_file` on `todo.md` as this can explode your context. +- If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas. - Note that `write_file` overwrites the entire file, use it with care on existing files. - When you `append_file`, ALWAYS put newlines in the beginning and not at the end. - If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary. diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md index dee039363..792d0466e 100644 --- a/browser_use/agent/system_prompt_no_thinking.md +++ b/browser_use/agent/system_prompt_no_thinking.md @@ -91,6 +91,7 @@ Strictly follow these rules while using the browser and navigating the web: - You have access to a persistent file system which you can use to track progress, store results, and manage long tasks. - Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Update it to mark completed items and track what remains. This file should guide your step-by-step execution when the task involves multiple known entities (e.g., a list of links or items to visit). ALWAYS use `write_file` to rewrite entire `todo.md` when you want to update your progress. NEVER use `append_file` on `todo.md` as this can explode your context. +- If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas. - Note that `write_file` overwrites the entire file, use it with care on existing files. - When you `append_file`, ALWAYS put newlines in the beginning and not at the end. - If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary. diff --git a/browser_use/filesystem/file_system.py b/browser_use/filesystem/file_system.py index 6a72e5a6e..2bd16da86 100644 --- a/browser_use/filesystem/file_system.py +++ b/browser_use/filesystem/file_system.py @@ -235,7 +235,7 @@ class FileSystem: except PermissionError: return f"Error: Permission denied to read file '{full_filename}'." except Exception as e: - return f"Error: Could not read external file '{full_filename}'. {str(e)}" + return f"Error: Could not read file '{full_filename}'." if not self._is_valid_filename(full_filename): return INVALID_FILENAME_ERROR_MESSAGE From 304b065d59296c8303f136f75bf7597194f83229 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sat, 5 Jul 2025 02:07:11 +0200 Subject: [PATCH 10/13] more security --- browser_use/filesystem/file_system.py | 12 +- tests/ci/test_filesystem.py | 351 +++++++++++++++++++++++++- 2 files changed, 354 insertions(+), 9 deletions(-) diff --git a/browser_use/filesystem/file_system.py b/browser_use/filesystem/file_system.py index 2bd16da86..d4d3a4299 100644 --- a/browser_use/filesystem/file_system.py +++ b/browser_use/filesystem/file_system.py @@ -210,7 +210,10 @@ class FileSystem: """Read file content using file-specific read method and return appropriate message to LLM""" if external_file: try: - _, extension = self._parse_filename(full_filename) + try: + _, extension = self._parse_filename(full_filename) + except Exception: + return f'Error: Invalid filename format {full_filename}. Must be alphanumeric with a supported extension.' if extension in ['md', 'txt', 'json', 'csv']: import anyio @@ -227,7 +230,8 @@ class FileSystem: extracted_text = '' for page in reader.pages[:MAX_PDF_PAGES]: extracted_text += page.extract_text() - return f'Read from file {full_filename}.\n\n{extracted_text}\n{extra_pages} more pages...' + extra_pages_text = f'{extra_pages} more pages...' if extra_pages > 0 else '' + return f'Read from file {full_filename}.\n\n{extracted_text}\n{extra_pages_text}' else: return f'Error: Cannot read file {full_filename} as {extension} extension is not supported.' except FileNotFoundError: @@ -414,6 +418,10 @@ class FileSystem: file_obj = MarkdownFile(**file_info) elif file_type == 'TxtFile': file_obj = TxtFile(**file_info) + elif file_type == 'JsonFile': + file_obj = JsonFile(**file_info) + elif file_type == 'CsvFile': + file_obj = CsvFile(**file_info) else: # Skip unknown file types continue diff --git a/tests/ci/test_filesystem.py b/tests/ci/test_filesystem.py index b005c432d..4c5065c0b 100644 --- a/tests/ci/test_filesystem.py +++ b/tests/ci/test_filesystem.py @@ -9,8 +9,10 @@ import pytest from browser_use.filesystem.file_system import ( DEFAULT_FILE_SYSTEM_PATH, INVALID_FILENAME_ERROR_MESSAGE, + CsvFile, FileSystem, FileSystemState, + JsonFile, MarkdownFile, TxtFile, ) @@ -41,6 +43,30 @@ class TestBaseFile: assert txt_file.get_size == 11 assert txt_file.get_line_count == 2 + def test_json_file_creation(self): + """Test JsonFile creation and basic properties.""" + json_content = '{"name": "John", "age": 30, "city": "New York"}' + json_file = JsonFile(name='data', content=json_content) + + assert json_file.name == 'data' + assert json_file.content == json_content + assert json_file.extension == 'json' + assert json_file.full_name == 'data.json' + assert json_file.get_size == len(json_content) + assert json_file.get_line_count == 1 + + def test_csv_file_creation(self): + """Test CsvFile creation and basic properties.""" + csv_content = 'name,age,city\nJohn,30,New York\nJane,25,London' + csv_file = CsvFile(name='users', content=csv_content) + + assert csv_file.name == 'users' + assert csv_file.content == csv_content + assert csv_file.extension == 'csv' + assert csv_file.full_name == 'users.csv' + assert csv_file.get_size == len(csv_content) + assert csv_file.get_line_count == 3 + def test_file_content_operations(self): """Test content update and append operations.""" file_obj = TxtFile(name='test') @@ -88,6 +114,60 @@ class TestBaseFile: assert file_path.read_text() == expected_content assert file_obj.content == expected_content + async def test_json_file_disk_operations(self): + """Test JSON file sync to disk operations.""" + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_path = Path(tmp_dir) + json_content = '{"users": [{"name": "John", "age": 30}]}' + json_file = JsonFile(name='data', content=json_content) + + # Test sync to disk + await json_file.sync_to_disk(tmp_path) + + # Verify file was created on disk + file_path = tmp_path / 'data.json' + assert file_path.exists() + assert file_path.read_text() == json_content + + # Test write operation + new_content = '{"users": [{"name": "Jane", "age": 25}]}' + await json_file.write(new_content, tmp_path) + assert file_path.read_text() == new_content + assert json_file.content == new_content + + # Test append operation + await json_file.append(', {"name": "Bob", "age": 35}', tmp_path) + expected_content = new_content + ', {"name": "Bob", "age": 35}' + assert file_path.read_text() == expected_content + assert json_file.content == expected_content + + async def test_csv_file_disk_operations(self): + """Test CSV file sync to disk operations.""" + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_path = Path(tmp_dir) + csv_content = 'name,age,city\nJohn,30,New York' + csv_file = CsvFile(name='users', content=csv_content) + + # Test sync to disk + await csv_file.sync_to_disk(tmp_path) + + # Verify file was created on disk + file_path = tmp_path / 'users.csv' + assert file_path.exists() + assert file_path.read_text() == csv_content + + # Test write operation + new_content = 'name,age,city\nJane,25,London' + await csv_file.write(new_content, tmp_path) + assert file_path.read_text() == new_content + assert csv_file.content == new_content + + # Test append operation + await csv_file.append('\nBob,35,Paris', tmp_path) + expected_content = new_content + '\nBob,35,Paris' + assert file_path.read_text() == expected_content + assert csv_file.content == expected_content + def test_file_sync_to_disk_sync(self): """Test synchronous disk sync operation.""" with tempfile.TemporaryDirectory() as tmp_dir: @@ -160,7 +240,8 @@ class TestFileSystem: assert 'md' in extensions assert 'txt' in extensions - assert len(extensions) == 2 + assert 'json' in extensions + assert 'csv' in extensions def test_filename_validation(self, temp_filesystem): """Test filename validation.""" @@ -171,6 +252,8 @@ class TestFileSystem: assert fs._is_valid_filename('my_file.txt') is True assert fs._is_valid_filename('file-name.md') is True assert fs._is_valid_filename('file123.txt') is True + assert fs._is_valid_filename('data.json') is True + assert fs._is_valid_filename('users.csv') is True # Invalid filenames assert fs._is_valid_filename('test.doc') is False # wrong extension @@ -179,6 +262,8 @@ class TestFileSystem: assert fs._is_valid_filename('test with spaces.md') is False # spaces assert fs._is_valid_filename('test@file.md') is False # special chars assert fs._is_valid_filename('.md') is False # no name + assert fs._is_valid_filename('.json') is False # no name + assert fs._is_valid_filename('.csv') is False # no name def test_filename_parsing(self, temp_filesystem): """Test filename parsing into name and extension.""" @@ -192,6 +277,14 @@ class TestFileSystem: assert name == 'my_file' assert ext == 'txt' # Should be lowercased + name, ext = fs._parse_filename('data.json') + assert name == 'data' + assert ext == 'json' + + name, ext = fs._parse_filename('users.CSV') + assert name == 'users' + assert ext == 'csv' # Should be lowercased + def test_get_file(self, temp_filesystem): """Test getting files from the filesystem.""" fs = temp_filesystem @@ -228,21 +321,21 @@ class TestFileSystem: content = fs.display_file('invalid@name.md') assert content is None - def test_read_file(self, temp_filesystem): + async def test_read_file(self, temp_filesystem: FileSystem): """Test reading file content with proper formatting.""" - fs = temp_filesystem + fs: FileSystem = temp_filesystem # Read existing empty file - result = fs.read_file('todo.md') + result = await fs.read_file('todo.md') expected = 'Read from file todo.md.\n\n\n' assert result == expected # Read non-existent file - result = fs.read_file('nonexistent.md') + result = await fs.read_file('nonexistent.md') assert result == "File 'nonexistent.md' not found." # Read file with invalid name - result = fs.read_file('invalid@name.md') + result = await fs.read_file('invalid@name.md') assert result == INVALID_FILENAME_ERROR_MESSAGE async def test_write_file(self, temp_filesystem): @@ -254,7 +347,7 @@ class TestFileSystem: assert result == 'Data written to file results.md successfully.' # Verify content was written - content = fs.read_file('results.md') + content = await fs.read_file('results.md') assert '# Test Results\nThis is a test.' in content # Write to new file @@ -271,6 +364,56 @@ class TestFileSystem: result = await fs.write_file('test.doc', 'content') assert result == INVALID_FILENAME_ERROR_MESSAGE + async def test_write_json_file(self, temp_filesystem): + """Test writing JSON files.""" + fs = temp_filesystem + + # Write valid JSON content + json_content = '{"users": [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]}' + result = await fs.write_file('data.json', json_content) + assert result == 'Data written to file data.json successfully.' + + # Verify content was written + content = await fs.read_file('data.json') + assert json_content in content + + # Verify file object was created + assert 'data.json' in fs.files + file_obj = fs.get_file('data.json') + assert file_obj is not None + assert isinstance(file_obj, JsonFile) + assert file_obj.content == json_content + + # Write to new JSON file + result = await fs.write_file('config.json', '{"debug": true, "port": 8080}') + assert result == 'Data written to file config.json successfully.' + assert 'config.json' in fs.files + + async def test_write_csv_file(self, temp_filesystem): + """Test writing CSV files.""" + fs = temp_filesystem + + # Write valid CSV content + csv_content = 'name,age,city\nJohn,30,New York\nJane,25,London\nBob,35,Paris' + result = await fs.write_file('users.csv', csv_content) + assert result == 'Data written to file users.csv successfully.' + + # Verify content was written + content = await fs.read_file('users.csv') + assert csv_content in content + + # Verify file object was created + assert 'users.csv' in fs.files + file_obj = fs.get_file('users.csv') + assert file_obj is not None + assert isinstance(file_obj, CsvFile) + assert file_obj.content == csv_content + + # Write to new CSV file + result = await fs.write_file('products.csv', 'id,name,price\n1,Laptop,999.99\n2,Mouse,29.99') + assert result == 'Data written to file products.csv successfully.' + assert 'products.csv' in fs.files + async def test_append_file(self, temp_filesystem): """Test appending content to files.""" fs = temp_filesystem @@ -294,6 +437,45 @@ class TestFileSystem: result = await fs.append_file('invalid@name.md', 'content') assert result == INVALID_FILENAME_ERROR_MESSAGE + async def test_append_json_file(self, temp_filesystem): + """Test appending content to JSON files.""" + fs = temp_filesystem + + # First write some JSON content + await fs.write_file('data.json', '{"users": [{"name": "John", "age": 30}]}') + + # Append additional JSON content (note: this creates invalid JSON, but tests the append functionality) + result = await fs.append_file('data.json', ', {"name": "Jane", "age": 25}') + assert result == 'Data appended to file data.json successfully.' + + # Verify content was appended + file_obj = fs.get_file('data.json') + assert file_obj is not None + expected_content = '{"users": [{"name": "John", "age": 30}]}, {"name": "Jane", "age": 25}' + assert file_obj.content == expected_content + + async def test_append_csv_file(self, temp_filesystem): + """Test appending content to CSV files.""" + fs = temp_filesystem + + # First write some CSV content + await fs.write_file('users.csv', 'name,age,city\nJohn,30,New York') + + # Append additional CSV row + result = await fs.append_file('users.csv', '\nJane,25,London') + assert result == 'Data appended to file users.csv successfully.' + + # Verify content was appended + file_obj = fs.get_file('users.csv') + assert file_obj is not None + expected_content = 'name,age,city\nJohn,30,New York\nJane,25,London' + assert file_obj.content == expected_content + + # Append another row + await fs.append_file('users.csv', '\nBob,35,Paris') + expected_content = 'name,age,city\nJohn,30,New York\nJane,25,London\nBob,35,Paris' + assert file_obj.content == expected_content + async def test_save_extracted_content(self, temp_filesystem): """Test saving extracted content with auto-numbering.""" fs = temp_filesystem @@ -412,6 +594,161 @@ class TestFileSystem: # Clean up second filesystem fs2.nuke() + async def test_complete_workflow_with_json_csv(self): + """Test a complete filesystem workflow with JSON and CSV files.""" + with tempfile.TemporaryDirectory() as tmp_dir: + # Create filesystem + fs = FileSystem(base_dir=tmp_dir, create_default_files=True) + + # Write JSON configuration file + config_json = '{"app": {"name": "TestApp", "version": "1.0"}, "database": {"host": "localhost", "port": 5432}}' + await fs.write_file('config.json', config_json) + + # Write CSV data file + users_csv = 'id,name,email,age\n1,John Doe,john@example.com,30\n2,Jane Smith,jane@example.com,25' + await fs.write_file('users.csv', users_csv) + + # Append more data to CSV + await fs.append_file('users.csv', '\n3,Bob Johnson,bob@example.com,35') + + # Update JSON configuration + updated_config = '{"app": {"name": "TestApp", "version": "1.1"}, "database": {"host": "localhost", "port": 5432}, "features": {"logging": true}}' + await fs.write_file('config.json', updated_config) + + # Create another JSON file for API responses + api_response = '{"status": "success", "data": [{"id": 1, "name": "Item 1"}, {"id": 2, "name": "Item 2"}]}' + await fs.write_file('api_response.json', api_response) + + # Create a products CSV file + products_csv = ( + 'sku,name,price,category\nLAP001,Gaming Laptop,1299.99,Electronics\nMOU001,Wireless Mouse,29.99,Accessories' + ) + await fs.write_file('products.csv', products_csv) + + # Verify file listing + files = fs.list_files() + expected_files = ['todo.md', 'config.json', 'users.csv', 'api_response.json', 'products.csv'] + assert len(files) == len(expected_files) + for expected_file in expected_files: + assert expected_file in files + + # Verify JSON file contents + config_file = fs.get_file('config.json') + assert config_file is not None + assert isinstance(config_file, JsonFile) + assert config_file.content == updated_config + + api_file = fs.get_file('api_response.json') + assert api_file is not None + assert isinstance(api_file, JsonFile) + assert api_file.content == api_response + + # Verify CSV file contents + users_file = fs.get_file('users.csv') + assert users_file is not None + assert isinstance(users_file, CsvFile) + expected_users_content = 'id,name,email,age\n1,John Doe,john@example.com,30\n2,Jane Smith,jane@example.com,25\n3,Bob Johnson,bob@example.com,35' + assert users_file.content == expected_users_content + + products_file = fs.get_file('products.csv') + assert products_file is not None + assert isinstance(products_file, CsvFile) + assert products_file.content == products_csv + + # Test state persistence with JSON and CSV files + state = fs.get_state() + fs.nuke() + + # Restore from state + fs2 = FileSystem.from_state(state) + + # Verify restoration + assert len(fs2.files) == len(expected_files) + + # Verify JSON files were restored correctly + restored_config = fs2.get_file('config.json') + assert restored_config is not None + assert isinstance(restored_config, JsonFile) + assert restored_config.content == updated_config + + restored_api = fs2.get_file('api_response.json') + assert restored_api is not None + assert isinstance(restored_api, JsonFile) + assert restored_api.content == api_response + + # Verify CSV files were restored correctly + restored_users = fs2.get_file('users.csv') + assert restored_users is not None + assert isinstance(restored_users, CsvFile) + assert restored_users.content == expected_users_content + + restored_products = fs2.get_file('products.csv') + assert restored_products is not None + assert isinstance(restored_products, CsvFile) + assert restored_products.content == products_csv + + # Verify files exist on disk + for filename in expected_files: + if filename != 'todo.md': # Skip todo.md as it's already tested + assert (fs2.data_dir / filename).exists() + + fs2.nuke() + + async def test_from_state_with_json_csv_files(self, temp_filesystem): + """Test restoring filesystem from state with JSON and CSV files.""" + fs = temp_filesystem + + # Add JSON and CSV content + await fs.write_file('data.json', '{"version": "1.0", "users": [{"name": "John", "age": 30}]}') + await fs.write_file('users.csv', 'name,age,city\nJohn,30,New York\nJane,25,London') + await fs.write_file('config.json', '{"debug": true, "port": 8080}') + await fs.write_file('products.csv', 'id,name,price\n1,Laptop,999.99\n2,Mouse,29.99') + + # Get state + state = fs.get_state() + + # Create new filesystem from state + fs2 = FileSystem.from_state(state) + + # Verify restoration + assert fs2.base_dir == fs.base_dir + assert len(fs2.files) == len(fs.files) + + # Verify JSON file contents + json_file = fs2.get_file('data.json') + assert json_file is not None + assert isinstance(json_file, JsonFile) + assert json_file.content == '{"version": "1.0", "users": [{"name": "John", "age": 30}]}' + + config_file = fs2.get_file('config.json') + assert config_file is not None + assert isinstance(config_file, JsonFile) + assert config_file.content == '{"debug": true, "port": 8080}' + + # Verify CSV file contents + csv_file = fs2.get_file('users.csv') + assert csv_file is not None + assert isinstance(csv_file, CsvFile) + assert csv_file.content == 'name,age,city\nJohn,30,New York\nJane,25,London' + + products_file = fs2.get_file('products.csv') + assert products_file is not None + assert isinstance(products_file, CsvFile) + assert products_file.content == 'id,name,price\n1,Laptop,999.99\n2,Mouse,29.99' + + # Verify files exist on disk + assert (fs2.data_dir / 'data.json').exists() + assert (fs2.data_dir / 'users.csv').exists() + assert (fs2.data_dir / 'config.json').exists() + assert (fs2.data_dir / 'products.csv').exists() + + # Verify disk contents match + assert (fs2.data_dir / 'data.json').read_text() == '{"version": "1.0", "users": [{"name": "John", "age": 30}]}' + assert (fs2.data_dir / 'users.csv').read_text() == 'name,age,city\nJohn,30,New York\nJane,25,London' + + # Clean up second filesystem + fs2.nuke() + def test_nuke(self, empty_filesystem): """Test filesystem destruction.""" fs = empty_filesystem From 7a6644c1c3e6d465ab5faad59a20ad0d7d9d8f27 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sat, 5 Jul 2025 10:15:24 +0200 Subject: [PATCH 11/13] fix example --- examples/simple.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/examples/simple.py b/examples/simple.py index 54ae90fd6..3bfeb8045 100644 --- a/examples/simple.py +++ b/examples/simple.py @@ -9,37 +9,31 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from dotenv import load_dotenv load_dotenv() -from lmnr import Laminar try: + from lmnr import Laminar + Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY')) except Exception: + print('Error initializing Laminar') pass from browser_use import Agent # Initialize the model llm = ChatOpenAI( - model='gpt-4o', + model='gpt-4.1-mini', ) task = 'Go to google.com/travel/flights and search for flights to Tokyo next week' -task = """http://www.sadfdsafdssdafd.com/ go here and scroll around""" -task = 'Go to Louis Vuittons website, find every product and save the product details 1 by 1. Extract product details as JSON: productname (Full name as shown on the webpage), brand (Manufacturer or designer name), model (Specific version or edition), gender (Target audience: Men, Women, Unisex), sku (Unique identifier), releasedate (Launch date in YYYY-MM-DD format), retailprice (Price as a number, no currency symbols), colorway (Color description without spaces around slashes, e.g., White/PinkFoam), sizerange (Available sizes as a list, maintain decimals for half sizes, e.g., 7.5), requesturl (URL where product data is scraped), requesttimestamp (ISO 8601 timestamp of the request), primaryimgurl (URL of the main product image); ensure required fields are present, return null if data is missing.' - agent = Agent(task=task, llm=llm) async def main(): - import time - - start_time = time.time() history = await agent.run() # token usage print(history.usage) - end_time = time.time() - print(f'Time taken: {end_time - start_time} seconds') if __name__ == '__main__': From 3124ddcfe3529a6d20e7be458c7d9054a780a01e Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sat, 5 Jul 2025 10:15:53 +0200 Subject: [PATCH 12/13] add excel sheet example --- examples/file_system/excel_sheet.py | 47 +++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 examples/file_system/excel_sheet.py diff --git a/examples/file_system/excel_sheet.py b/examples/file_system/excel_sheet.py new file mode 100644 index 000000000..e0164c998 --- /dev/null +++ b/examples/file_system/excel_sheet.py @@ -0,0 +1,47 @@ +import asyncio +import os +import sys + +from browser_use.llm.openai.chat import ChatOpenAI + +sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from dotenv import load_dotenv + +load_dotenv() +from lmnr import Laminar + +try: + Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY')) +except Exception: + pass + +from browser_use import Agent + +# Initialize the model +llm = ChatOpenAI( + model='o4-mini', + temperature=1.0, +) + + +task = ( + 'Find current stock price of companies Meta and Amazon. Then, make me a CSV file with 2 columns: company name, stock price.' +) + +agent = Agent(task=task, llm=llm) + + +async def main(): + import time + + start_time = time.time() + history = await agent.run() + # token usage + print(history.usage) + end_time = time.time() + print(f'Time taken: {end_time - start_time} seconds') + + +if __name__ == '__main__': + asyncio.run(main()) From 220f0bc99460d9097a02905b7c492a085f74d0f3 Mon Sep 17 00:00:00 2001 From: mertunsall Date: Sat, 5 Jul 2025 10:32:49 +0200 Subject: [PATCH 13/13] update models to gpt-4.1 --- .cursor/rules/browser-use-rules.mdc | 2 +- .github/ISSUE_TEMPLATE/1_element_detection_bug.yml | 4 +++- README.md | 2 +- browser_use/cli.py | 6 +++--- browser_use/llm/tests/test_single_step.py | 4 ++-- docs/quickstart.mdx | 2 +- eval/service.py | 2 +- examples/browser/multiple_agents_same_browser.py | 2 +- examples/browser/real_browser.py | 2 +- examples/browser/stealth.py | 2 +- examples/custom-functions/2fa.py | 2 +- examples/custom-functions/action_filters.py | 2 +- examples/custom-functions/advanced_search.py | 2 +- examples/custom-functions/clipboard.py | 2 +- examples/custom-functions/custom_hooks_before_after_step.py | 4 ++-- examples/custom-functions/drag_and_drop.py | 4 ++-- examples/custom-functions/extract_pdf_content.py | 2 +- examples/custom-functions/hover_element.py | 2 +- examples/custom-functions/notification.py | 2 +- examples/custom-functions/onepassword_2fa.py | 2 +- examples/custom-functions/perplexity_search.py | 2 +- examples/custom-functions/save_to_file_hugging_face.py | 2 +- examples/custom-functions/solve_amazon_captcha.py | 2 +- examples/features/click_fallback_options.py | 2 +- examples/features/cross_origin_iframes.py | 2 +- examples/features/custom_output.py | 2 +- examples/features/custom_system_prompt.py | 2 +- examples/features/custom_user_agent.py | 2 +- examples/features/follow_up_tasks.py | 2 +- examples/features/initial_actions.py | 2 +- examples/features/multi-tab_handling.py | 2 +- examples/features/outsource_state.py | 4 ++-- examples/features/parallel_agents.py | 2 +- examples/features/pause_agent.py | 2 +- examples/features/planner.py | 2 +- examples/features/restrict_urls.py | 2 +- examples/features/result_processing.py | 2 +- examples/features/save_trace.py | 2 +- examples/features/small_model_for_extraction.py | 4 ++-- examples/features/validate_output.py | 2 +- examples/integrations/gmail_2fa_integration.py | 2 +- examples/models/azure_openai.py | 2 +- examples/models/langchain/README.md | 2 +- examples/models/langchain/example.py | 2 +- examples/ui/command_line.py | 2 +- examples/ui/gradio_demo.py | 4 ++-- examples/ui/streamlit_demo.py | 2 +- examples/use-cases/captcha.py | 2 +- examples/use-cases/check_appointment.py | 2 +- examples/use-cases/find_and_apply_to_jobs.py | 2 +- examples/use-cases/find_influencer_profiles.py | 2 +- examples/use-cases/google_sheets.py | 2 +- examples/use-cases/online_coding_agent.py | 2 +- examples/use-cases/play_chess.py | 2 +- examples/use-cases/post-twitter.py | 2 +- examples/use-cases/scrolling_page.py | 2 +- examples/use-cases/shopping.py | 2 +- examples/use-cases/web_voyager_agent.py | 4 ++-- examples/use-cases/wikipedia_banana_to_quantum.py | 2 +- tests/ci/test_sync_agent_events.py | 2 +- tests/old/test_agent_actions.py | 4 ++-- tests/old/test_core_functionality.py | 2 +- tests/old/test_dropdown_error.py | 2 +- tests/old/test_gif_path.py | 2 +- tests/old/test_mind2web.py | 2 +- tests/old/test_react_dropdown.py | 2 +- tests/old/test_self_registered_actions.py | 2 +- tests/old/test_vision.py | 2 +- tests/old/test_wait_for_element.py | 2 +- 69 files changed, 81 insertions(+), 79 deletions(-) diff --git a/.cursor/rules/browser-use-rules.mdc b/.cursor/rules/browser-use-rules.mdc index c58b40ef3..235a51455 100644 --- a/.cursor/rules/browser-use-rules.mdc +++ b/.cursor/rules/browser-use-rules.mdc @@ -76,7 +76,7 @@ from browser_use import Agent from browser_use.llm import ChatOpenAI task = "Find the CEO of OpenAI and return their name" -model = ChatOpenAI(model="gpt-4o") +model = ChatOpenAI(model="gpt-4.1-mini") agent = Agent(task=task, llm=model, controller=controller) diff --git a/.github/ISSUE_TEMPLATE/1_element_detection_bug.yml b/.github/ISSUE_TEMPLATE/1_element_detection_bug.yml index cd38ab813..80fbe6035 100644 --- a/.github/ISSUE_TEMPLATE/1_element_detection_bug.yml +++ b/.github/ISSUE_TEMPLATE/1_element_detection_bug.yml @@ -44,6 +44,8 @@ body: - gpt-4.1 - gpt-4.1-mini - gpt-4.1-nano + - o4-mini + - o3 - claude-3.7-sonnet - claude-3.5-sonnet - gemini-2.6-flash-preview @@ -114,7 +116,7 @@ body: agent = Agent( task='...', - llm=ChatOpenAI(model="gpt-4o"), + llm=ChatOpenAI(model="gpt-4.1"), browser_session=BrowserSession(headless=False), ) ... diff --git a/README.md b/README.md index ae8bb36a1..5aa03ae83 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ from browser_use.llm import ChatOpenAI async def main(): agent = Agent( task="Compare the price of gpt-4o and DeepSeek-V3", - llm=ChatOpenAI(model="gpt-4o"), + llm=ChatOpenAI(model="o4-mini", temperature=1.0), ) await agent.run() diff --git a/browser_use/cli.py b/browser_use/cli.py index 17d5199bc..25bc173c9 100644 --- a/browser_use/cli.py +++ b/browser_use/cli.py @@ -211,11 +211,11 @@ def get_llm(config: dict[str, Any]): # Auto-detect based on available API keys if CONFIG.OPENAI_API_KEY: - return ChatOpenAI(model='gpt-4o', temperature=temperature) + return ChatOpenAI(model='gpt-4.1', temperature=temperature) elif CONFIG.ANTHROPIC_API_KEY: - return ChatAnthropic(model='claude-3.5-sonnet-exp', temperature=temperature) + return ChatAnthropic(model='claude-3.5-sonnet', temperature=temperature) elif CONFIG.GOOGLE_API_KEY: - return ChatGoogle(model='gemini-2.0-flash-lite', temperature=temperature) + return ChatGoogle(model='gemini-2.5-flash', temperature=temperature) else: print( '⚠️ No API keys found. Please update your config or set one of: OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY.' diff --git a/browser_use/llm/tests/test_single_step.py b/browser_use/llm/tests/test_single_step.py index 315afe9ee..b990804b6 100644 --- a/browser_use/llm/tests/test_single_step.py +++ b/browser_use/llm/tests/test_single_step.py @@ -95,9 +95,9 @@ def create_mock_state_message(temp_dir: str): [ (ChatGroq, 'meta-llama/llama-4-maverick-17b-128e-instruct'), (ChatGoogle, 'gemini-2.0-flash-exp'), - (ChatOpenAI, 'gpt-4o-mini'), + (ChatOpenAI, 'gpt-4.1-mini'), (ChatAnthropic, 'claude-3-5-sonnet-latest'), - (ChatAzureOpenAI, 'gpt-4o-mini'), + (ChatAzureOpenAI, 'gpt-4.1-mini'), ], ) async def test_single_step_parametrized(llm_class, model_name): diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 3682424af..26382eee3 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -50,7 +50,7 @@ load_dotenv() import asyncio -llm = ChatOpenAI(model="gpt-4o") +llm = ChatOpenAI(model="gpt-4.1") async def main(): agent = Agent( diff --git a/eval/service.py b/eval/service.py index 41e8d5a52..3d7163422 100644 --- a/eval/service.py +++ b/eval/service.py @@ -3039,7 +3039,7 @@ if __name__ == '__main__': '--model', type=str, default='gpt-4o', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for the agent' ) parser.add_argument( - '--eval-model', type=str, default='gpt-4o', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for evaluation' + '--eval-model', type=str, default='gpt-4.1', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for evaluation' ) parser.add_argument('--no-vision', action='store_true', help='Disable vision capabilities in the agent') diff --git a/examples/browser/multiple_agents_same_browser.py b/examples/browser/multiple_agents_same_browser.py index 1205fcfa5..354c292cf 100644 --- a/examples/browser/multiple_agents_same_browser.py +++ b/examples/browser/multiple_agents_same_browser.py @@ -26,7 +26,7 @@ async def main(): await browser_session.start() current_agent = None - llm = ChatOpenAI(model='gpt-4o') + llm = ChatOpenAI(model='gpt-4.1') task1 = 'find todays weather on San Francisco and extract it as json' task2 = 'find todays weather in Zurich and extract it as json' diff --git a/examples/browser/real_browser.py b/examples/browser/real_browser.py index d3ad39435..e232ebea2 100644 --- a/examples/browser/real_browser.py +++ b/examples/browser/real_browser.py @@ -24,7 +24,7 @@ browser_session = BrowserSession(browser_profile=browser_profile) async def main(): agent = Agent( task='Find todays DOW stock price', - llm=ChatOpenAI(model='gpt-4o'), + llm=ChatOpenAI(model='gpt-4.1'), browser_session=browser_session, ) diff --git a/examples/browser/stealth.py b/examples/browser/stealth.py index be2f0485c..73ac7366a 100644 --- a/examples/browser/stealth.py +++ b/examples/browser/stealth.py @@ -18,7 +18,7 @@ from browser_use.browser.profile import BrowserProfile from browser_use.browser.types import async_patchright from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') terminal_width, terminal_height = shutil.get_terminal_size((80, 20)) diff --git a/examples/custom-functions/2fa.py b/examples/custom-functions/2fa.py index f5d4e537d..7622fab70 100644 --- a/examples/custom-functions/2fa.py +++ b/examples/custom-functions/2fa.py @@ -56,7 +56,7 @@ async def main(): You are completely FORBIDDEN to use any other method to get the 2FA code. """ - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') agent = Agent(task=task, llm=model, controller=controller) result = await agent.run() diff --git a/examples/custom-functions/action_filters.py b/examples/custom-functions/action_filters.py index 619d6ed94..f7a7ef41b 100644 --- a/examples/custom-functions/action_filters.py +++ b/examples/custom-functions/action_filters.py @@ -68,7 +68,7 @@ async def main(): """Main function to run the example""" browser_session = BrowserSession() await browser_session.start() - llm = ChatOpenAI(model='gpt-4o') + llm = ChatOpenAI(model='gpt-4.1') # Create the agent agent = Agent( # disco mode will not be triggered on apple.com because the LLM won't be able to see that action available, it should work on Google.com though. diff --git a/examples/custom-functions/advanced_search.py b/examples/custom-functions/advanced_search.py index 491c368de..efeae01d1 100644 --- a/examples/custom-functions/advanced_search.py +++ b/examples/custom-functions/advanced_search.py @@ -93,7 +93,7 @@ names = [ async def main(): task = 'use search_web with "find email address of the following ETH professor:" for each of the following persons in a list of actions. Finally return the list with name and email if provided - do always 5 at once' task += '\n' + '\n'.join(names) - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') browser_profile = BrowserProfile() agent = Agent(task=task, llm=model, controller=controller, browser_profile=browser_profile) diff --git a/examples/custom-functions/clipboard.py b/examples/custom-functions/clipboard.py index 5f5d82cc1..33e1d615d 100644 --- a/examples/custom-functions/clipboard.py +++ b/examples/custom-functions/clipboard.py @@ -39,7 +39,7 @@ async def paste_from_clipboard(page: Page): async def main(): task = 'Copy the text "Hello, world!" to the clipboard, then go to google.com and paste the text' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') browser_session = BrowserSession(browser_profile=browser_profile) await browser_session.start() agent = Agent( diff --git a/examples/custom-functions/custom_hooks_before_after_step.py b/examples/custom-functions/custom_hooks_before_after_step.py index 8d4c7b873..bd40acbbf 100644 --- a/examples/custom-functions/custom_hooks_before_after_step.py +++ b/examples/custom-functions/custom_hooks_before_after_step.py @@ -220,8 +220,8 @@ async def record_activity(agent_obj): agent = Agent( - task='Compare the price of gpt-4o and DeepSeek-V3', - llm=ChatOpenAI(model='gpt-4o'), + task='Compare the price of gpt-4.1 and DeepSeek-V3', + llm=ChatOpenAI(model='gpt-4.1'), ) diff --git a/examples/custom-functions/drag_and_drop.py b/examples/custom-functions/drag_and_drop.py index 22763142a..a8a6a588b 100644 --- a/examples/custom-functions/drag_and_drop.py +++ b/examples/custom-functions/drag_and_drop.py @@ -268,7 +268,7 @@ async def example_drag_drop_sortable_list(): controller = await create_drag_drop_controller() # Initialize LLM (replace with your preferred model) - llm = ChatOpenAI(model='gpt-4o') + llm = ChatOpenAI(model='gpt-4.1') # Create the agent agent = Agent( @@ -288,7 +288,7 @@ async def example_drag_drop_coordinates(): """Example: Direct coordinate-based drag and drop.""" controller = await create_drag_drop_controller() - llm = ChatOpenAI(model='gpt-4o') + llm = ChatOpenAI(model='gpt-4.1') agent = Agent( task='Go to a canvas drawing website and draw a simple line using drag and drop from coordinates (100, 100) to (300, 200)', diff --git a/examples/custom-functions/extract_pdf_content.py b/examples/custom-functions/extract_pdf_content.py index 87048b707..0dc451f34 100755 --- a/examples/custom-functions/extract_pdf_content.py +++ b/examples/custom-functions/extract_pdf_content.py @@ -79,7 +79,7 @@ async def main(): URL: https://docs.house.gov/meetings/GO/GO00/20220929/115171/HHRG-117-GO00-20220929-SD010.pdf """, - llm=ChatOpenAI(model='gpt-4o'), + llm=ChatOpenAI(model='gpt-4.1'), controller=controller, ) result = await agent.run() diff --git a/examples/custom-functions/hover_element.py b/examples/custom-functions/hover_element.py index 4ff3826fb..d363ba058 100644 --- a/examples/custom-functions/hover_element.py +++ b/examples/custom-functions/hover_element.py @@ -74,7 +74,7 @@ async def hover_element(params: HoverAction, browser_session: BrowserSession): async def main(): task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the css selector #hoverdivpara, then click on "Can you click me?"' # task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the xpath //*[@id="hoverdivpara"], then click on "Can you click me?"' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') browser_session = BrowserSession(browser_profile=browser_profile) await browser_session.start() agent = Agent( diff --git a/examples/custom-functions/notification.py b/examples/custom-functions/notification.py index 0d9f780a4..325f30c17 100644 --- a/examples/custom-functions/notification.py +++ b/examples/custom-functions/notification.py @@ -34,7 +34,7 @@ async def done(text: str): async def main(): task = 'go to brower-use.com and then done' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') agent = Agent(task=task, llm=model, controller=controller) await agent.run() diff --git a/examples/custom-functions/onepassword_2fa.py b/examples/custom-functions/onepassword_2fa.py index c34fd19b0..5b73d88a6 100644 --- a/examples/custom-functions/onepassword_2fa.py +++ b/examples/custom-functions/onepassword_2fa.py @@ -46,7 +46,7 @@ async def main(): # Example task using the 1Password 2FA action task = 'Go to account.google.com, enter username and password, then if prompted for 2FA code, get 2FA code from 1Password for and enter it' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') agent = Agent(task=task, llm=model, controller=controller) result = await agent.run() diff --git a/examples/custom-functions/perplexity_search.py b/examples/custom-functions/perplexity_search.py index b306d5d2a..253d42733 100644 --- a/examples/custom-functions/perplexity_search.py +++ b/examples/custom-functions/perplexity_search.py @@ -71,7 +71,7 @@ names = [ async def main(): task = 'use search_web with "find email address of the following ETH professor:" for each of the persons. Finally return the list with name and email if provided ' task += '\n' + '\n'.join(names) - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') browser_profile = BrowserProfile() agent = Agent(task=task, llm=model, controller=controller, browser_profile=browser_profile) diff --git a/examples/custom-functions/save_to_file_hugging_face.py b/examples/custom-functions/save_to_file_hugging_face.py index 284018cc3..dbfd280f8 100644 --- a/examples/custom-functions/save_to_file_hugging_face.py +++ b/examples/custom-functions/save_to_file_hugging_face.py @@ -40,7 +40,7 @@ def save_models(params: Models): async def main(): task = 'Look up models with a license of cc-by-sa-4.0 and sort by most likes on Hugging face, save top 5 to file.' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') agent = Agent(task=task, llm=model, controller=controller) await agent.run() diff --git a/examples/custom-functions/solve_amazon_captcha.py b/examples/custom-functions/solve_amazon_captcha.py index f3edbf458..27ab02e0f 100644 --- a/examples/custom-functions/solve_amazon_captcha.py +++ b/examples/custom-functions/solve_amazon_captcha.py @@ -71,7 +71,7 @@ async def solve_amazon_captcha(browser_session: BrowserSession): async def main(): task = 'Go to https://www.amazon.com/errors/validateCaptcha and solve the captcha using the solve_amazon_captcha tool' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') browser_session = BrowserSession(browser_profile=browser_profile) await browser_session.start() agent = Agent(task=task, llm=model, controller=controller, browser_session=browser_session) diff --git a/examples/features/click_fallback_options.py b/examples/features/click_fallback_options.py index e5306185a..0f65f68bf 100644 --- a/examples/features/click_fallback_options.py +++ b/examples/features/click_fallback_options.py @@ -181,7 +181,7 @@ async def main(): select_task = 'Open http://localhost:8000/, choose the car BMW' button_task = 'Open http://localhost:8000/, click on the button' - llm = ChatOpenAI(model='gpt-4o') + llm = ChatOpenAI(model='gpt-4.1') # llm = ChatGoogleGenerativeAI( # model="gemini-2.0-flash-lite", # ) diff --git a/examples/features/cross_origin_iframes.py b/examples/features/cross_origin_iframes.py index 5a186e253..abdd9190c 100644 --- a/examples/features/cross_origin_iframes.py +++ b/examples/features/cross_origin_iframes.py @@ -32,7 +32,7 @@ controller = Controller() async def main(): agent = Agent( task='Click "Go cross-site (simple page)" button on https://csreis.github.io/tests/cross-site-iframe.html then tell me the text within', - llm=ChatOpenAI(model='gpt-4o', temperature=0.0), + llm=ChatOpenAI(model='gpt-4.1', temperature=0.0), controller=controller, browser_session=browser_session, ) diff --git a/examples/features/custom_output.py b/examples/features/custom_output.py index 165e36901..40319afd0 100644 --- a/examples/features/custom_output.py +++ b/examples/features/custom_output.py @@ -36,7 +36,7 @@ controller = Controller(output_model=Posts) async def main(): task = 'Go to hackernews show hn and give me the first 5 posts' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') agent = Agent(task=task, llm=model, controller=controller) history = await agent.run() diff --git a/examples/features/custom_system_prompt.py b/examples/features/custom_system_prompt.py index c0770bb72..f28d4690f 100644 --- a/examples/features/custom_system_prompt.py +++ b/examples/features/custom_system_prompt.py @@ -29,7 +29,7 @@ extend_system_message = ( async def main(): task = 'do google search to find images of Elon Musk' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') agent = Agent(task=task, llm=model, extend_system_message=extend_system_message) print( diff --git a/examples/features/custom_user_agent.py b/examples/features/custom_user_agent.py index 9e05abaa6..82790a5e3 100644 --- a/examples/features/custom_user_agent.py +++ b/examples/features/custom_user_agent.py @@ -19,7 +19,7 @@ def get_llm(provider: str): if provider == 'anthropic': return ChatAnthropic(model='claude-3-5-sonnet-20240620', temperature=0.0) elif provider == 'openai': - return ChatOpenAI(model='gpt-4o', temperature=0.0) + return ChatOpenAI(model='gpt-4.1', temperature=0.0) else: raise ValueError(f'Unsupported provider: {provider}') diff --git a/examples/features/follow_up_tasks.py b/examples/features/follow_up_tasks.py index 93f448207..750bcf3b7 100644 --- a/examples/features/follow_up_tasks.py +++ b/examples/features/follow_up_tasks.py @@ -14,7 +14,7 @@ from browser_use.llm import ChatOpenAI # Initialize the model llm = ChatOpenAI( - model='gpt-4o', + model='gpt-4.1', temperature=0.0, ) # Get your chrome path diff --git a/examples/features/initial_actions.py b/examples/features/initial_actions.py index 2138bcfc7..49cdab022 100644 --- a/examples/features/initial_actions.py +++ b/examples/features/initial_actions.py @@ -11,7 +11,7 @@ load_dotenv() from browser_use import Agent from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') initial_actions = [ {'go_to_url': {'url': 'https://www.google.com', 'new_tab': True}}, diff --git a/examples/features/multi-tab_handling.py b/examples/features/multi-tab_handling.py index fa4d38447..9a83d0be5 100644 --- a/examples/features/multi-tab_handling.py +++ b/examples/features/multi-tab_handling.py @@ -18,7 +18,7 @@ from browser_use import Agent from browser_use.llm import ChatOpenAI # video: https://preview.screen.studio/share/clenCmS6 -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') agent = Agent( task='open 3 tabs with elon musk, trump, and steve jobs, then go back to the first and stop', llm=llm, diff --git a/examples/features/outsource_state.py b/examples/features/outsource_state.py index 821d84250..a030c8b91 100644 --- a/examples/features/outsource_state.py +++ b/examples/features/outsource_state.py @@ -35,10 +35,10 @@ async def main(): for i in range(10): agent = Agent( task=task, - llm=ChatOpenAI(model='gpt-4o'), + llm=ChatOpenAI(model='gpt-4.1'), browser_session=browser_session, injected_agent_state=agent_state, - page_extraction_llm=ChatOpenAI(model='gpt-4o-mini'), + page_extraction_llm=ChatOpenAI(model='gpt-4.1-mini'), ) done, valid = await agent.take_step() diff --git a/examples/features/parallel_agents.py b/examples/features/parallel_agents.py index 39e957fbc..97b417242 100644 --- a/examples/features/parallel_agents.py +++ b/examples/features/parallel_agents.py @@ -20,7 +20,7 @@ browser_session = BrowserSession( user_data_dir='~/.config/browseruse/profiles/default', ) ) -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') async def main(): diff --git a/examples/features/pause_agent.py b/examples/features/pause_agent.py index cced99efc..914c528a6 100644 --- a/examples/features/pause_agent.py +++ b/examples/features/pause_agent.py @@ -15,7 +15,7 @@ from browser_use.llm import ChatOpenAI class AgentController: def __init__(self): - llm = ChatOpenAI(model='gpt-4o') + llm = ChatOpenAI(model='gpt-4.1') self.agent = Agent( task='open in one action https://www.google.com, https://www.wikipedia.org, https://www.youtube.com, https://www.github.com, https://amazon.com', llm=llm, diff --git a/examples/features/planner.py b/examples/features/planner.py index e528a81a2..cf7ecb2d4 100644 --- a/examples/features/planner.py +++ b/examples/features/planner.py @@ -11,7 +11,7 @@ load_dotenv() from browser_use import Agent from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o', temperature=0.0) +llm = ChatOpenAI(model='gpt-4.1', temperature=0.0) planner_llm = ChatOpenAI( model='o3-mini', ) diff --git a/examples/features/restrict_urls.py b/examples/features/restrict_urls.py index 6383793b8..9269cc38f 100644 --- a/examples/features/restrict_urls.py +++ b/examples/features/restrict_urls.py @@ -12,7 +12,7 @@ from browser_use import Agent from browser_use.browser import BrowserProfile, BrowserSession from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o', temperature=0.0) +llm = ChatOpenAI(model='gpt-4.1', temperature=0.0) task = ( "go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?" ) diff --git a/examples/features/result_processing.py b/examples/features/result_processing.py index 6030c6083..d4d9aa3da 100644 --- a/examples/features/result_processing.py +++ b/examples/features/result_processing.py @@ -14,7 +14,7 @@ from browser_use.agent.views import AgentHistoryList from browser_use.browser import BrowserProfile, BrowserSession from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') async def main(): diff --git a/examples/features/save_trace.py b/examples/features/save_trace.py index 6e93fb2ee..3e9caa100 100644 --- a/examples/features/save_trace.py +++ b/examples/features/save_trace.py @@ -12,7 +12,7 @@ from browser_use.agent.service import Agent from browser_use.browser import BrowserProfile, BrowserSession from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o', temperature=0.0) +llm = ChatOpenAI(model='gpt-4.1', temperature=0.0) async def main(): diff --git a/examples/features/small_model_for_extraction.py b/examples/features/small_model_for_extraction.py index f0c1335bd..73e60a92e 100644 --- a/examples/features/small_model_for_extraction.py +++ b/examples/features/small_model_for_extraction.py @@ -11,8 +11,8 @@ load_dotenv() from browser_use import Agent from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o', temperature=0.0) -small_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0) +llm = ChatOpenAI(model='gpt-4.1', temperature=0.0) +small_llm = ChatOpenAI(model='gpt-4.1-mini', temperature=0.0) task = 'Find the founders of browser-use in ycombinator, extract all links and open the links one by one' agent = Agent(task=task, llm=llm, page_extraction_llm=small_llm) diff --git a/examples/features/validate_output.py b/examples/features/validate_output.py index c3bd5b558..ee592db3c 100644 --- a/examples/features/validate_output.py +++ b/examples/features/validate_output.py @@ -39,7 +39,7 @@ async def done(params: DoneResult): async def main(): task = 'Go to hackernews hn and give me the top 1 post' - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') agent = Agent(task=task, llm=model, controller=controller, validate_output=True) # NOTE: this should fail to demonstrate the validator await agent.run(max_steps=5) diff --git a/examples/integrations/gmail_2fa_integration.py b/examples/integrations/gmail_2fa_integration.py index 35843c8f6..18e13e4d6 100644 --- a/examples/integrations/gmail_2fa_integration.py +++ b/examples/integrations/gmail_2fa_integration.py @@ -43,7 +43,7 @@ async def main(): print() # Initialize LLM - llm = ChatOpenAI(model='gpt-4o') + llm = ChatOpenAI(model='gpt-4.1') # Example 1: Basic Gmail authentication test print('📧 Testing Gmail authentication...') diff --git a/examples/models/azure_openai.py b/examples/models/azure_openai.py index 4619b830b..c5d923670 100644 --- a/examples/models/azure_openai.py +++ b/examples/models/azure_openai.py @@ -27,7 +27,7 @@ if not azure_openai_api_key or not azure_openai_endpoint: # Initialize the Azure OpenAI client llm = ChatAzureOpenAI( - model='gpt-4o', + model='gpt-4.1', api_key=azure_openai_api_key, azure_endpoint=azure_openai_endpoint, # Corrected to use azure_endpoint instead of openai_api_base ) diff --git a/examples/models/langchain/README.md b/examples/models/langchain/README.md index 52eb6880d..d05bacc22 100644 --- a/examples/models/langchain/README.md +++ b/examples/models/langchain/README.md @@ -15,7 +15,7 @@ async def main(): # Create a LangChain model (OpenAI) langchain_model = ChatOpenAI( - model='gpt-4o-mini', + model='gpt-4.1-mini', temperature=0.1, ) diff --git a/examples/models/langchain/example.py b/examples/models/langchain/example.py index 03aaf43b4..9d308f296 100644 --- a/examples/models/langchain/example.py +++ b/examples/models/langchain/example.py @@ -25,7 +25,7 @@ async def main(): # Create a LangChain model (OpenAI) langchain_model = ChatOpenAI( - model='gpt-4o-mini', + model='gpt-4.1-mini', temperature=0.1, ) diff --git a/examples/ui/command_line.py b/examples/ui/command_line.py index dfe5c1c46..cc2efa33c 100644 --- a/examples/ui/command_line.py +++ b/examples/ui/command_line.py @@ -45,7 +45,7 @@ def get_llm(provider: str): if not api_key: raise ValueError('Error: OPENAI_API_KEY is not set. Please provide a valid API key.') - return ChatOpenAI(model='gpt-4o', temperature=0.0) + return ChatOpenAI(model='gpt-4.1', temperature=0.0) else: raise ValueError(f'Unsupported provider: {provider}') diff --git a/examples/ui/gradio_demo.py b/examples/ui/gradio_demo.py index 3961f7678..53f65ba8c 100644 --- a/examples/ui/gradio_demo.py +++ b/examples/ui/gradio_demo.py @@ -59,7 +59,7 @@ def parse_agent_history(history_str: str) -> None: async def run_browser_task( task: str, api_key: str, - model: str = 'gpt-4o', + model: str = 'gpt-4.1', headless: bool = True, ) -> str: if not api_key.strip(): @@ -70,7 +70,7 @@ async def run_browser_task( try: agent = Agent( task=task, - llm=ChatOpenAI(model='gpt-4o'), + llm=ChatOpenAI(model='gpt-4.1'), ) result = await agent.run() # TODO: The result could be parsed better diff --git a/examples/ui/streamlit_demo.py b/examples/ui/streamlit_demo.py index 2171f22bb..4d85792b9 100644 --- a/examples/ui/streamlit_demo.py +++ b/examples/ui/streamlit_demo.py @@ -44,7 +44,7 @@ def get_llm(provider: str): st.error('Error: OPENAI_API_KEY is not set. Please provide a valid API key.') st.stop() - return ChatOpenAI(model='gpt-4o', temperature=0.0) + return ChatOpenAI(model='gpt-4.1', temperature=0.0) else: st.error(f'Unsupported provider: {provider}') st.stop() diff --git a/examples/use-cases/captcha.py b/examples/use-cases/captcha.py index 9c012cd97..c0104f8e6 100644 --- a/examples/use-cases/captcha.py +++ b/examples/use-cases/captcha.py @@ -26,7 +26,7 @@ if not os.getenv('OPENAI_API_KEY'): async def main(): - llm = ChatOpenAI(model='gpt-4o') + llm = ChatOpenAI(model='gpt-4.1') agent = Agent( task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha', llm=llm, diff --git a/examples/use-cases/check_appointment.py b/examples/use-cases/check_appointment.py index b4c695d24..f01592a8f 100644 --- a/examples/use-cases/check_appointment.py +++ b/examples/use-cases/check_appointment.py @@ -42,7 +42,7 @@ async def main(): 'If there is no available date in both months, tell me there is no available date.' ) - model = ChatOpenAI(model='gpt-4o-mini') + model = ChatOpenAI(model='gpt-4.1-mini') agent = Agent(task, model, controller=controller, use_vision=True) await agent.run() diff --git a/examples/use-cases/find_and_apply_to_jobs.py b/examples/use-cases/find_and_apply_to_jobs.py index 9eeb30c0d..06bc8ad59 100644 --- a/examples/use-cases/find_and_apply_to_jobs.py +++ b/examples/use-cases/find_and_apply_to_jobs.py @@ -137,7 +137,7 @@ async def main(): # ground_task + '\n' + 'Meta', ] model = ChatAzureOpenAI( - model='gpt-4o', + model='gpt-4.1', ) agents = [] diff --git a/examples/use-cases/find_influencer_profiles.py b/examples/use-cases/find_influencer_profiles.py index 862cb4589..9b2567e0f 100644 --- a/examples/use-cases/find_influencer_profiles.py +++ b/examples/use-cases/find_influencer_profiles.py @@ -68,7 +68,7 @@ async def main(): 'Go to this tiktok video url, open it and extract the @username from the resulting url. Then do a websearch for this username to find all his social media profiles. Return me the links to the social media profiles with the platform name.' ' https://www.tiktokv.com/share/video/7470981717659110678/ ' ) - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') agent = Agent(task=task, llm=model, controller=controller) history = await agent.run() diff --git a/examples/use-cases/google_sheets.py b/examples/use-cases/google_sheets.py index fec41880a..f0100da68 100644 --- a/examples/use-cases/google_sheets.py +++ b/examples/use-cases/google_sheets.py @@ -44,7 +44,7 @@ async def main(): ) async with browser_session: - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') # eraser = Agent( # task=""" diff --git a/examples/use-cases/online_coding_agent.py b/examples/use-cases/online_coding_agent.py index 59bd27eb1..d9f55e20d 100644 --- a/examples/use-cases/online_coding_agent.py +++ b/examples/use-cases/online_coding_agent.py @@ -20,7 +20,7 @@ if not os.getenv('OPENAI_API_KEY'): async def main(): browser_session = BrowserSession() - model = ChatOpenAI(model='gpt-4o') + model = ChatOpenAI(model='gpt-4.1') # Initialize browser agent agent1 = Agent( diff --git a/examples/use-cases/play_chess.py b/examples/use-cases/play_chess.py index 912932d21..2ebc464da 100755 --- a/examples/use-cases/play_chess.py +++ b/examples/use-cases/play_chess.py @@ -327,7 +327,7 @@ async def main(): 8. Repeat steps 4-7 until the game ends. If anything seems wrong, use 'Read Chess Board' again. 9. Announce the final result. """, - llm=ChatOpenAI(model='gpt-4o'), + llm=ChatOpenAI(model='gpt-4.1'), controller=controller, ) result = await agent.run() diff --git a/examples/use-cases/post-twitter.py b/examples/use-cases/post-twitter.py index 0645be42f..7ea7347d0 100644 --- a/examples/use-cases/post-twitter.py +++ b/examples/use-cases/post-twitter.py @@ -49,7 +49,7 @@ class TwitterConfig: message: str reply_url: str headless: bool = False - model: str = 'gpt-4o-mini' + model: str = 'gpt-4.1-mini' base_url: str = 'https://x.com/home' diff --git a/examples/use-cases/scrolling_page.py b/examples/use-cases/scrolling_page.py index 738ed475c..aaf754290 100644 --- a/examples/use-cases/scrolling_page.py +++ b/examples/use-cases/scrolling_page.py @@ -24,7 +24,7 @@ This script demonstrates how the agent can navigate to a webpage and scroll down If no amount is specified, the agent will scroll down by one page height. """ -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') browser_profile = BrowserProfile(headless=False) browser_session = BrowserSession(browser_profile=browser_profile) diff --git a/examples/use-cases/shopping.py b/examples/use-cases/shopping.py index 331ed82fc..fda19db13 100644 --- a/examples/use-cases/shopping.py +++ b/examples/use-cases/shopping.py @@ -113,7 +113,7 @@ browser_session = BrowserSession() agent = Agent( task=task, - llm=ChatOpenAI(model='gpt-4o'), + llm=ChatOpenAI(model='gpt-4.1'), browser_session=browser_session, ) diff --git a/examples/use-cases/web_voyager_agent.py b/examples/use-cases/web_voyager_agent.py index c8ab09fba..74d678460 100644 --- a/examples/use-cases/web_voyager_agent.py +++ b/examples/use-cases/web_voyager_agent.py @@ -19,11 +19,11 @@ from browser_use.llm import ChatAzureOpenAI, ChatOpenAI # Set LLM based on defined environment variables if os.getenv('OPENAI_API_KEY'): llm = ChatOpenAI( - model='gpt-4o', + model='gpt-4.1', ) elif os.getenv('AZURE_OPENAI_KEY') and os.getenv('AZURE_OPENAI_ENDPOINT'): llm = ChatAzureOpenAI( - model='gpt-4o', + model='gpt-4.1', ) else: raise ValueError('No LLM found. Please set OPENAI_API_KEY or AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT.') diff --git a/examples/use-cases/wikipedia_banana_to_quantum.py b/examples/use-cases/wikipedia_banana_to_quantum.py index 03fbc2eaa..e300a229d 100644 --- a/examples/use-cases/wikipedia_banana_to_quantum.py +++ b/examples/use-cases/wikipedia_banana_to_quantum.py @@ -14,7 +14,7 @@ from browser_use.llm import ChatOpenAI # video https://preview.screen.studio/share/vuq91Ej8 llm = ChatOpenAI( - model='gpt-4o', + model='gpt-4.1', temperature=0.0, ) task = 'go to https://en.wikipedia.org/wiki/Banana and click on buttons on the wikipedia page to go as fast as possible from banna to Quantum mechanics' diff --git a/tests/ci/test_sync_agent_events.py b/tests/ci/test_sync_agent_events.py index 5ebee94bf..fb6500048 100644 --- a/tests/ci/test_sync_agent_events.py +++ b/tests/ci/test_sync_agent_events.py @@ -404,7 +404,7 @@ class TestEventValidation: user_id='0683fb03-c5da-79c9-8000-d3a39c47c650', agent_session_id='0683fb03-c5da-79c9-8000-d3a39c47c651', task='test', - llm_model='gpt-4o', + llm_model='gpt-4.1', done_output=None, user_feedback_type=None, user_comment=None, diff --git a/tests/old/test_agent_actions.py b/tests/old/test_agent_actions.py index 773a622de..9825133c3 100644 --- a/tests/old/test_agent_actions.py +++ b/tests/old/test_agent_actions.py @@ -13,9 +13,9 @@ def llm(): # return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None) return ChatAzureOpenAI( - model='gpt-4o', + model='gpt-4.1', ) - # return ChatOpenAI(model='gpt-4o-mini') + # return ChatOpenAI(model='gpt-4.1-mini') @pytest.fixture diff --git a/tests/old/test_core_functionality.py b/tests/old/test_core_functionality.py index c9f1851f0..73cd82d42 100644 --- a/tests/old/test_core_functionality.py +++ b/tests/old/test_core_functionality.py @@ -79,7 +79,7 @@ class TestCoreFunctionality: def llm(self): """Initialize language model for testing with minimal settings.""" return ChatOpenAI( - model='gpt-4o', + model='gpt-4.1', temperature=0.0, ) diff --git a/tests/old/test_dropdown_error.py b/tests/old/test_dropdown_error.py index dedb723dd..94bbe507d 100644 --- a/tests/old/test_dropdown_error.py +++ b/tests/old/test_dropdown_error.py @@ -14,7 +14,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from browser_use import Agent, AgentHistoryList from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True)) agent = Agent( diff --git a/tests/old/test_gif_path.py b/tests/old/test_gif_path.py index 31bb0dd24..445ab44ba 100644 --- a/tests/old/test_gif_path.py +++ b/tests/old/test_gif_path.py @@ -14,7 +14,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from browser_use import Agent, AgentHistoryList from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, disable_security=True)) diff --git a/tests/old/test_mind2web.py b/tests/old/test_mind2web.py index 9156818d4..f0f6c6ee0 100644 --- a/tests/old/test_mind2web.py +++ b/tests/old/test_mind2web.py @@ -50,7 +50,7 @@ def llm(): # return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None) return ChatAzureOpenAI( - model='gpt-4o', + model='gpt-4.1', ) diff --git a/tests/old/test_react_dropdown.py b/tests/old/test_react_dropdown.py index f15287f6b..f3709253f 100644 --- a/tests/old/test_react_dropdown.py +++ b/tests/old/test_react_dropdown.py @@ -15,7 +15,7 @@ import asyncio from browser_use import Agent, AgentHistoryList from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, disable_security=True)) diff --git a/tests/old/test_self_registered_actions.py b/tests/old/test_self_registered_actions.py index 87c467064..c24cfd45a 100644 --- a/tests/old/test_self_registered_actions.py +++ b/tests/old/test_self_registered_actions.py @@ -78,7 +78,7 @@ def llm(): # return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None) return ChatAzureOpenAI( - model='gpt-4o', + model='gpt-4.1', ) diff --git a/tests/old/test_vision.py b/tests/old/test_vision.py index 600e0af17..346b78301 100644 --- a/tests/old/test_vision.py +++ b/tests/old/test_vision.py @@ -16,7 +16,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from browser_use import Agent, AgentHistoryList, BrowserSession, Controller from browser_use.llm import ChatOpenAI -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') controller = Controller() # use this test to ask the model questions about the page like diff --git a/tests/old/test_wait_for_element.py b/tests/old/test_wait_for_element.py index 2e8d55e26..f573e81d8 100644 --- a/tests/old/test_wait_for_element.py +++ b/tests/old/test_wait_for_element.py @@ -21,7 +21,7 @@ from browser_use.browser import BrowserProfile, BrowserSession load_dotenv() # Initialize language model and controller. -llm = ChatOpenAI(model='gpt-4o') +llm = ChatOpenAI(model='gpt-4.1') controller = Controller()