Merge branch 'main' into hot-fix/fix-highlight-interative-elements-in-shadow-dom

This commit is contained in:
Mert Unsal
2025-07-05 10:46:51 +02:00
committed by GitHub
78 changed files with 564 additions and 126 deletions

View File

@@ -76,7 +76,7 @@ from browser_use import Agent
from browser_use.llm import ChatOpenAI
task = "Find the CEO of OpenAI and return their name"
model = ChatOpenAI(model="gpt-4o")
model = ChatOpenAI(model="gpt-4.1-mini")
agent = Agent(task=task, llm=model, controller=controller)

View File

@@ -44,6 +44,8 @@ body:
- gpt-4.1
- gpt-4.1-mini
- gpt-4.1-nano
- o4-mini
- o3
- claude-3.7-sonnet
- claude-3.5-sonnet
- gemini-2.6-flash-preview
@@ -114,7 +116,7 @@ body:
agent = Agent(
task='...',
llm=ChatOpenAI(model="gpt-4o"),
llm=ChatOpenAI(model="gpt-4.1"),
browser_session=BrowserSession(headless=False),
)
...

View File

@@ -46,7 +46,7 @@ from browser_use.llm import ChatOpenAI
async def main():
agent = Agent(
task="Compare the price of gpt-4o and DeepSeek-V3",
llm=ChatOpenAI(model="gpt-4o"),
llm=ChatOpenAI(model="o4-mini", temperature=1.0),
)
await agent.run()

View File

@@ -91,6 +91,7 @@ Strictly follow these rules while using the browser and navigating the web:
<file_system>
- You have access to a persistent file system which you can use to track progress, store results, and manage long tasks.
- Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Update it to mark completed items and track what remains. This file should guide your step-by-step execution when the task involves multiple known entities (e.g., a list of links or items to visit). ALWAYS use `write_file` to rewrite the entire `todo.md` when you want to update your progress. NEVER use `append_file` on `todo.md` as this can explode your context.
- If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas.
- Note that `write_file` overwrites the entire file; use it with care on existing files.
- When you `append_file`, ALWAYS put newlines at the beginning of the appended content and not at the end.
- If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary.

View File

@@ -91,6 +91,7 @@ Strictly follow these rules while using the browser and navigating the web:
<file_system>
- You have access to a persistent file system which you can use to track progress, store results, and manage long tasks.
- Your file system is initialized with a `todo.md`: Use this to keep a checklist for known subtasks. Update it to mark completed items and track what remains. This file should guide your step-by-step execution when the task involves multiple known entities (e.g., a list of links or items to visit). ALWAYS use `write_file` to rewrite the entire `todo.md` when you want to update your progress. NEVER use `append_file` on `todo.md` as this can explode your context.
- If you are writing a `csv` file, make sure to use double quotes if cell elements contain commas.
- Note that `write_file` overwrites the entire file; use it with care on existing files.
- When you `append_file`, ALWAYS put newlines at the beginning of the appended content and not at the end.
- If the file is too large, you are only given a preview of your file. Use `read_file` to see the full content if necessary.

View File

@@ -211,11 +211,11 @@ def get_llm(config: dict[str, Any]):
# Auto-detect based on available API keys
if CONFIG.OPENAI_API_KEY:
return ChatOpenAI(model='gpt-4o', temperature=temperature)
return ChatOpenAI(model='gpt-4.1', temperature=temperature)
elif CONFIG.ANTHROPIC_API_KEY:
return ChatAnthropic(model='claude-3.5-sonnet-exp', temperature=temperature)
return ChatAnthropic(model='claude-3.5-sonnet', temperature=temperature)
elif CONFIG.GOOGLE_API_KEY:
return ChatGoogle(model='gemini-2.0-flash-lite', temperature=temperature)
return ChatGoogle(model='gemini-2.5-flash', temperature=temperature)
else:
print(
'⚠️ No API keys found. Please update your config or set one of: OPENAI_API_KEY, ANTHROPIC_API_KEY, or GOOGLE_API_KEY.'

View File

@@ -566,7 +566,7 @@ Explain the content of the page and that the requested information is not availa
return ActionResult(error=msg, include_in_memory=True)
# File System Actions
@self.registry.action('Write content to file_name in file system, use only .md or .txt extensions.')
@self.registry.action('Write content to file_name in file system. Allowed extensions are .md, .txt, .json, .csv.')
async def write_file(file_name: str, content: str, file_system: FileSystem):
result = await file_system.write_file(file_name, content)
logger.info(f'💾 {result}')
@@ -581,13 +581,9 @@ Explain the content of the page and that the requested information is not availa
@self.registry.action('Read file_name from file system')
async def read_file(file_name: str, available_file_paths: list[str], file_system: FileSystem):
if available_file_paths and file_name in available_file_paths:
import anyio
async with await anyio.open_file(file_name, 'r') as f:
content = await f.read()
result = f'Read from file {file_name}.\n<content>\n{content}\n</content>'
result = await file_system.read_file(file_name, external_file=True)
else:
result = file_system.read_file(file_name)
result = await file_system.read_file(file_name)
MAX_MEMORY_SIZE = 1000
if len(result) > MAX_MEMORY_SIZE:

View File

@@ -94,6 +94,22 @@ class TxtFile(BaseFile):
return 'txt'
class JsonFile(BaseFile):
    """BaseFile subclass for JSON files; its extension is ``json``."""

    @property
    def extension(self) -> str:
        """Return the extension string for this file type, without a dot."""
        return 'json'
class CsvFile(BaseFile):
    """BaseFile subclass for CSV files; its extension is ``csv``."""

    @property
    def extension(self) -> str:
        """Return the extension string for this file type, without a dot."""
        return 'csv'
class FileSystemState(BaseModel):
"""Serializable state of the file system"""
@@ -120,6 +136,8 @@ class FileSystem:
self._file_types: dict[str, type[BaseFile]] = {
'md': MarkdownFile,
'txt': TxtFile,
'json': JsonFile,
'csv': CsvFile,
}
self.files = {}
@@ -188,8 +206,41 @@ class FileSystem:
return file_obj.read()
def read_file(self, full_filename: str) -> str:
async def read_file(self, full_filename: str, external_file: bool = False) -> str:
"""Read file content using file-specific read method and return appropriate message to LLM"""
if external_file:
try:
try:
_, extension = self._parse_filename(full_filename)
except Exception:
return f'Error: Invalid filename format {full_filename}. Must be alphanumeric with a supported extension.'
if extension in ['md', 'txt', 'json', 'csv']:
import anyio
async with await anyio.open_file(full_filename, 'r') as f:
content = await f.read()
return f'Read from file {full_filename}.\n<content>\n{content}\n</content>'
elif extension == 'pdf':
import pypdf
reader = pypdf.PdfReader(full_filename)
num_pages = len(reader.pages)
MAX_PDF_PAGES = 5
extra_pages = num_pages - MAX_PDF_PAGES
extracted_text = ''
for page in reader.pages[:MAX_PDF_PAGES]:
extracted_text += page.extract_text()
extra_pages_text = f'{extra_pages} more pages...' if extra_pages > 0 else ''
return f'Read from file {full_filename}.\n<content>\n{extracted_text}\n{extra_pages_text}</content>'
else:
return f'Error: Cannot read file {full_filename} as {extension} extension is not supported.'
except FileNotFoundError:
return f"Error: File '{full_filename}' not found."
except PermissionError:
return f"Error: Permission denied to read file '{full_filename}'."
except Exception as e:
return f"Error: Could not read file '{full_filename}'."
if not self._is_valid_filename(full_filename):
return INVALID_FILENAME_ERROR_MESSAGE
@@ -367,6 +418,10 @@ class FileSystem:
file_obj = MarkdownFile(**file_info)
elif file_type == 'TxtFile':
file_obj = TxtFile(**file_info)
elif file_type == 'JsonFile':
file_obj = JsonFile(**file_info)
elif file_type == 'CsvFile':
file_obj = CsvFile(**file_info)
else:
# Skip unknown file types
continue

View File

@@ -22,15 +22,13 @@ _gmail_service: GmailService | None = None
class GetRecentEmailsParams(BaseModel):
"""Parameters for getting recent emails"""
query: str = Field(
default='', description='Gmail search query (e.g., "from:noreply@example.com") - optional additional filter'
)
max_results: int = Field(default=10, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 10)')
keyword: str = Field(default='', description='A single keyword for search, e.g. github, airbnb, etc.')
max_results: int = Field(default=3, ge=1, le=50, description='Maximum number of emails to retrieve (1-50, default: 3)')
def register_gmail_actions(
controller: Controller, gmail_service: GmailService | None = None, access_token: str | None = None
) -> None:
) -> Controller:
"""
Register Gmail actions with the provided controller
Args:
@@ -48,11 +46,8 @@ def register_gmail_actions(
else:
_gmail_service = GmailService()
@controller.action(
description='📧 **Get recent emails** - to fetch recent emails from the past 5 minutes with full content. '
'Perfect for retrieving verification codes, OTP, 2FA tokens, or any recent email content. '
'This action accesses your Gmail inbox to read email messages and extract verification codes. '
'Returns complete email content so you can extract verification codes or analyze email details yourself.',
@controller.registry.action(
description='Get recent emails from the mailbox with a keyword to retrieve verification codes, OTP, 2FA tokens, magic links, or any recent email content. Keep your query a single keyword.',
param_model=GetRecentEmailsParams,
)
async def get_recent_emails(params: GetRecentEmailsParams) -> ActionResult:
@@ -77,8 +72,8 @@ def register_gmail_actions(
# Build query with time filter and optional user query
query_parts = [f'newer_than:{time_filter}']
if params.query.strip():
query_parts.append(params.query.strip())
if params.keyword.strip():
query_parts.append(params.keyword.strip())
query = ' '.join(query_parts)
logger.info(f'🔍 Gmail search query: {query}')
@@ -87,10 +82,11 @@ def register_gmail_actions(
emails = await _gmail_service.get_recent_emails(max_results=max_results, query=query, time_filter=time_filter)
if not emails:
query_info = f" matching '{params.query}'" if params.query.strip() else ''
query_info = f" matching '{params.keyword}'" if params.keyword.strip() else ''
memory = f'No recent emails found from last {time_filter}{query_info}'
return ActionResult(
extracted_content=f'No emails found from the last {time_filter}{query_info}',
long_term_memory=f'No recent emails found from last {time_filter}',
extracted_content=memory,
long_term_memory=memory,
)
# Format with full email content for large display
@@ -108,7 +104,7 @@ def register_gmail_actions(
return ActionResult(
extracted_content=content,
include_extracted_content_only_once=True,
long_term_memory=f'Retrieved {len(emails)} recent emails from last {time_filter}',
long_term_memory=f'Retrieved {len(emails)} recent emails from last {time_filter} for query {query}.',
)
except Exception as e:
@@ -117,3 +113,5 @@ def register_gmail_actions(
error=f'Error getting recent emails: {str(e)}',
long_term_memory='Failed to get recent emails due to error',
)
return controller

View File

@@ -95,9 +95,9 @@ def create_mock_state_message(temp_dir: str):
[
(ChatGroq, 'meta-llama/llama-4-maverick-17b-128e-instruct'),
(ChatGoogle, 'gemini-2.0-flash-exp'),
(ChatOpenAI, 'gpt-4o-mini'),
(ChatOpenAI, 'gpt-4.1-mini'),
(ChatAnthropic, 'claude-3-5-sonnet-latest'),
(ChatAzureOpenAI, 'gpt-4o-mini'),
(ChatAzureOpenAI, 'gpt-4.1-mini'),
],
)
async def test_single_step_parametrized(llm_class, model_name):

View File

@@ -50,7 +50,7 @@ load_dotenv()
import asyncio
llm = ChatOpenAI(model="gpt-4o")
llm = ChatOpenAI(model="gpt-4.1")
async def main():
agent = Agent(

View File

@@ -949,8 +949,8 @@ def create_controller(
else:
controller = Controller(output_model=output_model)
# Add Gmail 2FA support if tokens dict is available and task contains email
if gmail_tokens_dict and task:
# Add Gmail 2FA support if tokens dict is available and task has login_type OTP
if gmail_tokens_dict and task and hasattr(task, 'login_type') and task.login_type == 'OTP':
try:
# Extract username from task - check multiple possible sources
username = None
@@ -979,17 +979,23 @@ def create_controller(
from browser_use.integrations.gmail import register_gmail_actions
# Register Gmail actions using the access token
register_gmail_actions(controller, access_token=access_token)
logger.info(f'Gmail 2FA integration registered successfully for user {user_id}')
controller = register_gmail_actions(controller, access_token=access_token)
logger.info(f'Gmail 2FA integration registered successfully for user {user_id} (OTP task)')
else:
logger.info(f'No Gmail 2FA token found for user {user_id}, running without Gmail integration')
else:
logger.info('No email found in task, running without Gmail integration')
logger.info('No email found in OTP task, running without Gmail integration')
except Exception as e:
logger.error(f'Failed to setup Gmail integration: {e}')
else:
logger.info(f'No Gmail 2FA tokens provided, running without Gmail integration: {gmail_tokens_dict}, {task}')
if gmail_tokens_dict and task:
if not hasattr(task, 'login_type') or task.login_type != 'OTP':
logger.info(f'Task login_type is "{getattr(task, "login_type", "None")}", not OTP - skipping Gmail integration')
else:
logger.info('Gmail 2FA tokens provided but no task or task missing login_type')
else:
logger.info('No Gmail 2FA tokens provided or no task, running without Gmail integration')
return controller
@@ -3039,7 +3045,7 @@ if __name__ == '__main__':
'--model', type=str, default='gpt-4o', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for the agent'
)
parser.add_argument(
'--eval-model', type=str, default='gpt-4o', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for evaluation'
'--eval-model', type=str, default='gpt-4.1', choices=list(SUPPORTED_MODELS.keys()), help='Model to use for evaluation'
)
parser.add_argument('--no-vision', action='store_true', help='Disable vision capabilities in the agent')

View File

@@ -26,7 +26,7 @@ async def main():
await browser_session.start()
current_agent = None
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
task1 = 'find todays weather on San Francisco and extract it as json'
task2 = 'find todays weather in Zurich and extract it as json'

View File

@@ -24,7 +24,7 @@ browser_session = BrowserSession(browser_profile=browser_profile)
async def main():
agent = Agent(
task='Find todays DOW stock price',
llm=ChatOpenAI(model='gpt-4o'),
llm=ChatOpenAI(model='gpt-4.1'),
browser_session=browser_session,
)

View File

@@ -18,7 +18,7 @@ from browser_use.browser.profile import BrowserProfile
from browser_use.browser.types import async_patchright
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
terminal_width, terminal_height = shutil.get_terminal_size((80, 20))

View File

@@ -56,7 +56,7 @@ async def main():
You are completely FORBIDDEN to use any other method to get the 2FA code.
"""
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
agent = Agent(task=task, llm=model, controller=controller)
result = await agent.run()

View File

@@ -68,7 +68,7 @@ async def main():
"""Main function to run the example"""
browser_session = BrowserSession()
await browser_session.start()
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
# Create the agent
agent = Agent( # disco mode will not be triggered on apple.com because the LLM won't be able to see that action available, it should work on Google.com though.

View File

@@ -93,7 +93,7 @@ names = [
async def main():
task = 'use search_web with "find email address of the following ETH professor:" for each of the following persons in a list of actions. Finally return the list with name and email if provided - do always 5 at once'
task += '\n' + '\n'.join(names)
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
browser_profile = BrowserProfile()
agent = Agent(task=task, llm=model, controller=controller, browser_profile=browser_profile)

View File

@@ -39,7 +39,7 @@ async def paste_from_clipboard(page: Page):
async def main():
task = 'Copy the text "Hello, world!" to the clipboard, then go to google.com and paste the text'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
browser_session = BrowserSession(browser_profile=browser_profile)
await browser_session.start()
agent = Agent(

View File

@@ -220,8 +220,8 @@ async def record_activity(agent_obj):
agent = Agent(
task='Compare the price of gpt-4o and DeepSeek-V3',
llm=ChatOpenAI(model='gpt-4o'),
task='Compare the price of gpt-4.1 and DeepSeek-V3',
llm=ChatOpenAI(model='gpt-4.1'),
)

View File

@@ -268,7 +268,7 @@ async def example_drag_drop_sortable_list():
controller = await create_drag_drop_controller()
# Initialize LLM (replace with your preferred model)
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
# Create the agent
agent = Agent(
@@ -288,7 +288,7 @@ async def example_drag_drop_coordinates():
"""Example: Direct coordinate-based drag and drop."""
controller = await create_drag_drop_controller()
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
agent = Agent(
task='Go to a canvas drawing website and draw a simple line using drag and drop from coordinates (100, 100) to (300, 200)',

View File

@@ -79,7 +79,7 @@ async def main():
URL: https://docs.house.gov/meetings/GO/GO00/20220929/115171/HHRG-117-GO00-20220929-SD010.pdf
""",
llm=ChatOpenAI(model='gpt-4o'),
llm=ChatOpenAI(model='gpt-4.1'),
controller=controller,
)
result = await agent.run()

View File

@@ -74,7 +74,7 @@ async def hover_element(params: HoverAction, browser_session: BrowserSession):
async def main():
task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the css selector #hoverdivpara, then click on "Can you click me?"'
# task = 'Open https://testpages.eviltester.com/styled/csspseudo/css-hover.html and hover the element with the xpath //*[@id="hoverdivpara"], then click on "Can you click me?"'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
browser_session = BrowserSession(browser_profile=browser_profile)
await browser_session.start()
agent = Agent(

View File

@@ -34,7 +34,7 @@ async def done(text: str):
async def main():
task = 'go to brower-use.com and then done'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
agent = Agent(task=task, llm=model, controller=controller)
await agent.run()

View File

@@ -46,7 +46,7 @@ async def main():
# Example task using the 1Password 2FA action
task = 'Go to account.google.com, enter username and password, then if prompted for 2FA code, get 2FA code from 1Password for and enter it'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
agent = Agent(task=task, llm=model, controller=controller)
result = await agent.run()

View File

@@ -71,7 +71,7 @@ names = [
async def main():
task = 'use search_web with "find email address of the following ETH professor:" for each of the persons. Finally return the list with name and email if provided '
task += '\n' + '\n'.join(names)
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
browser_profile = BrowserProfile()
agent = Agent(task=task, llm=model, controller=controller, browser_profile=browser_profile)

View File

@@ -40,7 +40,7 @@ def save_models(params: Models):
async def main():
task = 'Look up models with a license of cc-by-sa-4.0 and sort by most likes on Hugging face, save top 5 to file.'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
agent = Agent(task=task, llm=model, controller=controller)
await agent.run()

View File

@@ -71,7 +71,7 @@ async def solve_amazon_captcha(browser_session: BrowserSession):
async def main():
task = 'Go to https://www.amazon.com/errors/validateCaptcha and solve the captcha using the solve_amazon_captcha tool'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
browser_session = BrowserSession(browser_profile=browser_profile)
await browser_session.start()
agent = Agent(task=task, llm=model, controller=controller, browser_session=browser_session)

View File

@@ -181,7 +181,7 @@ async def main():
select_task = 'Open http://localhost:8000/, choose the car BMW'
button_task = 'Open http://localhost:8000/, click on the button'
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
# llm = ChatGoogleGenerativeAI(
# model="gemini-2.0-flash-lite",
# )

View File

@@ -32,7 +32,7 @@ controller = Controller()
async def main():
agent = Agent(
task='Click "Go cross-site (simple page)" button on https://csreis.github.io/tests/cross-site-iframe.html then tell me the text within',
llm=ChatOpenAI(model='gpt-4o', temperature=0.0),
llm=ChatOpenAI(model='gpt-4.1', temperature=0.0),
controller=controller,
browser_session=browser_session,
)

View File

@@ -36,7 +36,7 @@ controller = Controller(output_model=Posts)
async def main():
task = 'Go to hackernews show hn and give me the first 5 posts'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
agent = Agent(task=task, llm=model, controller=controller)
history = await agent.run()

View File

@@ -29,7 +29,7 @@ extend_system_message = (
async def main():
task = 'do google search to find images of Elon Musk'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
agent = Agent(task=task, llm=model, extend_system_message=extend_system_message)
print(

View File

@@ -19,7 +19,7 @@ def get_llm(provider: str):
if provider == 'anthropic':
return ChatAnthropic(model='claude-3-5-sonnet-20240620', temperature=0.0)
elif provider == 'openai':
return ChatOpenAI(model='gpt-4o', temperature=0.0)
return ChatOpenAI(model='gpt-4.1', temperature=0.0)
else:
raise ValueError(f'Unsupported provider: {provider}')

View File

@@ -14,7 +14,7 @@ from browser_use.llm import ChatOpenAI
# Initialize the model
llm = ChatOpenAI(
model='gpt-4o',
model='gpt-4.1',
temperature=0.0,
)
# Get your chrome path

View File

@@ -11,7 +11,7 @@ load_dotenv()
from browser_use import Agent
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
initial_actions = [
{'go_to_url': {'url': 'https://www.google.com', 'new_tab': True}},

View File

@@ -18,7 +18,7 @@ from browser_use import Agent
from browser_use.llm import ChatOpenAI
# video: https://preview.screen.studio/share/clenCmS6
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
agent = Agent(
task='open 3 tabs with elon musk, trump, and steve jobs, then go back to the first and stop',
llm=llm,

View File

@@ -35,10 +35,10 @@ async def main():
for i in range(10):
agent = Agent(
task=task,
llm=ChatOpenAI(model='gpt-4o'),
llm=ChatOpenAI(model='gpt-4.1'),
browser_session=browser_session,
injected_agent_state=agent_state,
page_extraction_llm=ChatOpenAI(model='gpt-4o-mini'),
page_extraction_llm=ChatOpenAI(model='gpt-4.1-mini'),
)
done, valid = await agent.take_step()

View File

@@ -20,7 +20,7 @@ browser_session = BrowserSession(
user_data_dir='~/.config/browseruse/profiles/default',
)
)
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
async def main():

View File

@@ -15,7 +15,7 @@ from browser_use.llm import ChatOpenAI
class AgentController:
def __init__(self):
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
self.agent = Agent(
task='open in one action https://www.google.com, https://www.wikipedia.org, https://www.youtube.com, https://www.github.com, https://amazon.com',
llm=llm,

View File

@@ -11,7 +11,7 @@ load_dotenv()
from browser_use import Agent
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
planner_llm = ChatOpenAI(
model='o3-mini',
)

View File

@@ -12,7 +12,7 @@ from browser_use import Agent
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
task = (
"go to google.com and search for openai.com and click on the first link then extract content and scroll down - what's there?"
)

View File

@@ -14,7 +14,7 @@ from browser_use.agent.views import AgentHistoryList
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
async def main():

View File

@@ -12,7 +12,7 @@ from browser_use.agent.service import Agent
from browser_use.browser import BrowserProfile, BrowserSession
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
async def main():

View File

@@ -11,8 +11,8 @@ load_dotenv()
from browser_use import Agent
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o', temperature=0.0)
small_llm = ChatOpenAI(model='gpt-4o-mini', temperature=0.0)
llm = ChatOpenAI(model='gpt-4.1', temperature=0.0)
small_llm = ChatOpenAI(model='gpt-4.1-mini', temperature=0.0)
task = 'Find the founders of browser-use in ycombinator, extract all links and open the links one by one'
agent = Agent(task=task, llm=llm, page_extraction_llm=small_llm)

View File

@@ -39,7 +39,7 @@ async def done(params: DoneResult):
async def main():
task = 'Go to hackernews hn and give me the top 1 post'
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
agent = Agent(task=task, llm=model, controller=controller, validate_output=True)
# NOTE: this should fail to demonstrate the validator
await agent.run(max_steps=5)

View File

@@ -0,0 +1,47 @@
"""Example: run a browser-use agent on o4-mini and report token usage and run time.

The agent looks up two stock prices and is asked to write them to a CSV file.
"""

import asyncio
import os
import sys
import time

# Put the repository root on sys.path BEFORE importing browser_use, so that
# running this file from a checkout picks up the local package.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from dotenv import load_dotenv

load_dotenv()

# Tracing is best-effort: a missing `lmnr` package or a failed initialization
# must not stop the example from running.
try:
    from lmnr import Laminar

    Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY'))
except Exception as exc:
    print(f'Laminar tracing disabled: {exc}')

from browser_use import Agent
from browser_use.llm.openai.chat import ChatOpenAI

# Initialize the model
llm = ChatOpenAI(
    model='o4-mini',
    temperature=1.0,
)

task = (
    'Find current stock price of companies Meta and Amazon. Then, make me a CSV file with 2 columns: company name, stock price.'
)

agent = Agent(task=task, llm=llm)


async def main():
    """Run the agent once, then print its token usage and the elapsed time."""
    # perf_counter is monotonic, so the measurement is not affected by
    # system clock adjustments during the run.
    start_time = time.perf_counter()
    history = await agent.run()
    # token usage
    print(history.usage)
    end_time = time.perf_counter()
    print(f'Time taken: {end_time - start_time} seconds')


if __name__ == '__main__':
    asyncio.run(main())

View File

@@ -43,7 +43,7 @@ async def main():
print()
# Initialize LLM
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
# Example 1: Basic Gmail authentication test
print('📧 Testing Gmail authentication...')

View File

@@ -27,7 +27,7 @@ if not azure_openai_api_key or not azure_openai_endpoint:
# Initialize the Azure OpenAI client
llm = ChatAzureOpenAI(
model='gpt-4o',
model='gpt-4.1',
api_key=azure_openai_api_key,
azure_endpoint=azure_openai_endpoint, # Corrected to use azure_endpoint instead of openai_api_base
)

View File

@@ -15,7 +15,7 @@ async def main():
# Create a LangChain model (OpenAI)
langchain_model = ChatOpenAI(
model='gpt-4o-mini',
model='gpt-4.1-mini',
temperature=0.1,
)

View File

@@ -25,7 +25,7 @@ async def main():
# Create a LangChain model (OpenAI)
langchain_model = ChatOpenAI(
model='gpt-4o-mini',
model='gpt-4.1-mini',
temperature=0.1,
)

View File

@@ -9,37 +9,31 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from dotenv import load_dotenv
load_dotenv()
from lmnr import Laminar
try:
from lmnr import Laminar
Laminar.initialize(project_api_key=os.getenv('LMNR_PROJECT_API_KEY'))
except Exception:
print('Error initializing Laminar')
pass
from browser_use import Agent
# Initialize the model
llm = ChatOpenAI(
model='gpt-4o',
model='gpt-4.1-mini',
)
task = 'Go to google.com/travel/flights and search for flights to Tokyo next week'
task = """http://www.sadfdsafdssdafd.com/ go here and scroll around"""
task = 'Go to Louis Vuittons website, find every product and save the product details 1 by 1. Extract product details as JSON: productname (Full name as shown on the webpage), brand (Manufacturer or designer name), model (Specific version or edition), gender (Target audience: Men, Women, Unisex), sku (Unique identifier), releasedate (Launch date in YYYY-MM-DD format), retailprice (Price as a number, no currency symbols), colorway (Color description without spaces around slashes, e.g., White/PinkFoam), sizerange (Available sizes as a list, maintain decimals for half sizes, e.g., 7.5), requesturl (URL where product data is scraped), requesttimestamp (ISO 8601 timestamp of the request), primaryimgurl (URL of the main product image); ensure required fields are present, return null if data is missing.'
agent = Agent(task=task, llm=llm)
async def main():
import time
start_time = time.time()
history = await agent.run()
# token usage
print(history.usage)
end_time = time.time()
print(f'Time taken: {end_time - start_time} seconds')
if __name__ == '__main__':

View File

@@ -45,7 +45,7 @@ def get_llm(provider: str):
if not api_key:
raise ValueError('Error: OPENAI_API_KEY is not set. Please provide a valid API key.')
return ChatOpenAI(model='gpt-4o', temperature=0.0)
return ChatOpenAI(model='gpt-4.1', temperature=0.0)
else:
raise ValueError(f'Unsupported provider: {provider}')

View File

@@ -59,7 +59,7 @@ def parse_agent_history(history_str: str) -> None:
async def run_browser_task(
task: str,
api_key: str,
model: str = 'gpt-4o',
model: str = 'gpt-4.1',
headless: bool = True,
) -> str:
if not api_key.strip():
@@ -70,7 +70,7 @@ async def run_browser_task(
try:
agent = Agent(
task=task,
llm=ChatOpenAI(model='gpt-4o'),
llm=ChatOpenAI(model='gpt-4.1'),
)
result = await agent.run()
# TODO: The result could be parsed better

View File

@@ -44,7 +44,7 @@ def get_llm(provider: str):
st.error('Error: OPENAI_API_KEY is not set. Please provide a valid API key.')
st.stop()
return ChatOpenAI(model='gpt-4o', temperature=0.0)
return ChatOpenAI(model='gpt-4.1', temperature=0.0)
else:
st.error(f'Unsupported provider: {provider}')
st.stop()

View File

@@ -26,7 +26,7 @@ if not os.getenv('OPENAI_API_KEY'):
async def main():
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
agent = Agent(
task='go to https://captcha.com/demos/features/captcha-demo.aspx and solve the captcha',
llm=llm,

View File

@@ -42,7 +42,7 @@ async def main():
'If there is no available date in both months, tell me there is no available date.'
)
model = ChatOpenAI(model='gpt-4o-mini')
model = ChatOpenAI(model='gpt-4.1-mini')
agent = Agent(task, model, controller=controller, use_vision=True)
await agent.run()

View File

@@ -137,7 +137,7 @@ async def main():
# ground_task + '\n' + 'Meta',
]
model = ChatAzureOpenAI(
model='gpt-4o',
model='gpt-4.1',
)
agents = []

View File

@@ -68,7 +68,7 @@ async def main():
'Go to this tiktok video url, open it and extract the @username from the resulting url. Then do a websearch for this username to find all his social media profiles. Return me the links to the social media profiles with the platform name.'
' https://www.tiktokv.com/share/video/7470981717659110678/ '
)
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
agent = Agent(task=task, llm=model, controller=controller)
history = await agent.run()

View File

@@ -44,7 +44,7 @@ async def main():
)
async with browser_session:
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
# eraser = Agent(
# task="""

View File

@@ -20,7 +20,7 @@ if not os.getenv('OPENAI_API_KEY'):
async def main():
browser_session = BrowserSession()
model = ChatOpenAI(model='gpt-4o')
model = ChatOpenAI(model='gpt-4.1')
# Initialize browser agent
agent1 = Agent(

View File

@@ -327,7 +327,7 @@ async def main():
8. Repeat steps 4-7 until the game ends. If anything seems wrong, use 'Read Chess Board' again.
9. Announce the final result.
""",
llm=ChatOpenAI(model='gpt-4o'),
llm=ChatOpenAI(model='gpt-4.1'),
controller=controller,
)
result = await agent.run()

View File

@@ -49,7 +49,7 @@ class TwitterConfig:
message: str
reply_url: str
headless: bool = False
model: str = 'gpt-4o-mini'
model: str = 'gpt-4.1-mini'
base_url: str = 'https://x.com/home'

View File

@@ -24,7 +24,7 @@ This script demonstrates how the agent can navigate to a webpage and scroll down
If no amount is specified, the agent will scroll down by one page height.
"""
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
browser_profile = BrowserProfile(headless=False)
browser_session = BrowserSession(browser_profile=browser_profile)

View File

@@ -113,7 +113,7 @@ browser_session = BrowserSession()
agent = Agent(
task=task,
llm=ChatOpenAI(model='gpt-4o'),
llm=ChatOpenAI(model='gpt-4.1'),
browser_session=browser_session,
)

View File

@@ -19,11 +19,11 @@ from browser_use.llm import ChatAzureOpenAI, ChatOpenAI
# Set LLM based on defined environment variables
if os.getenv('OPENAI_API_KEY'):
llm = ChatOpenAI(
model='gpt-4o',
model='gpt-4.1',
)
elif os.getenv('AZURE_OPENAI_KEY') and os.getenv('AZURE_OPENAI_ENDPOINT'):
llm = ChatAzureOpenAI(
model='gpt-4o',
model='gpt-4.1',
)
else:
raise ValueError('No LLM found. Please set OPENAI_API_KEY or AZURE_OPENAI_KEY and AZURE_OPENAI_ENDPOINT.')

View File

@@ -14,7 +14,7 @@ from browser_use.llm import ChatOpenAI
# video https://preview.screen.studio/share/vuq91Ej8
llm = ChatOpenAI(
model='gpt-4o',
model='gpt-4.1',
temperature=0.0,
)
task = 'go to https://en.wikipedia.org/wiki/Banana and click on buttons on the wikipedia page to go as fast as possible from banna to Quantum mechanics'

View File

@@ -39,6 +39,7 @@ dependencies = [
"google-api-python-client>=2.174.0",
"google-auth>=2.40.3",
"google-auth-oauthlib>=1.2.2",
"pypdf>=5.7.0",
]
# google-api-core: only used for Google LLM APIs
# pyperclip: only used for examples that use copy/paste

View File

@@ -9,8 +9,10 @@ import pytest
from browser_use.filesystem.file_system import (
DEFAULT_FILE_SYSTEM_PATH,
INVALID_FILENAME_ERROR_MESSAGE,
CsvFile,
FileSystem,
FileSystemState,
JsonFile,
MarkdownFile,
TxtFile,
)
@@ -41,6 +43,30 @@ class TestBaseFile:
assert txt_file.get_size == 11
assert txt_file.get_line_count == 2
def test_json_file_creation(self):
    """Check the basic properties of a newly constructed JsonFile."""
    payload = '{"name": "John", "age": 30, "city": "New York"}'
    created = JsonFile(name='data', content=payload)

    # Name and content are stored exactly as passed in.
    assert created.name == 'data'
    assert created.content == payload

    # Extension-derived attributes.
    assert created.extension == 'json'
    assert created.full_name == 'data.json'

    # Size equals the character count; the payload has no newline, so one line.
    assert created.get_size == len(payload)
    assert created.get_line_count == 1
def test_csv_file_creation(self):
    """Check the basic properties of a newly constructed CsvFile."""
    table = 'name,age,city\nJohn,30,New York\nJane,25,London'
    created = CsvFile(name='users', content=table)

    # Name and content are stored exactly as passed in.
    assert created.name == 'users'
    assert created.content == table

    # Extension-derived attributes.
    assert created.extension == 'csv'
    assert created.full_name == 'users.csv'

    # Header plus two data rows -> three lines.
    assert created.get_size == len(table)
    assert created.get_line_count == 3
def test_file_content_operations(self):
"""Test content update and append operations."""
file_obj = TxtFile(name='test')
@@ -88,6 +114,60 @@ class TestBaseFile:
assert file_path.read_text() == expected_content
assert file_obj.content == expected_content
async def test_json_file_disk_operations(self):
    """Exercise sync_to_disk, write, and append on a JsonFile against a temp directory."""
    initial = '{"users": [{"name": "John", "age": 30}]}'
    replacement = '{"users": [{"name": "Jane", "age": 25}]}'
    suffix = ', {"name": "Bob", "age": 35}'

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        on_disk = root / 'data.json'
        document = JsonFile(name='data', content=initial)

        # Sync: the file must appear on disk with the initial content.
        await document.sync_to_disk(root)
        assert on_disk.exists()
        assert on_disk.read_text() == initial

        # Write: both the disk copy and the in-memory content are replaced.
        await document.write(replacement, root)
        assert on_disk.read_text() == replacement
        assert document.content == replacement

        # Append: the suffix is concatenated verbatim to the current content.
        await document.append(suffix, root)
        combined = replacement + suffix
        assert on_disk.read_text() == combined
        assert document.content == combined
async def test_csv_file_disk_operations(self):
    """Exercise sync_to_disk, write, and append on a CsvFile against a temp directory."""
    initial = 'name,age,city\nJohn,30,New York'
    replacement = 'name,age,city\nJane,25,London'
    extra_row = '\nBob,35,Paris'

    with tempfile.TemporaryDirectory() as scratch:
        root = Path(scratch)
        on_disk = root / 'users.csv'
        sheet = CsvFile(name='users', content=initial)

        # Sync: the file must appear on disk with the initial content.
        await sheet.sync_to_disk(root)
        assert on_disk.exists()
        assert on_disk.read_text() == initial

        # Write: both the disk copy and the in-memory content are replaced.
        await sheet.write(replacement, root)
        assert on_disk.read_text() == replacement
        assert sheet.content == replacement

        # Append: the new row (with its leading newline) is concatenated verbatim.
        await sheet.append(extra_row, root)
        combined = replacement + extra_row
        assert on_disk.read_text() == combined
        assert sheet.content == combined
def test_file_sync_to_disk_sync(self):
"""Test synchronous disk sync operation."""
with tempfile.TemporaryDirectory() as tmp_dir:
@@ -160,7 +240,8 @@ class TestFileSystem:
assert 'md' in extensions
assert 'txt' in extensions
assert len(extensions) == 2
assert 'json' in extensions
assert 'csv' in extensions
def test_filename_validation(self, temp_filesystem):
"""Test filename validation."""
@@ -171,6 +252,8 @@ class TestFileSystem:
assert fs._is_valid_filename('my_file.txt') is True
assert fs._is_valid_filename('file-name.md') is True
assert fs._is_valid_filename('file123.txt') is True
assert fs._is_valid_filename('data.json') is True
assert fs._is_valid_filename('users.csv') is True
# Invalid filenames
assert fs._is_valid_filename('test.doc') is False # wrong extension
@@ -179,6 +262,8 @@ class TestFileSystem:
assert fs._is_valid_filename('test with spaces.md') is False # spaces
assert fs._is_valid_filename('test@file.md') is False # special chars
assert fs._is_valid_filename('.md') is False # no name
assert fs._is_valid_filename('.json') is False # no name
assert fs._is_valid_filename('.csv') is False # no name
def test_filename_parsing(self, temp_filesystem):
"""Test filename parsing into name and extension."""
@@ -192,6 +277,14 @@ class TestFileSystem:
assert name == 'my_file'
assert ext == 'txt' # Should be lowercased
name, ext = fs._parse_filename('data.json')
assert name == 'data'
assert ext == 'json'
name, ext = fs._parse_filename('users.CSV')
assert name == 'users'
assert ext == 'csv' # Should be lowercased
def test_get_file(self, temp_filesystem):
"""Test getting files from the filesystem."""
fs = temp_filesystem
@@ -228,21 +321,21 @@ class TestFileSystem:
content = fs.display_file('invalid@name.md')
assert content is None
def test_read_file(self, temp_filesystem):
async def test_read_file(self, temp_filesystem: FileSystem):
"""Test reading file content with proper formatting."""
fs = temp_filesystem
fs: FileSystem = temp_filesystem
# Read existing empty file
result = fs.read_file('todo.md')
result = await fs.read_file('todo.md')
expected = 'Read from file todo.md.\n<content>\n\n</content>'
assert result == expected
# Read non-existent file
result = fs.read_file('nonexistent.md')
result = await fs.read_file('nonexistent.md')
assert result == "File 'nonexistent.md' not found."
# Read file with invalid name
result = fs.read_file('invalid@name.md')
result = await fs.read_file('invalid@name.md')
assert result == INVALID_FILENAME_ERROR_MESSAGE
async def test_write_file(self, temp_filesystem):
@@ -254,7 +347,7 @@ class TestFileSystem:
assert result == 'Data written to file results.md successfully.'
# Verify content was written
content = fs.read_file('results.md')
content = await fs.read_file('results.md')
assert '# Test Results\nThis is a test.' in content
# Write to new file
@@ -271,6 +364,56 @@ class TestFileSystem:
result = await fs.write_file('test.doc', 'content')
assert result == INVALID_FILENAME_ERROR_MESSAGE
async def test_write_json_file(self, temp_filesystem):
    """Write JSON files through the filesystem and check the message, content, and type."""
    store = temp_filesystem
    payload = '{"users": [{"name": "John", "age": 30}, {"name": "Jane", "age": 25}]}'

    # The write reports success for the target filename.
    outcome = await store.write_file('data.json', payload)
    assert outcome == 'Data written to file data.json successfully.'

    # Reading back returns text that contains the payload.
    read_back = await store.read_file('data.json')
    assert payload in read_back

    # The file is registered and is a JsonFile holding the exact payload.
    assert 'data.json' in store.files
    registered = store.get_file('data.json')
    assert registered is not None
    assert isinstance(registered, JsonFile)
    assert registered.content == payload

    # A second, previously unknown JSON file can be written as well.
    outcome = await store.write_file('config.json', '{"debug": true, "port": 8080}')
    assert outcome == 'Data written to file config.json successfully.'
    assert 'config.json' in store.files
async def test_write_csv_file(self, temp_filesystem):
    """Write CSV files through the filesystem and check the message, content, and type."""
    store = temp_filesystem
    table = 'name,age,city\nJohn,30,New York\nJane,25,London\nBob,35,Paris'

    # The write reports success for the target filename.
    outcome = await store.write_file('users.csv', table)
    assert outcome == 'Data written to file users.csv successfully.'

    # Reading back returns text that contains the table.
    read_back = await store.read_file('users.csv')
    assert table in read_back

    # The file is registered and is a CsvFile holding the exact table.
    assert 'users.csv' in store.files
    registered = store.get_file('users.csv')
    assert registered is not None
    assert isinstance(registered, CsvFile)
    assert registered.content == table

    # A second, previously unknown CSV file can be written as well.
    outcome = await store.write_file('products.csv', 'id,name,price\n1,Laptop,999.99\n2,Mouse,29.99')
    assert outcome == 'Data written to file products.csv successfully.'
    assert 'products.csv' in store.files
async def test_append_file(self, temp_filesystem):
"""Test appending content to files."""
fs = temp_filesystem
@@ -294,6 +437,45 @@ class TestFileSystem:
result = await fs.append_file('invalid@name.md', 'content')
assert result == INVALID_FILENAME_ERROR_MESSAGE
async def test_append_json_file(self, temp_filesystem):
    """Append text to an existing JSON file and check it is concatenated verbatim."""
    store = temp_filesystem
    base = '{"users": [{"name": "John", "age": 30}]}'
    tail = ', {"name": "Jane", "age": 25}'

    # Seed the file with the base content.
    await store.write_file('data.json', base)

    # The combined text is not valid JSON; only the append mechanics are under test.
    outcome = await store.append_file('data.json', tail)
    assert outcome == 'Data appended to file data.json successfully.'

    # The stored content is the base followed directly by the tail.
    registered = store.get_file('data.json')
    assert registered is not None
    assert registered.content == '{"users": [{"name": "John", "age": 30}]}, {"name": "Jane", "age": 25}'
async def test_append_csv_file(self, temp_filesystem):
    """Append rows to an existing CSV file one at a time and check the accumulated content."""
    store = temp_filesystem

    # Seed the file with a header and one data row.
    await store.write_file('users.csv', 'name,age,city\nJohn,30,New York')

    # First appended row.
    outcome = await store.append_file('users.csv', '\nJane,25,London')
    assert outcome == 'Data appended to file users.csv successfully.'

    registered = store.get_file('users.csv')
    assert registered is not None
    after_first = 'name,age,city\nJohn,30,New York\nJane,25,London'
    assert registered.content == after_first

    # Second appended row; the handle fetched above is checked again on purpose
    # rather than being re-fetched from the filesystem.
    await store.append_file('users.csv', '\nBob,35,Paris')
    after_second = 'name,age,city\nJohn,30,New York\nJane,25,London\nBob,35,Paris'
    assert registered.content == after_second
async def test_save_extracted_content(self, temp_filesystem):
"""Test saving extracted content with auto-numbering."""
fs = temp_filesystem
@@ -412,6 +594,161 @@ class TestFileSystem:
# Clean up second filesystem
fs2.nuke()
async def test_complete_workflow_with_json_csv(self):
    """Run a write/append/overwrite/state-restore workflow over JSON and CSV files."""
    config_v1 = '{"app": {"name": "TestApp", "version": "1.0"}, "database": {"host": "localhost", "port": 5432}}'
    config_v2 = '{"app": {"name": "TestApp", "version": "1.1"}, "database": {"host": "localhost", "port": 5432}, "features": {"logging": true}}'
    api_payload = '{"status": "success", "data": [{"id": 1, "name": "Item 1"}, {"id": 2, "name": "Item 2"}]}'
    users_initial = 'id,name,email,age\n1,John Doe,john@example.com,30\n2,Jane Smith,jane@example.com,25'
    users_final = 'id,name,email,age\n1,John Doe,john@example.com,30\n2,Jane Smith,jane@example.com,25\n3,Bob Johnson,bob@example.com,35'
    products_table = (
        'sku,name,price,category\nLAP001,Gaming Laptop,1299.99,Electronics\nMOU001,Wireless Mouse,29.99,Accessories'
    )

    expected_names = ['todo.md', 'config.json', 'users.csv', 'api_response.json', 'products.csv']
    # (filename, expected file class, expected final content), in verification order.
    expectations = (
        ('config.json', JsonFile, config_v2),
        ('api_response.json', JsonFile, api_payload),
        ('users.csv', CsvFile, users_final),
        ('products.csv', CsvFile, products_table),
    )

    with tempfile.TemporaryDirectory() as scratch:
        workspace = FileSystem(base_dir=scratch, create_default_files=True)

        # Populate: initial config and users, one appended user row, then overwrite
        # the config and add two further files.
        await workspace.write_file('config.json', config_v1)
        await workspace.write_file('users.csv', users_initial)
        await workspace.append_file('users.csv', '\n3,Bob Johnson,bob@example.com,35')
        await workspace.write_file('config.json', config_v2)
        await workspace.write_file('api_response.json', api_payload)
        await workspace.write_file('products.csv', products_table)

        # The listing holds exactly the expected filenames.
        listing = workspace.list_files()
        assert len(listing) == len(expected_names)
        for wanted in expected_names:
            assert wanted in listing

        # Each file has the right type and final content.
        for filename, file_cls, text in expectations:
            live = workspace.get_file(filename)
            assert live is not None
            assert isinstance(live, file_cls)
            assert live.content == text

        # Capture the state, destroy the original, and rebuild from the snapshot.
        snapshot = workspace.get_state()
        workspace.nuke()
        rebuilt = FileSystem.from_state(snapshot)

        assert len(rebuilt.files) == len(expected_names)

        # Restored files keep their type and content.
        for filename, file_cls, text in expectations:
            revived = rebuilt.get_file(filename)
            assert revived is not None
            assert isinstance(revived, file_cls)
            assert revived.content == text

        # Restored files exist on disk; todo.md is skipped as it is already tested.
        for filename in expected_names:
            if filename == 'todo.md':
                continue
            assert (rebuilt.data_dir / filename).exists()

        rebuilt.nuke()
async def test_from_state_with_json_csv_files(self, temp_filesystem):
    """Rebuild a filesystem from state and check JSON/CSV files in memory and on disk."""
    source_fs = temp_filesystem

    data_json = '{"version": "1.0", "users": [{"name": "John", "age": 30}]}'
    users_csv = 'name,age,city\nJohn,30,New York\nJane,25,London'
    config_json = '{"debug": true, "port": 8080}'
    products_csv = 'id,name,price\n1,Laptop,999.99\n2,Mouse,29.99'

    # Populate the source filesystem (this write order is also used for the disk checks).
    written = (
        ('data.json', data_json),
        ('users.csv', users_csv),
        ('config.json', config_json),
        ('products.csv', products_csv),
    )
    for filename, text in written:
        await source_fs.write_file(filename, text)

    # Snapshot the state and construct a second filesystem from it.
    snapshot = source_fs.get_state()
    clone = FileSystem.from_state(snapshot)

    # The clone has the same base directory and the same number of files.
    assert clone.base_dir == source_fs.base_dir
    assert len(clone.files) == len(source_fs.files)

    # Type and content of every restored file: JSON files first, then CSV files.
    checks = (
        ('data.json', JsonFile, data_json),
        ('config.json', JsonFile, config_json),
        ('users.csv', CsvFile, users_csv),
        ('products.csv', CsvFile, products_csv),
    )
    for filename, file_cls, text in checks:
        restored = clone.get_file(filename)
        assert restored is not None
        assert isinstance(restored, file_cls)
        assert restored.content == text

    # Every written file exists on disk under the clone's data directory.
    for filename, _ in written:
        assert (clone.data_dir / filename).exists()

    # Disk contents are compared for one JSON file and one CSV file.
    assert (clone.data_dir / 'data.json').read_text() == data_json
    assert (clone.data_dir / 'users.csv').read_text() == users_csv

    # Clean up the second filesystem.
    clone.nuke()
def test_nuke(self, empty_filesystem):
"""Test filesystem destruction."""
fs = empty_filesystem

View File

@@ -404,7 +404,7 @@ class TestEventValidation:
user_id='0683fb03-c5da-79c9-8000-d3a39c47c650',
agent_session_id='0683fb03-c5da-79c9-8000-d3a39c47c651',
task='test',
llm_model='gpt-4o',
llm_model='gpt-4.1',
done_output=None,
user_feedback_type=None,
user_comment=None,

View File

@@ -13,9 +13,9 @@ def llm():
# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
return ChatAzureOpenAI(
model='gpt-4o',
model='gpt-4.1',
)
# return ChatOpenAI(model='gpt-4o-mini')
# return ChatOpenAI(model='gpt-4.1-mini')
@pytest.fixture

View File

@@ -79,7 +79,7 @@ class TestCoreFunctionality:
def llm(self):
"""Initialize language model for testing with minimal settings."""
return ChatOpenAI(
model='gpt-4o',
model='gpt-4.1',
temperature=0.0,
)

View File

@@ -14,7 +14,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from browser_use import Agent, AgentHistoryList
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True))
agent = Agent(

View File

@@ -14,7 +14,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from browser_use import Agent, AgentHistoryList
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, disable_security=True))

View File

@@ -50,7 +50,7 @@ def llm():
# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
return ChatAzureOpenAI(
model='gpt-4o',
model='gpt-4.1',
)

View File

@@ -15,7 +15,7 @@ import asyncio
from browser_use import Agent, AgentHistoryList
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
browser_session = BrowserSession(browser_profile=BrowserProfile(headless=True, disable_security=True))

View File

@@ -78,7 +78,7 @@ def llm():
# return ChatAnthropic(model_name='claude-3-5-sonnet-20240620', timeout=25, stop=None)
return ChatAzureOpenAI(
model='gpt-4o',
model='gpt-4.1',
)

View File

@@ -16,7 +16,7 @@ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from browser_use import Agent, AgentHistoryList, BrowserSession, Controller
from browser_use.llm import ChatOpenAI
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
controller = Controller()
# use this test to ask the model questions about the page like

View File

@@ -21,7 +21,7 @@ from browser_use.browser import BrowserProfile, BrowserSession
load_dotenv()
# Initialize language model and controller.
llm = ChatOpenAI(model='gpt-4o')
llm = ChatOpenAI(model='gpt-4.1')
controller = Controller()