diff --git a/browser_use/agent/system_prompt.md b/browser_use/agent/system_prompt.md index 55f90005a..f0586cb42 100644 --- a/browser_use/agent/system_prompt.md +++ b/browser_use/agent/system_prompt.md @@ -147,6 +147,7 @@ Do not try multiple different paths in one step. Always have one clear goal per Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. - do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. - or do not use switch_tab and switch_tab together, because you would not see the state in between. +- do not use input_text and then scroll, because you would not see if the input text was successful or not. diff --git a/browser_use/agent/system_prompt_flash.md b/browser_use/agent/system_prompt_flash.md index 97d454868..aaf190953 100644 --- a/browser_use/agent/system_prompt_flash.md +++ b/browser_use/agent/system_prompt_flash.md @@ -144,6 +144,7 @@ Do not try multiple different paths in one step. Always have one clear goal per Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. - do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. - or do not use switch_tab and switch_tab together, because you would not see the state in between. +- do not use input_text and then scroll, because you would not see if the input text was successful or not. diff --git a/browser_use/agent/system_prompt_no_thinking.md b/browser_use/agent/system_prompt_no_thinking.md index c155da1f3..8a43b0552 100644 --- a/browser_use/agent/system_prompt_no_thinking.md +++ b/browser_use/agent/system_prompt_no_thinking.md @@ -146,6 +146,7 @@ Do not try multiple different paths in one step. Always have one clear goal per Its important that you see in the next step if your action was successful, so do not chain actions which change the browser state multiple times, e.g. - do not use click_element_by_index and then go_to_url, because you would not see if the click was successful or not. - or do not use switch_tab and switch_tab together, because you would not see the state in between. +- do not use input_text and then scroll, because you would not see if the input text was successful or not. diff --git a/browser_use/browser/profile.py b/browser_use/browser/profile.py index 4fa41b1ad..282a6fc84 100644 --- a/browser_use/browser/profile.py +++ b/browser_use/browser/profile.py @@ -413,10 +413,19 @@ class BrowserLaunchArgs(BaseModel): def set_default_downloads_path(self) -> Self: """Set a unique default downloads path if none is provided.""" if self.downloads_path is None: - import tempfile + import uuid - # Create unique temporary directory for downloads - self.downloads_path = Path(tempfile.mkdtemp(prefix='browser-use-downloads-')) + # Create unique directory in /tmp for downloads + unique_id = str(uuid.uuid4())[:8] # 8 characters + downloads_path = Path(f'/tmp/browser-use-downloads-{unique_id}') + + # Ensure path doesn't already exist (extremely unlikely but possible) + while downloads_path.exists(): + unique_id = str(uuid.uuid4())[:8] + downloads_path = Path(f'/tmp/browser-use-downloads-{unique_id}') + + self.downloads_path = downloads_path + self.downloads_path.mkdir(parents=True, exist_ok=True) return self @staticmethod @@ -587,15 +596,13 @@ class BrowserProfile(BrowserConnectArgs, BrowserLaunchPersistentContextArgs, Bro # --- UI/viewport/DOM --- - highlight_elements: bool = Field(default=True, description='Highlight interactive elements on the page.') + highlight_elements: bool = Field(default=False, description='Highlight interactive elements on the page.') filter_highlight_ids: bool = Field( default=True, description='Only show element IDs in highlights if llm_representation is less than 10 characters.' ) # --- Downloads --- - auto_download_pdfs: bool = Field( - default=False, description='Automatically download PDFs when navigating to PDF viewer pages.' - ) + auto_download_pdfs: bool = Field(default=True, description='Automatically download PDFs when navigating to PDF viewer pages.') profile_directory: str = 'Default' # e.g. 'Profile 1', 'Profile 2', 'Custom Profile', etc. diff --git a/docs/customize/browser/all-parameters.mdx b/docs/customize/browser/all-parameters.mdx index 066b10044..3f6c6516a 100644 --- a/docs/customize/browser/all-parameters.mdx +++ b/docs/customize/browser/all-parameters.mdx @@ -61,7 +61,7 @@ mode: "wide" - `wait_between_actions` (default: `0.5`): Time to wait between agent actions in seconds ## AI Integration -- `highlight_elements` (default: `True`): Highlight interactive elements for AI vision +- `highlight_elements` (default: `False`): Highlight interactive elements for AI vision ## Downloads & Files - `accept_downloads` (default: `True`): Automatically accept all downloads diff --git a/examples/file_system/alphabet_earnings.py b/examples/file_system/alphabet_earnings.py index 52fe95a37..fcbfa82fe 100644 --- a/examples/file_system/alphabet_earnings.py +++ b/examples/file_system/alphabet_earnings.py @@ -6,24 +6,13 @@ import shutil from dotenv import load_dotenv from browser_use import Agent, ChatOpenAI -from browser_use.browser import BrowserProfile, BrowserSession load_dotenv() -'' SCRIPT_DIR = pathlib.Path(os.path.dirname(os.path.abspath(__file__))) agent_dir = SCRIPT_DIR / 'alphabet_earnings' agent_dir.mkdir(exist_ok=True) - -llm = ChatOpenAI( - model='o4-mini', -) - -browser_session = BrowserSession( - browser_profile=BrowserProfile(downloads_path=str(agent_dir / 'downloads')), -) - task = """ Go to https://abc.xyz/assets/cc/27/3ada14014efbadd7a58472f1f3f4/2025q2-alphabet-earnings-release.pdf. Read the PDF and save 3 interesting data points in "alphabet_earnings.pdf" and share it with me! @@ -31,16 +20,15 @@ Read the PDF and save 3 interesting data points in "alphabet_earnings.pdf" and s agent = Agent( task=task, - llm=llm, - browser_session=browser_session, + llm=ChatOpenAI(model='o4-mini'), file_system_path=str(agent_dir / 'fs'), flash_mode=True, ) async def main(): - agent_history = await agent.run() - input('Press Enter to clean the file system...') + await agent.run() + input(f'Press Enter to clean the file system at {agent_dir}...') # clean the file system shutil.rmtree(str(agent_dir / 'fs')) diff --git a/pyproject.toml b/pyproject.toml index 22a067623..93a240e01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ name = "browser-use" description = "Make websites accessible for AI agents" authors = [{ name = "Gregor Zunic" }] -version = "0.7.0" +version = "0.7.1" readme = "README.md" requires-python = ">=3.11,<4.0" classifiers = [