diff --git a/browser_use/agent/prompts.py b/browser_use/agent/prompts.py index 834843551..6569b3de2 100644 --- a/browser_use/agent/prompts.py +++ b/browser_use/agent/prompts.py @@ -20,7 +20,7 @@ class SystemPrompt: 1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format: { "current_state": { - "evaluation_previous_goal": "Success|Failed|Unknown-Brief description of why", + "evaluation_previous_goal": "Success|Failed|Unknown - Brief description of why", "memory": "Brief description of what has been done-What you need to remember until the end of the task", "next_goal": "What needs to be done with the next actions" }, @@ -70,10 +70,11 @@ class SystemPrompt: - Visual context helps verify element locations and relationships 7. ACTION SEQUENCING: - - Actions are executed in the order they appear in the list + - Actions are executed in the order they appear in the list - Each action should logically follow from the previous one - - If the page changes between actions. The sequence is interrupted and you get the new page state. + - If the page changes between actions, the sequence is interrupted and you get the new page state. - Only provide the action sequence until you think the DOM will change. + - Try to be efficient. If the dom changes a little bit we find the right element. """ def input_format(self) -> str: @@ -152,11 +153,13 @@ Interactive elements: if self.result: for i, result in enumerate(self.result): if result.extracted_content: - state_description += f'\nResult of action {i}: {result.extracted_content}' + state_description += ( + f'\nResult of action {i + 1}/{len(self.result)}: {result.extracted_content}' + ) if result.error: # only use last 300 characters of error error = result.error[-self.max_error_length :] - state_description += f'\nError of action {i}: ...{error}' + state_description += f'\nError of action {i + 1}/{len(self.result)}: ...{error}' if self.state.screenshot: # Format message for vision model diff --git a/browser_use/agent/service.py b/browser_use/agent/service.py index c00a441cd..738498f05 100644 --- a/browser_use/agent/service.py +++ b/browser_use/agent/service.py @@ -240,7 +240,7 @@ class Agent: logger.info(f'🎯 Next Goal: {response.current_state.next_goal}') for i, action in enumerate(response.action): logger.info( - f'🛠️ Action {i}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}' + f'🛠️ Action {i + 1}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}' ) def _save_conversation(self, input_messages: list[BaseMessage], response: Any) -> None: diff --git a/browser_use/agent/views.py b/browser_use/agent/views.py index 9a9c5bcb7..1d29f42f0 100644 --- a/browser_use/agent/views.py +++ b/browser_use/agent/views.py @@ -73,7 +73,7 @@ class AgentHistory(BaseModel): elements = [] for action in model_output.action: index = action.get_index() - if index: + if index and index in selector_map: el: DOMElementNode = selector_map[index] elements.append(HistoryTreeProcessor.convert_dom_element_to_history_element(el)) else: diff --git a/browser_use/browser/service.py b/browser_use/browser/service.py index 6a4be5b52..0c8360781 100644 --- a/browser_use/browser/service.py +++ b/browser_use/browser/service.py @@ -70,6 +70,7 @@ class BrowserConfig: minimum_wait_page_load_time: float = 0.5 wait_for_network_idle_page_load_time: float = 1 maximum_wait_page_load_time: float = 5 + wait_between_actions: float = 1 extra_chromium_args: list[str] = field(default_factory=list) browser_window_size: BrowserWindowSize = field( diff --git a/browser_use/controller/service.py b/browser_use/controller/service.py index 1c14bed9c..37bb1cea8 100644 --- a/browser_use/controller/service.py +++ b/browser_use/controller/service.py @@ -1,3 +1,4 @@ +import asyncio import logging from main_content_extractor import MainContentExtractor @@ -27,6 +28,7 @@ class Controller: browser_config: BrowserConfig = BrowserConfig(), ): self.browser = Browser(config=browser_config) + self.wait_between_actions = browser_config.wait_between_actions self.registry = Registry() self._register_default_actions() @@ -79,7 +81,9 @@ class Controller: await browser.switch_to_tab(-1) return ActionResult(extracted_content=msg) except Exception as e: - logger.warning(f'Element no longer available: {str(e)}') + logger.warning( + f'Element no longer available with index {params.index} - most likely the page changed' + ) return ActionResult(error=str(e)) @self.registry.action('Input text', param_model=InputTextAction, requires_browser=True) @@ -182,6 +186,7 @@ class Controller: results.append(await self.act(action)) if results[-1].is_done or results[-1].error: break + await asyncio.sleep(self.wait_between_actions) return results @time_execution_sync('--act') diff --git a/examples/find_and_apply_to_jobs.py b/examples/find_and_apply_to_jobs.py index 2fabe8593..6aec338f3 100644 --- a/examples/find_and_apply_to_jobs.py +++ b/examples/find_and_apply_to_jobs.py @@ -91,7 +91,7 @@ async def close_file_dialog(browser: Browser): async def main(): task = ( - 'Read my cv & find machine learning engineer jobs for me. ' + 'Read my cv & find machine learning engineer jobs in Bangalore for me. ' 'Save them to a file' 'then start applying for them in new tabs - please not via job portals like linkedin, indeed, etc. ' 'If you need more information or help, ask me.'