From b579308c3bccb0796f223d8d32b9ab8165739f75 Mon Sep 17 00:00:00 2001 From: Marian Schneider Date: Sun, 31 Aug 2025 15:38:28 +0200 Subject: [PATCH 1/7] docs: update data available in hooks for Playwright removal --- docs/customize/hooks.mdx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/customize/hooks.mdx b/docs/customize/hooks.mdx index a56cf66b8..78cbc5073 100644 --- a/docs/customize/hooks.mdx +++ b/docs/customize/hooks.mdx @@ -96,12 +96,12 @@ When working with agent hooks, you have access to the entire `Agent` instance. H - `agent.history.model_actions()`: Actions taken by the agent - `agent.history.extracted_content()`: Content extracted from web pages - `agent.history.urls()`: URLs visited by the agent -- `agent.browser_session` gives direct access to the `Browser()` and CDP interface +- `agent.browser_session` gives direct access to the `BrowserSession` and CDP interface - `agent.browser_session.agent_focus`: Get the current CDP session the agent is focused on - `agent.browser_session.get_or_create_cdp_session()`: Get the current CDP session for browser interaction - `agent.browser_session.get_tabs()`: Get all tabs currently open - - `agent.browser_session.get_page_html()`: Current page HTML - - `agent.browser_session.take_screenshot()`: Screenshot of the current page + - `agent.browser_session.get_current_page_url()`: Get the URL of the current active tab + - `agent.browser_session.get_current_page_title()`: Get the title of the current active tab ## Tips for Using Hooks From ce6b88c26fb08a5678524f31d39a49fe1458feac Mon Sep 17 00:00:00 2001 From: Marian Schneider Date: Sun, 31 Aug 2025 17:04:35 +0200 Subject: [PATCH 2/7] docs: update hooks basic example to work with latest release --- docs/customize/hooks.mdx | 88 ++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 39 deletions(-) diff --git a/docs/customize/hooks.mdx b/docs/customize/hooks.mdx index 78cbc5073..697a3efe5 100644 --- a/docs/customize/hooks.mdx +++ b/docs/customize/hooks.mdx @@ -27,55 +27,65 @@ Each hook should be an `async` callable function that accepts the `agent` instan ### Basic Example ```python +import asyncio +from pathlib import Path + from browser_use import Agent, ChatOpenAI +from browser_use.browser.events import ScreenshotEvent async def my_step_hook(agent: Agent): - # inside a hook you can access all the state and methods under the Agent object: - # agent.settings, agent.state, agent.task - # agent.tools, agent.llm, agent.browser_session - # agent.pause(), agent.resume(), agent.add_new_task(...), etc. + # inside a hook you can access all the state and methods under the Agent object: + # agent.settings, agent.state, agent.task + # agent.tools, agent.llm, agent.browser_session + # agent.pause(), agent.resume(), agent.add_new_task(...), etc. - # You also have direct access to the browser state - state = await agent.browser_session.get_browser_state_summary() - - current_url = state.url - visit_log = agent.history.urls() - previous_url = visit_log[-2] if len(visit_log) >= 2 else None - print(f"Agent was last on URL: {previous_url} and is now on {current_url}") + # You also have direct access to the browser state + state = await agent.browser_session.get_browser_state_summary() - # Example: listen for events on the page, interact with the DOM, run JS directly, etc. - await page.on('domcontentloaded', lambda: print('page navigated to a new url...')) - await page.locator("css=form > input[type=submit]").click() - await page.evaluate('() => alert(1)') - await page.browser.new_tab - await agent.browser_session.session.context.add_init_script('/* some JS to run on every page */') + current_url = state.url + visit_log = agent.history.urls() + previous_url = visit_log[-2] if len(visit_log) >= 2 else None + print(f'Agent was last on URL: {previous_url} and is now on {current_url}') + cdp_session = await agent.browser_session.get_or_create_cdp_session() - # Example: monitor or intercept all network requests - async def handle_request(route): - # Print, modify, block, etc. do anything to the requests here - # https://playwright.dev/python/docs/network#handle-requests - print(route.request, route.request.headers) - await route.continue_(headers=route.request.headers) - await page.route("**/*", handle_route) + # Example: Get page HTML content + doc = await cdp_session.cdp_client.send.DOM.getDocument(session_id=cdp_session.session_id) + html_result = await cdp_session.cdp_client.send.DOM.getOuterHTML( + params={'nodeId': doc['root']['nodeId']}, session_id=cdp_session.session_id + ) + page_html = html_result['outerHTML'] - # Example: pause agent execution and resume it based on some custom code - if '/completed' in current_url: - agent.pause() - Path('result.txt').write_text(await page.content()) - input('Saved "completed" page content to result.txt, press [Enter] to resume...') - agent.resume() + # Example: Take a screenshot using the event system + screenshot_event = agent.browser_session.event_bus.dispatch(ScreenshotEvent(full_page=False)) + await screenshot_event -agent = Agent( - task="Search for the latest news about AI", - llm=ChatOpenAI(model="gpt-4.1-mini"), -) + # Example: Add initialization script for new pages + await agent.browser_session._cdp_add_init_script('console.log("Hello from hook!")') -await agent.run( - on_step_start=my_step_hook, - # on_step_end=... - max_steps=10 -) + # Example: pause agent execution and resume it based on some custom code + if '/finished' in current_url: + agent.pause() + Path('result.txt').write_text(page_html) + input('Saved "completed" page content to result.txt, press [Enter] to resume...') + agent.resume() + + +async def main(): + agent = Agent( + task='Search for the latest news about AI', + llm=ChatOpenAI(model='gpt-5-mini'), + ) + + await agent.run( + on_step_start=my_step_hook, + # on_step_end=... + max_steps=10, + ) + + +if __name__ == '__main__': + asyncio.run(main()) ``` ## Data Available in Hooks From d827aea29b67a5056d4b080bf9fa60f5a70fe098 Mon Sep 17 00:00:00 2001 From: Marian Schneider Date: Sun, 31 Aug 2025 17:34:45 +0200 Subject: [PATCH 3/7] docs: update hooks complex example to work with latest release --- docs/customize/hooks.mdx | 288 +++++++++++++++++++-------------------- 1 file changed, 143 insertions(+), 145 deletions(-) diff --git a/docs/customize/hooks.mdx b/docs/customize/hooks.mdx index 697a3efe5..06a8bccbf 100644 --- a/docs/customize/hooks.mdx +++ b/docs/customize/hooks.mdx @@ -132,7 +132,7 @@ To use this example, you'll need to: 1. Set up the required dependencies: ```bash - pip install fastapi uvicorn prettyprinter pyobjtojson dotenv browser-use + uv pip install fastapi uvicorn prettyprinter pyobjtojson dotenv browser-use ``` 2. Create two separate Python files: @@ -156,74 +156,77 @@ The server component handles receiving and storing the agent's activity data: # Save this code to api.py and run with `python api.py` # -import json import base64 +import json from pathlib import Path -from fastapi import FastAPI, Request import prettyprinter import uvicorn +from fastapi import FastAPI, Request prettyprinter.install_extras() + # Utility function to save screenshots def b64_to_png(b64_string: str, output_file): - """ - Convert a Base64-encoded string to a PNG file. + """ + Convert a Base64-encoded string to a PNG file. + + :param b64_string: A string containing Base64-encoded data + :param output_file: The path to the output PNG file + """ + with open(output_file, 'wb') as f: + f.write(base64.b64decode(b64_string)) - :param b64_string: A string containing Base64-encoded data - :param output_file: The path to the output PNG file - """ - with open(output_file, "wb") as f: - f.write(base64.b64decode(b64_string)) # Initialize FastAPI app app = FastAPI() -@app.post("/post_agent_history_step") +@app.post('/post_agent_history_step') async def post_agent_history_step(request: Request): - data = await request.json() - prettyprinter.cpprint(data) + data = await request.json() + prettyprinter.cpprint(data) - # Ensure the "recordings" folder exists using pathlib - recordings_folder = Path("recordings") - recordings_folder.mkdir(exist_ok=True) + # Ensure the "recordings" folder exists using pathlib + recordings_folder = Path('recordings') + recordings_folder.mkdir(exist_ok=True) - # Determine the next file number by examining existing .json files - existing_numbers = [] - for item in recordings_folder.iterdir(): - if item.is_file() and item.suffix == ".json": - try: - file_num = int(item.stem) - existing_numbers.append(file_num) - except ValueError: - # In case the file name isn't just a number - pass + # Determine the next file number by examining existing .json files + existing_numbers = [] + for item in recordings_folder.iterdir(): + if item.is_file() and item.suffix == '.json': + try: + file_num = int(item.stem) + existing_numbers.append(file_num) + except ValueError: + # In case the file name isn't just a number + pass - if existing_numbers: - next_number = max(existing_numbers) + 1 - else: - next_number = 1 + if existing_numbers: + next_number = max(existing_numbers) + 1 + else: + next_number = 1 - # Construct the file path - file_path = recordings_folder / f"{next_number}.json" + # Construct the file path + file_path = recordings_folder / f'{next_number}.json' - # Save the JSON data to the file - with file_path.open("w") as f: - json.dump(data, f, indent=2) + # Save the JSON data to the file + with file_path.open('w') as f: + json.dump(data, f, indent=2) - # Optionally save screenshot if needed - # if "website_screenshot" in data and data["website_screenshot"]: - # screenshot_folder = Path("screenshots") - # screenshot_folder.mkdir(exist_ok=True) - # b64_to_png(data["website_screenshot"], screenshot_folder / f"{next_number}.png") + # Optionally save screenshot if needed + # if "website_screenshot" in data and data["website_screenshot"]: + # screenshot_folder = Path("screenshots") + # screenshot_folder.mkdir(exist_ok=True) + # b64_to_png(data["website_screenshot"], screenshot_folder / f"{next_number}.png") - return {"status": "ok", "message": f"Saved to {file_path}"} + return {'status': 'ok', 'message': f'Saved to {file_path}'} -if __name__ == "__main__": - print("Starting Browser-Use recording API on http://0.0.0.0:9000") - uvicorn.run(app, host="0.0.0.0", port=9000) + +if __name__ == '__main__': + print('Starting Browser-Use recording API on http://0.0.0.0:9000') + uvicorn.run(app, host='0.0.0.0', port=9000) ``` ### Client Component (client.py) @@ -239,140 +242,135 @@ The client component runs the Browser-Use agent with a recording hook: # import asyncio + import requests from dotenv import load_dotenv from pyobjtojson import obj_to_json -from browser_use.llm import ChatOpenAI + from browser_use import Agent +from browser_use.browser.events import ScreenshotEvent +from browser_use.llm import ChatOpenAI # Load environment variables (for API keys) load_dotenv() def send_agent_history_step(data): - """Send the agent step data to the recording API""" - url = "http://127.0.0.1:9000/post_agent_history_step" - response = requests.post(url, json=data) - return response.json() + """Send the agent step data to the recording API""" + url = 'http://127.0.0.1:9000/post_agent_history_step' + response = requests.post(url, json=data) + return response.json() async def record_activity(agent_obj): - """Hook function that captures and records agent activity at each step""" - website_html = None - website_screenshot = None - urls_json_last_elem = None - model_thoughts_last_elem = None - model_outputs_json_last_elem = None - model_actions_json_last_elem = None - extracted_content_json_last_elem = None + """Hook function that captures and records agent activity at each step""" + website_html = None + website_screenshot = None + urls_json_last_elem = None + model_thoughts_last_elem = None + model_outputs_json_last_elem = None + model_actions_json_last_elem = None + extracted_content_json_last_elem = None - print('--- ON_STEP_START HOOK ---') + print('--- ON_STEP_START HOOK ---') - # Capture current page state - website_html = await agent_obj.browser_session.get_page_html() - website_screenshot = await agent_obj.browser_session.take_screenshot() + # Capture current page state + cdp_session = await agent_obj.browser_session.get_or_create_cdp_session() + doc = await cdp_session.cdp_client.send.DOM.getDocument(session_id=cdp_session.session_id) + html_result = await cdp_session.cdp_client.send.DOM.getOuterHTML( + params={'nodeId': doc['root']['nodeId']}, session_id=cdp_session.session_id + ) + website_html = html_result['outerHTML'] - # Make sure we have state history - if hasattr(agent_obj, "state"): - history = agent_obj.state.history - else: - history = None - print("Warning: Agent has no state history") - return + # Get screenshot using event system + screenshot_event = agent_obj.browser_session.event_bus.dispatch(ScreenshotEvent(full_page=False)) + await screenshot_event + website_screenshot = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True) - # Process model thoughts - model_thoughts = obj_to_json( - obj=history.model_thoughts(), - check_circular=False - ) - if len(model_thoughts) > 0: - model_thoughts_last_elem = model_thoughts[-1] + # Make sure we have agent history + if hasattr(agent_obj, 'history'): + history = agent_obj.history + else: + history = None + print('Warning: Agent has no history') + return - # Process model outputs - model_outputs = agent_obj.state.history.model_outputs() - model_outputs_json = obj_to_json( - obj=model_outputs, - check_circular=False - ) - if len(model_outputs_json) > 0: - model_outputs_json_last_elem = model_outputs_json[-1] + # Process model thoughts + model_thoughts = obj_to_json(obj=history.model_thoughts(), check_circular=False) + if len(model_thoughts) > 0: + model_thoughts_last_elem = model_thoughts[-1] - # Process model actions - model_actions = agent_obj.state.history.model_actions() - model_actions_json = obj_to_json( - obj=model_actions, - check_circular=False - ) - if len(model_actions_json) > 0: - model_actions_json_last_elem = model_actions_json[-1] + # Process model outputs + model_outputs = history.model_outputs() + model_outputs_json = obj_to_json(obj=model_outputs, check_circular=False) + if len(model_outputs_json) > 0: + model_outputs_json_last_elem = model_outputs_json[-1] - # Process extracted content - extracted_content = agent_obj.state.history.extracted_content() - extracted_content_json = obj_to_json( - obj=extracted_content, - check_circular=False - ) - if len(extracted_content_json) > 0: - extracted_content_json_last_elem = extracted_content_json[-1] + # Process model actions + model_actions = history.model_actions() + model_actions_json = obj_to_json(obj=model_actions, check_circular=False) + if len(model_actions_json) > 0: + model_actions_json_last_elem = model_actions_json[-1] - # Process URLs - urls = agent_obj.state.history.urls() - urls_json = obj_to_json( - obj=urls, - check_circular=False - ) - if len(urls_json) > 0: - urls_json_last_elem = urls_json[-1] + # Process extracted content + extracted_content = history.extracted_content() + extracted_content_json = obj_to_json(obj=extracted_content, check_circular=False) + if len(extracted_content_json) > 0: + extracted_content_json_last_elem = extracted_content_json[-1] - # Create a summary of all data for this step - model_step_summary = { - "website_html": website_html, - "website_screenshot": website_screenshot, - "url": urls_json_last_elem, - "model_thoughts": model_thoughts_last_elem, - "model_outputs": model_outputs_json_last_elem, - "model_actions": model_actions_json_last_elem, - "extracted_content": extracted_content_json_last_elem - } + # Process URLs + urls = history.urls() + urls_json = obj_to_json(obj=urls, check_circular=False) + if len(urls_json) > 0: + urls_json_last_elem = urls_json[-1] - print("--- MODEL STEP SUMMARY ---") - print(f"URL: {urls_json_last_elem}") + # Create a summary of all data for this step + model_step_summary = { + 'website_html': website_html, + 'website_screenshot': website_screenshot, + 'url': urls_json_last_elem, + 'model_thoughts': model_thoughts_last_elem, + 'model_outputs': model_outputs_json_last_elem, + 'model_actions': model_actions_json_last_elem, + 'extracted_content': extracted_content_json_last_elem, + } - # Send data to the API - result = send_agent_history_step(data=model_step_summary) - print(f"Recording API response: {result}") + print('--- MODEL STEP SUMMARY ---') + print(f'URL: {urls_json_last_elem}') + + # Send data to the API + result = send_agent_history_step(data=model_step_summary) + print(f'Recording API response: {result}') async def run_agent(): - """Run the Browser-Use agent with the recording hook""" - agent = Agent( - task="Compare the price of gpt-4o and DeepSeek-V3", - llm=ChatOpenAI(model="gpt-4.1-mini"), - ) + """Run the Browser-Use agent with the recording hook""" + agent = Agent( + task='Compare the price of gpt-4o and DeepSeek-V3', + llm=ChatOpenAI(model='gpt-5-mini'), + ) - try: - print("Starting Browser-Use agent with recording hook") - await agent.run( - on_step_start=record_activity, - max_steps=30 - ) - except Exception as e: - print(f"Error running agent: {e}") + try: + print('Starting Browser-Use agent with recording hook') + await agent.run(on_step_start=record_activity, max_steps=30) + except Exception as e: + print(f'Error running agent: {e}') -if __name__ == "__main__": - # Check if API is running - try: - requests.get("http://127.0.0.1:9000") - print("Recording API is available") - except: - print("Warning: Recording API may not be running. Start api.py first.") +if __name__ == '__main__': + # Check if API is running + try: + requests.get('http://127.0.0.1:9000') + print('Recording API is available') + except Exception as e: + print('Warning: Recording API may not be running. Start api.py first.') + print(f'Error: {e}') - # Run the agent - asyncio.run(run_agent()) + # Run the agent + asyncio.run(run_agent()) ``` -Contribution by Carlos A. Planchón. +Contribution by Carlos A. Planchón. Updated by Marian Schneider. ### Working with the Recorded Data From 0b4f5d3485c9b12dd94e2439214f1aa707b16eb9 Mon Sep 17 00:00:00 2001 From: Marian Schneider Date: Sun, 31 Aug 2025 23:24:22 +0200 Subject: [PATCH 4/7] docs: updated hooks examples to use timeouts, removed internal API call, and clearer wording --- docs/customize/hooks.mdx | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/docs/customize/hooks.mdx b/docs/customize/hooks.mdx index 06a8bccbf..62ad87d2a 100644 --- a/docs/customize/hooks.mdx +++ b/docs/customize/hooks.mdx @@ -60,14 +60,11 @@ async def my_step_hook(agent: Agent): screenshot_event = agent.browser_session.event_bus.dispatch(ScreenshotEvent(full_page=False)) await screenshot_event - # Example: Add initialization script for new pages - await agent.browser_session._cdp_add_init_script('console.log("Hello from hook!")') - # Example: pause agent execution and resume it based on some custom code if '/finished' in current_url: agent.pause() Path('result.txt').write_text(page_html) - input('Saved "completed" page content to result.txt, press [Enter] to resume...') + input('Saved "finished" page content to result.txt, press [Enter] to resume...') agent.resume() @@ -258,7 +255,7 @@ load_dotenv() def send_agent_history_step(data): """Send the agent step data to the recording API""" url = 'http://127.0.0.1:9000/post_agent_history_step' - response = requests.post(url, json=data) + response = requests.post(url, json=data, timeout=10) return response.json() @@ -360,7 +357,7 @@ async def run_agent(): if __name__ == '__main__': # Check if API is running try: - requests.get('http://127.0.0.1:9000') + requests.get('http://127.0.0.1:9000', timeout=5) print('Recording API is available') except Exception as e: print('Warning: Recording API may not be running. Start api.py first.') From 1c269fffe710e1c48d6f4698dbae22e09f6735f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Thu, 4 Sep 2025 08:14:43 -0700 Subject: [PATCH 5/7] Clean up hooks --- docs/customize/hooks.mdx | 276 +-------------------------------------- 1 file changed, 4 insertions(+), 272 deletions(-) diff --git a/docs/customize/hooks.mdx b/docs/customize/hooks.mdx index 62ad87d2a..345e072e0 100644 --- a/docs/customize/hooks.mdx +++ b/docs/customize/hooks.mdx @@ -2,7 +2,6 @@ title: "Lifecycle Hooks" description: "Customize agent behavior with lifecycle hooks" icon: "Wrench" -author: "Carlos A. Planchón" mode: "wide" --- @@ -59,6 +58,7 @@ async def my_step_hook(agent: Agent): # Example: Take a screenshot using the event system screenshot_event = agent.browser_session.event_bus.dispatch(ScreenshotEvent(full_page=False)) await screenshot_event + result = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True) # Example: pause agent execution and resume it based on some custom code if '/finished' in current_url: @@ -112,276 +112,8 @@ When working with agent hooks, you have access to the entire `Agent` instance. H ## Tips for Using Hooks -- **Avoid blocking operations**: Since hooks run in the same execution thread as the agent, try to keep them efficient or use asynchronous patterns. -- **Handle exceptions**: Make sure your hook functions handle exceptions gracefully to prevent interrupting the agent's main flow. -- **Use custom actions instead**: hooks are fairly advanced, most things can be implemented with [custom action functions](/customize/custom-functions) instead +- **Avoid blocking operations**: Since hooks run in the same execution thread as the agent, keep them efficient and avoid blocking operations. +- **Use custom tools instead**: hooks are fairly advanced, most things can be implemented with [custom tools](/customize/tools/basics) instead +- **Increase step_timeout**: If your hook is doing something that takes a long time, you can increase the `step_timeout` parameter in the `Agent(...)` constructor. --- - -## Complex Example: Agent Activity Recording System - -This comprehensive example demonstrates a complete implementation for recording and saving Browser-Use agent activity, consisting of both server and client components. - -### Setup Instructions - -To use this example, you'll need to: - -1. Set up the required dependencies: - - ```bash - uv pip install fastapi uvicorn prettyprinter pyobjtojson dotenv browser-use - ``` - -2. Create two separate Python files: - - - `api.py` - The FastAPI server component - - `client.py` - The Browser-Use agent with recording hook - -3. Run both components: - - Start the API server first: `python api.py` - - Then run the client: `python client.py` - -### Server Component (api.py) - -The server component handles receiving and storing the agent's activity data: - -```python -#!/usr/bin/env python3 - -# -# FastAPI API to record and save Browser-Use activity data. -# Save this code to api.py and run with `python api.py` -# - -import base64 -import json -from pathlib import Path - -import prettyprinter -import uvicorn -from fastapi import FastAPI, Request - -prettyprinter.install_extras() - - -# Utility function to save screenshots -def b64_to_png(b64_string: str, output_file): - """ - Convert a Base64-encoded string to a PNG file. - - :param b64_string: A string containing Base64-encoded data - :param output_file: The path to the output PNG file - """ - with open(output_file, 'wb') as f: - f.write(base64.b64decode(b64_string)) - - -# Initialize FastAPI app -app = FastAPI() - - -@app.post('/post_agent_history_step') -async def post_agent_history_step(request: Request): - data = await request.json() - prettyprinter.cpprint(data) - - # Ensure the "recordings" folder exists using pathlib - recordings_folder = Path('recordings') - recordings_folder.mkdir(exist_ok=True) - - # Determine the next file number by examining existing .json files - existing_numbers = [] - for item in recordings_folder.iterdir(): - if item.is_file() and item.suffix == '.json': - try: - file_num = int(item.stem) - existing_numbers.append(file_num) - except ValueError: - # In case the file name isn't just a number - pass - - if existing_numbers: - next_number = max(existing_numbers) + 1 - else: - next_number = 1 - - # Construct the file path - file_path = recordings_folder / f'{next_number}.json' - - # Save the JSON data to the file - with file_path.open('w') as f: - json.dump(data, f, indent=2) - - # Optionally save screenshot if needed - # if "website_screenshot" in data and data["website_screenshot"]: - # screenshot_folder = Path("screenshots") - # screenshot_folder.mkdir(exist_ok=True) - # b64_to_png(data["website_screenshot"], screenshot_folder / f"{next_number}.png") - - return {'status': 'ok', 'message': f'Saved to {file_path}'} - - -if __name__ == '__main__': - print('Starting Browser-Use recording API on http://0.0.0.0:9000') - uvicorn.run(app, host='0.0.0.0', port=9000) -``` - -### Client Component (client.py) - -The client component runs the Browser-Use agent with a recording hook: - -```python -#!/usr/bin/env python3 - -# -# Client to record and save Browser-Use activity. -# Save this code to client.py and run with `python client.py` -# - -import asyncio - -import requests -from dotenv import load_dotenv -from pyobjtojson import obj_to_json - -from browser_use import Agent -from browser_use.browser.events import ScreenshotEvent -from browser_use.llm import ChatOpenAI - -# Load environment variables (for API keys) -load_dotenv() - - -def send_agent_history_step(data): - """Send the agent step data to the recording API""" - url = 'http://127.0.0.1:9000/post_agent_history_step' - response = requests.post(url, json=data, timeout=10) - return response.json() - - -async def record_activity(agent_obj): - """Hook function that captures and records agent activity at each step""" - website_html = None - website_screenshot = None - urls_json_last_elem = None - model_thoughts_last_elem = None - model_outputs_json_last_elem = None - model_actions_json_last_elem = None - extracted_content_json_last_elem = None - - print('--- ON_STEP_START HOOK ---') - - # Capture current page state - cdp_session = await agent_obj.browser_session.get_or_create_cdp_session() - doc = await cdp_session.cdp_client.send.DOM.getDocument(session_id=cdp_session.session_id) - html_result = await cdp_session.cdp_client.send.DOM.getOuterHTML( - params={'nodeId': doc['root']['nodeId']}, session_id=cdp_session.session_id - ) - website_html = html_result['outerHTML'] - - # Get screenshot using event system - screenshot_event = agent_obj.browser_session.event_bus.dispatch(ScreenshotEvent(full_page=False)) - await screenshot_event - website_screenshot = await screenshot_event.event_result(raise_if_any=True, raise_if_none=True) - - # Make sure we have agent history - if hasattr(agent_obj, 'history'): - history = agent_obj.history - else: - history = None - print('Warning: Agent has no history') - return - - # Process model thoughts - model_thoughts = obj_to_json(obj=history.model_thoughts(), check_circular=False) - if len(model_thoughts) > 0: - model_thoughts_last_elem = model_thoughts[-1] - - # Process model outputs - model_outputs = history.model_outputs() - model_outputs_json = obj_to_json(obj=model_outputs, check_circular=False) - if len(model_outputs_json) > 0: - model_outputs_json_last_elem = model_outputs_json[-1] - - # Process model actions - model_actions = history.model_actions() - model_actions_json = obj_to_json(obj=model_actions, check_circular=False) - if len(model_actions_json) > 0: - model_actions_json_last_elem = model_actions_json[-1] - - # Process extracted content - extracted_content = history.extracted_content() - extracted_content_json = obj_to_json(obj=extracted_content, check_circular=False) - if len(extracted_content_json) > 0: - extracted_content_json_last_elem = extracted_content_json[-1] - - # Process URLs - urls = history.urls() - urls_json = obj_to_json(obj=urls, check_circular=False) - if len(urls_json) > 0: - urls_json_last_elem = urls_json[-1] - - # Create a summary of all data for this step - model_step_summary = { - 'website_html': website_html, - 'website_screenshot': website_screenshot, - 'url': urls_json_last_elem, - 'model_thoughts': model_thoughts_last_elem, - 'model_outputs': model_outputs_json_last_elem, - 'model_actions': model_actions_json_last_elem, - 'extracted_content': extracted_content_json_last_elem, - } - - print('--- MODEL STEP SUMMARY ---') - print(f'URL: {urls_json_last_elem}') - - # Send data to the API - result = send_agent_history_step(data=model_step_summary) - print(f'Recording API response: {result}') - - -async def run_agent(): - """Run the Browser-Use agent with the recording hook""" - agent = Agent( - task='Compare the price of gpt-4o and DeepSeek-V3', - llm=ChatOpenAI(model='gpt-5-mini'), - ) - - try: - print('Starting Browser-Use agent with recording hook') - await agent.run(on_step_start=record_activity, max_steps=30) - except Exception as e: - print(f'Error running agent: {e}') - - -if __name__ == '__main__': - # Check if API is running - try: - requests.get('http://127.0.0.1:9000', timeout=5) - print('Recording API is available') - except Exception as e: - print('Warning: Recording API may not be running. Start api.py first.') - print(f'Error: {e}') - - # Run the agent - asyncio.run(run_agent()) -``` - -Contribution by Carlos A. Planchón. Updated by Marian Schneider. - -### Working with the Recorded Data - -After running the agent, you'll find the recorded data in the `recordings` directory. Here's how you can use this data: - -1. **View recorded sessions**: Each JSON file contains a snapshot of agent activity for one step -2. **Extract screenshots**: You can modify the API to save screenshots separately -3. **Analyze agent behavior**: Use the recorded data to study how the agent navigates websites - -### Extending the Example - -You can extend this recording system in several ways: - -1. **Save screenshots separately**: Uncomment the screenshot saving code in the API -2. **Add a web dashboard**: Create a simple web interface to view recorded sessions -3. **Add session IDs**: Modify the API to group steps by agent session -4. **Add filtering**: Implement filters to record only specific types of actions From 2a3666d23a6ab84fe88342b8406731bfa351315b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Thu, 4 Sep 2025 08:43:15 -0700 Subject: [PATCH 6/7] Add redirect for old hooks link --- docs/docs.json | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/docs.json b/docs/docs.json index 6dfdb1d4f..98d01807e 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -66,6 +66,10 @@ { "source": "/development/observability", "destination": "/development/monitoring/observability" + }, + { + "source": "/development/hooks", + "destination": "/customize/hooks" } ], "navigation": { @@ -145,6 +149,14 @@ "development/setup/contribution-guide" ] }, + { + "group": "Advanced", + "icon": "gear", + "isDefaultOpen": false, + "pages": [ + "customize/hooks" + ] + }, { "group": "Monitoring", "icon": "chart-mixed", @@ -236,4 +248,4 @@ "linkedin": "https://linkedin.com/company/browser-use" } } -} +} \ No newline at end of file From f06a021b663903198d36894e4aef8be5aff68d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Magnus=20M=C3=BCller?= <67061560+MagMueller@users.noreply.github.com> Date: Thu, 4 Sep 2025 08:47:29 -0700 Subject: [PATCH 7/7] linter --- docs/docs.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs.json b/docs/docs.json index 98d01807e..58d40acfa 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -248,4 +248,4 @@ "linkedin": "https://linkedin.com/company/browser-use" } } -} \ No newline at end of file +}