Files
browser-use/tests/test_core_functionality.py
Gregor Žunič 89c63fdd63 Added custom actions registry and fixed extraction layer (#20)
* Validator

* Test mind2web

* Cleaned up logger

* Pytest logger

* Cleaned up logger

* Disable flag for human input

* Multiple clicks per button

* Multiple clicks per button

* More structured system prompt

* Fields with description

* System prompt example

* One logger

* Cleaner logging

* Log step in step function

* Fix critical clicking error - wrong argument used

* Improved thought process of agent

* Improve system prompt

* Remove human input message

* Custom action registration

* Pydantic model for custom actions

* Pydantic model for custom output

* Runs through, model outputs functions, but not called yet

* Work in progress - description for custom actions

* Description works, but schema not yet

* Model can call the right action - but is not executed

* Separate is_controller_action and is_custom_action

* Works! Model can call custom function

* Use registry for action, but result is not fed back to model

* Include result in messages

* Works with custom function - but typing is not correct

* Renamed registry

* First test cases

* Captcha tests

* Pydantic for tests

* Improve prompts for multi-step

* System prompt structure

* Handle errors like validation error

* Refactor error handling in agent

* Refactor error handling in agent

* Improved logging

* Update view

* Fix click parameter to index

* Simplify dynamic actions

* Use run instead of step

* Rename history

* Rename AgentService to Agent

* Rename ControllerService to Controller

* Pytest file

* Rename get state

* Rename BrowserService

* reversed dom extraction recursion to while

* Rename use_vision

* Rename use_vision

* reversed dom tree items and made browser less annoying

* Renaming and fixing type errors

* Renamed class names for agent

* updated requirements

* Update prompt

* Action registration works for user and controller

* Fix done call by returning ActionResult

* Fix if result is none

* Rename AgentOutput and ActionModel

* Improved prompt Passes 6/8 tests from test_agent_actions

* Calculate token cost

* Improve display

* Simplified logger

* Test function calling

* created super simple xpath extraction algo

* Tests logging

* tiny fixes to dom extraction

* Remove test

* Dont log number of clicks

* Pytest file

* merged per element js checks

* Check if driver is still open

* super fast processing

* fixed agent planning and stuff

* Fix example

* Fix example

* Improve error

* Improved error correction

* New line for step

* small type error fixes

* Test for pydantic

* Fix line

* Removed sample

* fixed readme and examples

---------

Co-authored-by: magmueller <mamagnus00@gmail.com>
2024-11-15 21:42:02 +01:00

158 lines
5.1 KiB
Python

import asyncio
import pytest
from langchain_openai import ChatOpenAI
from browser_use.agent.service import Agent
from browser_use.controller.service import Controller
@pytest.fixture
def llm():
    """Provide the ChatOpenAI chat model used by the tests in this module."""
    chat_model = ChatOpenAI(model='gpt-4o')  # Use appropriate model
    return chat_model
@pytest.fixture
def controller():
    """Provide a Controller and close its browser after the test.

    This is a plain (synchronous) generator fixture on purpose: neither the
    setup nor the teardown awaits anything, and an ``async def`` generator
    under ``@pytest.fixture`` is not driven by pytest-asyncio in strict mode,
    in which case tests would receive an async-generator object instead of a
    Controller.

    Yields:
        Controller: a fresh instance for the requesting test.
    """
    instance = Controller()
    try:
        yield instance
    finally:
        # Teardown runs even if the test failed; only close when `browser`
        # is truthy.
        if instance.browser:
            instance.browser.close(force=True)
@pytest.mark.asyncio
async def test_search_google(llm, controller):
    """Run a Google-search task and expect a 'search_google' action."""
    agent = Agent(task="Search Google for 'OpenAI'.", llm=llm, controller=controller)
    history = await agent.run(max_steps=2)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    assert 'search_google' in action_names
@pytest.mark.asyncio
async def test_go_to_url(llm, controller):
    """Run a navigation task and expect a 'go_to_url' action."""
    agent = Agent(task="Navigate to 'https://www.python.org'.", llm=llm, controller=controller)
    history = await agent.run(max_steps=2)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    assert 'go_to_url' in action_names
@pytest.mark.asyncio
async def test_go_back(llm, controller):
    """Run a navigate-then-return task; expect 'go_to_url' and 'go_back'."""
    agent = Agent(
        task="Go to 'https://www.example.com', then go back.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=3)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    assert 'go_to_url' in action_names
    assert 'go_back' in action_names
@pytest.mark.asyncio
async def test_click_element(llm, controller):
    """Run a navigate-and-click task; expect 'go_to_url' and 'click_element'."""
    agent = Agent(
        task="Go to 'https://www.python.org' and click on the first link.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=4)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    assert 'go_to_url' in action_names
    assert 'click_element' in action_names
@pytest.mark.asyncio
async def test_input_text(llm, controller):
    """Run a navigate-and-type task; expect 'go_to_url' and 'input_text'."""
    agent = Agent(
        task="Go to 'https://www.google.com' and input 'OpenAI' into the search box.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=4)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    assert 'go_to_url' in action_names
    assert 'input_text' in action_names
@pytest.mark.asyncio
async def test_switch_tab(llm, controller):
    """Run a multi-tab task; expect at least two 'open_tab' and a 'switch_tab'."""
    agent = Agent(
        task="Open new tabs with 'https://www.google.com' and 'https://www.wikipedia.org', then switch to the first tab.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=6)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    # The task names two sites, so two separate 'open_tab' actions are required.
    assert action_names.count('open_tab') >= 2
    assert 'switch_tab' in action_names
@pytest.mark.asyncio
async def test_open_new_tab(llm, controller):
    """Run an open-new-tab task and expect an 'open_tab' action."""
    agent = Agent(
        task="Open a new tab and go to 'https://www.example.com'.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=3)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    assert 'open_tab' in action_names
@pytest.mark.asyncio
async def test_extract_page_content(llm, controller):
    """Run a navigate-and-extract task; expect 'go_to_url' and 'extract_content'."""
    agent = Agent(
        task="Go to 'https://www.example.com' and extract the page content.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=3)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    assert 'go_to_url' in action_names
    assert 'extract_content' in action_names
@pytest.mark.asyncio
async def test_done_action(llm, controller):
    """Run a navigate-then-finish task; expect 'go_to_url' and 'done'."""
    agent = Agent(
        task="Navigate to 'https://www.example.com' and signal that the task is done.",
        llm=llm,
        controller=controller,
    )
    history = await agent.run(max_steps=3)

    # Collect the first key of every non-empty action dump as its name.
    action_names = []
    for step in history:
        output = step.model_output
        if output and output.action:
            dumped = output.action.model_dump(exclude_unset=True)
            action_names.append(list(dumped)[0])

    assert 'go_to_url' in action_names
    assert 'done' in action_names