# Mirror of https://github.com/browser-use/browser-use
# (synced 2026-05-06 17:52:15 +02:00; 57 lines, 1.5 KiB, Python)
"""
Simple try of the agent.

@dev You need to add OPENAI_API_KEY to your environment variables.
"""
|
|
|
|
import json
|
|
import os
|
|
import sys
|
|
from doctest import OutputChecker
|
|
from pprint import pprint
|
|
|
|
import pytest
|
|
|
|
from browser_use.browser.browser import Browser, BrowserConfig
|
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
|
import asyncio
|
|
|
|
from langchain_openai import ChatOpenAI
|
|
|
|
from browser_use import ActionModel, Agent, AgentHistoryList, Controller
|
|
from browser_use.agent.views import AgentOutput
|
|
|
|
# Chat model that drives the agent (the OpenAI key is read from the environment).
llm = ChatOpenAI(model='gpt-4o')

# Controller whose registry collects the custom actions declared below.
controller = Controller()
|
|
|
|
# Use this test to ask the model questions about the page, for example:
#   - Which colors do you see for the bbox labels? List all of them with their label.
#   - What are the smallest bboxes with labels?
|
|
|
|
|
|
@controller.registry.action(description='explain what you see on the screen and ask user for input')
async def explain_screen(text: str) -> str:
    """Show the model's explanation and return the user's next question.

    Args:
        text: The explanation produced by the model; it is pretty-printed
            to stdout.

    Returns:
        The line the user typed at the prompt.
    """
    pprint(text)
    # input() blocks until the user answers. Running it in a worker thread
    # keeps the event loop free while this coroutine waits.
    answer = await asyncio.to_thread(input, '\nuser input next question: \n')
    return answer
|
|
|
|
|
|
@controller.registry.action(description='done')
async def done(text: str) -> str:
    """Registered under the description 'done'; always answers with a fixed instruction.

    Args:
        text: Accepted but ignored.

    Returns:
        The literal string 'call explain_screen'.
    """
    follow_up = 'call explain_screen'
    return follow_up
|
|
|
|
|
|
# Module-level agent used by the test below. It gets the shared llm and
# controller plus a visible (non-headless) browser with security disabled.
agent = Agent(
    task='call explain_screen all the time the user asks you questions e.g. about the page like bbox which you see are labels - your task is to expalin it and get the next question',
    llm=llm,
    browser=Browser(
        config=BrowserConfig(
            headless=False,
            disable_security=True,
        ),
    ),
    controller=controller,
)
|
|
|
|
|
|
@pytest.mark.skip(reason='this is for local testing only')
async def test_vision():
    """Run the interactive module-level agent; skipped unless run locally by hand.

    Passes 20 to ``agent.run`` (presumably the step limit - confirm against
    the Agent API).
    """
    run_history: AgentHistoryList = await agent.run(20)
|