mirror of
https://github.com/browser-use/browser-use
synced 2026-05-06 17:52:15 +02:00
Merge remote-tracking branch 'origin/HEAD' into multi-step
This commit is contained in:
File diff suppressed because one or more lines are too long
30
README.md
30
README.md
@@ -40,8 +40,7 @@ async def main():
|
||||
result = await agent.run()
|
||||
print(result)
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
asyncio.run(main())
|
||||
```
|
||||
|
||||
And don't forget to add your API keys to your `.env` file.
|
||||
@@ -79,6 +78,7 @@ https://github.com/user-attachments/assets/de73ee39-432c-4b97-b4e8-939fd7f323b3
|
||||
- Add custom actions (e.g. save to file, push to database, notify me, get human input)
|
||||
- Self-correcting
|
||||
- Use any LLM supported by LangChain (e.g. gpt4o, gpt4o mini, claude 3.5 sonnet, llama 3.1 405b, etc.)
|
||||
- Parallelize as many agents as you want
|
||||
|
||||
## Register custom actions
|
||||
|
||||
@@ -126,6 +126,30 @@ agent = Agent(task=task, llm=model, controller=controller)
|
||||
await agent.run()
|
||||
```
|
||||
|
||||
## Parallelize agents
|
||||
|
||||
In 99% of cases, you should use 1 Browser instance and parallelize the agents with 1 context per agent.
|
||||
You can also reuse the context after the agent finishes.
|
||||
|
||||
```python
|
||||
browser = Browser()
|
||||
```
|
||||
|
||||
```python
|
||||
for i in range(10):
|
||||
# This creates a new context and automatically closes it after the agent finishes (with `__aexit__`)
|
||||
async with browser.new_context() as context:
|
||||
agent = Agent(task=f"Task {i}", llm=model, browser_context=context)
|
||||
|
||||
# ... reuse context
|
||||
```
|
||||
|
||||
If you would like to learn more about how this works under the hood, see the [playwright browser-context](https://playwright.dev/python/docs/api/class-browsercontext) documentation.
|
||||
|
||||
### Context vs Browser
|
||||
|
||||
If you don't specify a `browser` or `browser_context`, the agent will create a new browser instance and context.
|
||||
|
||||
## Get XPath history
|
||||
|
||||
To get the entire history of everything the agent has done, you can use the output of the `run` method:
|
||||
@@ -138,7 +162,7 @@ print(history)
|
||||
|
||||
## Browser configuration
|
||||
|
||||
You can configure the browser using the `BrowserConfig` class.
|
||||
You can configure the browser using the `BrowserConfig` and `BrowserContextConfig` classes.
|
||||
|
||||
The most important options are:
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
[{"search_google": {"query": "Elon Musk"}}, {"click_element": {"index": 40, "num_clicks": 1, "xpath": "//div[2]/div[3]/span[1]/div[1]/div[1]/div[1]/div[3]/div[1]/button[2]"}}, {"click_element": {"index": 101, "num_clicks": 1, "xpath": "//div[4]/div[1]/div[13]/div[4]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/span[1]/a[1]"}}, {"done": {"text": "Opened the Wikipedia page for Elon Musk."}}]
|
||||
@@ -229,6 +229,7 @@ class Agent:
|
||||
title=state.title,
|
||||
tabs=state.tabs,
|
||||
interacted_element=interacted_elements,
|
||||
screenshot=state.screenshot,
|
||||
)
|
||||
|
||||
history_item = AgentHistory(model_output=model_output, result=result, state=state_history)
|
||||
|
||||
@@ -679,7 +679,7 @@ class BrowserContext:
|
||||
|
||||
def _enhanced_css_selector_for_element(self, element: DOMElementNode) -> str:
|
||||
"""
|
||||
Creates a CSS selector for a DOM element, handling various edge cases and special characters.
|
||||
Creates a CSS selector for a DOM element, prioritizing unique identifiers.
|
||||
|
||||
Args:
|
||||
element: The DOM element to create a selector for
|
||||
@@ -691,14 +691,44 @@ class BrowserContext:
|
||||
# Get base selector from XPath
|
||||
css_selector = self._convert_simple_xpath_to_css_selector(element.xpath)
|
||||
|
||||
# Handle class attributes
|
||||
# First priority - unique identifiers
|
||||
UNIQUE_IDENTIFIERS = {'id', 'data-testid', 'data-id', 'data-qa', 'data-cy', 'data-test'}
|
||||
|
||||
# Second priority - attributes that often contain unique values
|
||||
SEMI_UNIQUE_ATTRIBUTES = {
|
||||
'href',
|
||||
'src',
|
||||
'value',
|
||||
'name',
|
||||
'for',
|
||||
'aria-controls',
|
||||
'action',
|
||||
'data-url',
|
||||
'data-href',
|
||||
}
|
||||
|
||||
# Third priority - descriptive attributes
|
||||
DESCRIPTIVE_ATTRIBUTES = {
|
||||
'type',
|
||||
'role',
|
||||
'aria-label',
|
||||
'title',
|
||||
'placeholder',
|
||||
'alt',
|
||||
'aria-expanded',
|
||||
'aria-haspopup',
|
||||
'aria-selected',
|
||||
'aria-current',
|
||||
'aria-pressed',
|
||||
'autocomplete',
|
||||
}
|
||||
|
||||
# Handle class attributes first (keeping original logic)
|
||||
if 'class' in element.attributes and element.attributes['class']:
|
||||
classes = element.attributes['class'].split()
|
||||
for class_name in classes:
|
||||
# Skip empty class names
|
||||
if not class_name:
|
||||
continue
|
||||
|
||||
# Escape special characters in class names
|
||||
if any(char in class_name for char in ':()[],>+~|.# '):
|
||||
# Use attribute contains for special characters
|
||||
@@ -706,32 +736,39 @@ class BrowserContext:
|
||||
else:
|
||||
css_selector += f'.{class_name}'
|
||||
|
||||
# Handle other attributes
|
||||
for attribute, value in element.attributes.items():
|
||||
if attribute == 'class':
|
||||
continue
|
||||
# Check for unique identifiers
|
||||
for attr in UNIQUE_IDENTIFIERS:
|
||||
if attr in element.attributes and element.attributes[attr]:
|
||||
value = element.attributes[attr].strip()
|
||||
if value:
|
||||
value = value.replace('"', '\\"')
|
||||
return f'{css_selector}[{attr}="{value}"]'
|
||||
|
||||
# Skip invalid attribute names
|
||||
if not attribute.strip():
|
||||
continue
|
||||
# Then check semi-unique attributes
|
||||
for attr in SEMI_UNIQUE_ATTRIBUTES:
|
||||
if attr in element.attributes and element.attributes[attr]:
|
||||
value = element.attributes[attr].strip()
|
||||
if value and len(value) < 100: # Avoid extremely long values
|
||||
value = value.replace('"', '\\"')
|
||||
css_selector += f'[{attr}="{value}"]'
|
||||
return css_selector # Return early as these are usually unique enough
|
||||
|
||||
# Escape special characters in attribute names
|
||||
safe_attribute = attribute.replace(':', r'\:')
|
||||
|
||||
# Handle different value cases
|
||||
if value == '':
|
||||
css_selector += f'[{safe_attribute}]'
|
||||
elif any(char in value for char in '"\'<>`'):
|
||||
# Use contains for values with special characters
|
||||
safe_value = value.replace('"', '\\"')
|
||||
css_selector += f'[{safe_attribute}*="{safe_value}"]'
|
||||
else:
|
||||
css_selector += f'[{safe_attribute}="{value}"]'
|
||||
# Finally, add descriptive attributes if selector isn't unique enough
|
||||
attr_count = 0
|
||||
for attr in DESCRIPTIVE_ATTRIBUTES:
|
||||
if attr_count >= 2: # Limit to 2 descriptive attributes
|
||||
break
|
||||
if attr in element.attributes and element.attributes[attr]:
|
||||
value = element.attributes[attr].strip()
|
||||
if value and len(value) < 50: # Skip very long values
|
||||
value = value.replace('"', '\\"')
|
||||
css_selector += f'[{attr}="{value}"]'
|
||||
attr_count += 1
|
||||
|
||||
return css_selector
|
||||
|
||||
except Exception:
|
||||
# Fallback to a more basic selector if something goes wrong
|
||||
# Fallback to a simple but unique selector
|
||||
tag_name = element.tag_name or '*'
|
||||
return f"{tag_name}[highlight_index='{element.highlight_index}']"
|
||||
|
||||
|
||||
@@ -19,7 +19,8 @@ async def test_highlight_elements():
|
||||
# await page.goto('https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/service-plans')
|
||||
# await page.goto('https://google.com/search?q=elon+musk')
|
||||
# await page.goto('https://kayak.com')
|
||||
await page.goto('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe')
|
||||
# await page.goto('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe')
|
||||
await page.goto('https://dictionary.cambridge.org')
|
||||
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ from browser_use.browser.context import BrowserContextConfig
|
||||
browser = Browser(
|
||||
config=BrowserConfig(
|
||||
disable_security=True,
|
||||
headless=True,
|
||||
headless=False,
|
||||
new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
|
||||
)
|
||||
)
|
||||
|
||||
@@ -4,7 +4,7 @@ description = "Make websites accessible for AI agents"
|
||||
authors = [
|
||||
{ name = "Gregor Zunic" }
|
||||
]
|
||||
version = "0.1.15"
|
||||
version = "0.1.16"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.11"
|
||||
classifiers = [
|
||||
|
||||
Reference in New Issue
Block a user