Merge remote-tracking branch 'origin/HEAD' into multi-step

This commit is contained in:
magmueller
2024-12-04 17:31:21 +01:00
8 changed files with 93 additions and 4583 deletions

File diff suppressed because one or more lines are too long

View File

@@ -40,8 +40,7 @@ async def main():
result = await agent.run()
print(result)
if __name__ == "__main__":
asyncio.run(main())
asyncio.run(main())
```
And don't forget to add your API keys to your `.env` file.
@@ -79,6 +78,7 @@ https://github.com/user-attachments/assets/de73ee39-432c-4b97-b4e8-939fd7f323b3
- Add custom actions (e.g. save to file, push to database, notify me, get human input)
- Self-correcting
- Use any LLM supported by LangChain (e.g. gpt4o, gpt4o mini, claude 3.5 sonnet, llama 3.1 405b, etc.)
- Parallelize as many agents as you want
## Register custom actions
@@ -126,6 +126,30 @@ agent = Agent(task=task, llm=model, controller=controller)
await agent.run()
```
## Parallelize agents
In 99% of cases, you should use one Browser instance and parallelize the agents with one context per agent.
You can also reuse the context after the agent finishes.
```python
browser = Browser()
```
```python
for i in range(10):
# This creates a new context and automatically closes it after the agent finishes (with `__aexit__`)
async with browser.new_context() as context:
agent = Agent(task=f"Task {i}", llm=model, browser_context=context)
# ... reuse context
```
If you would like to learn more about how this works under the hood, see the [playwright browser-context](https://playwright.dev/python/docs/api/class-browsercontext) documentation.
### Context vs Browser
If you don't specify a `browser` or `browser_context`, the agent will create a new browser instance and context.
## Get XPath history
To get the entire history of everything the agent has done, you can use the output of the `run` method:
@@ -138,7 +162,7 @@ print(history)
## Browser configuration
You can configure the browser using the `BrowserConfig` class.
You can configure the browser using the `BrowserConfig` and `BrowserContextConfig` classes.
The most important options are:

View File

@@ -1 +0,0 @@
[{"search_google": {"query": "Elon Musk"}}, {"click_element": {"index": 40, "num_clicks": 1, "xpath": "//div[2]/div[3]/span[1]/div[1]/div[1]/div[1]/div[3]/div[1]/button[2]"}}, {"click_element": {"index": 101, "num_clicks": 1, "xpath": "//div[4]/div[1]/div[13]/div[4]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/span[1]/a[1]"}}, {"done": {"text": "Opened the Wikipedia page for Elon Musk."}}]

View File

@@ -229,6 +229,7 @@ class Agent:
title=state.title,
tabs=state.tabs,
interacted_element=interacted_elements,
screenshot=state.screenshot,
)
history_item = AgentHistory(model_output=model_output, result=result, state=state_history)

View File

@@ -679,7 +679,7 @@ class BrowserContext:
def _enhanced_css_selector_for_element(self, element: DOMElementNode) -> str:
"""
Creates a CSS selector for a DOM element, handling various edge cases and special characters.
Creates a CSS selector for a DOM element, prioritizing unique identifiers.
Args:
element: The DOM element to create a selector for
@@ -691,14 +691,44 @@ class BrowserContext:
# Get base selector from XPath
css_selector = self._convert_simple_xpath_to_css_selector(element.xpath)
# Handle class attributes
# First priority - unique identifiers
UNIQUE_IDENTIFIERS = {'id', 'data-testid', 'data-id', 'data-qa', 'data-cy', 'data-test'}
# Second priority - attributes that often contain unique values
SEMI_UNIQUE_ATTRIBUTES = {
'href',
'src',
'value',
'name',
'for',
'aria-controls',
'action',
'data-url',
'data-href',
}
# Third priority - descriptive attributes
DESCRIPTIVE_ATTRIBUTES = {
'type',
'role',
'aria-label',
'title',
'placeholder',
'alt',
'aria-expanded',
'aria-haspopup',
'aria-selected',
'aria-current',
'aria-pressed',
'autocomplete',
}
# Handle class attributes first (keeping original logic)
if 'class' in element.attributes and element.attributes['class']:
classes = element.attributes['class'].split()
for class_name in classes:
# Skip empty class names
if not class_name:
continue
# Escape special characters in class names
if any(char in class_name for char in ':()[],>+~|.# '):
# Use attribute contains for special characters
@@ -706,32 +736,39 @@ class BrowserContext:
else:
css_selector += f'.{class_name}'
# Handle other attributes
for attribute, value in element.attributes.items():
if attribute == 'class':
continue
# Check for unique identifiers
for attr in UNIQUE_IDENTIFIERS:
if attr in element.attributes and element.attributes[attr]:
value = element.attributes[attr].strip()
if value:
value = value.replace('"', '\\"')
return f'{css_selector}[{attr}="{value}"]'
# Skip invalid attribute names
if not attribute.strip():
continue
# Then check semi-unique attributes
for attr in SEMI_UNIQUE_ATTRIBUTES:
if attr in element.attributes and element.attributes[attr]:
value = element.attributes[attr].strip()
if value and len(value) < 100: # Avoid extremely long values
value = value.replace('"', '\\"')
css_selector += f'[{attr}="{value}"]'
return css_selector # Return early as these are usually unique enough
# Escape special characters in attribute names
safe_attribute = attribute.replace(':', r'\:')
# Handle different value cases
if value == '':
css_selector += f'[{safe_attribute}]'
elif any(char in value for char in '"\'<>`'):
# Use contains for values with special characters
safe_value = value.replace('"', '\\"')
css_selector += f'[{safe_attribute}*="{safe_value}"]'
else:
css_selector += f'[{safe_attribute}="{value}"]'
# Finally, add descriptive attributes if selector isn't unique enough
attr_count = 0
for attr in DESCRIPTIVE_ATTRIBUTES:
if attr_count >= 2: # Limit to 2 descriptive attributes
break
if attr in element.attributes and element.attributes[attr]:
value = element.attributes[attr].strip()
if value and len(value) < 50: # Skip very long values
value = value.replace('"', '\\"')
css_selector += f'[{attr}="{value}"]'
attr_count += 1
return css_selector
except Exception:
# Fallback to a more basic selector if something goes wrong
# Fallback to a simple but unique selector
tag_name = element.tag_name or '*'
return f"{tag_name}[highlight_index='{element.highlight_index}']"

View File

@@ -19,7 +19,8 @@ async def test_highlight_elements():
# await page.goto('https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/service-plans')
# await page.goto('https://google.com/search?q=elon+musk')
# await page.goto('https://kayak.com')
await page.goto('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe')
# await page.goto('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe')
await page.goto('https://dictionary.cambridge.org')
await asyncio.sleep(1)

View File

@@ -13,7 +13,7 @@ from browser_use.browser.context import BrowserContextConfig
browser = Browser(
config=BrowserConfig(
disable_security=True,
headless=True,
headless=False,
new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
)
)

View File

@@ -4,7 +4,7 @@ description = "Make websites accessible for AI agents"
authors = [
{ name = "Gregor Zunic" }
]
version = "0.1.15"
version = "0.1.16"
readme = "README.md"
requires-python = ">=3.11"
classifiers = [