Merge remote-tracking branch 'origin/HEAD' into multi-step

2026-05-06 17:52:15 +02:00 · 2024-12-04 17:31:21 +01:00
parent a299b68f2b 45a46f8fbd
commit f27e65500a
8 changed files with 93 additions and 4583 deletions
--- a/AgentHistoryList.json
+++ b/AgentHistoryList.json
--- a/README.md
+++ b/README.md
@@ -40,8 +40,7 @@ async def main():
    result = await agent.run()
    print(result)

-if __name__ == "__main__":
-    asyncio.run(main())
+asyncio.run(main())
 ```

 And don't forget to add your API keys to your `.env` file.
@@ -79,6 +78,7 @@ https://github.com/user-attachments/assets/de73ee39-432c-4b97-b4e8-939fd7f323b3
 - Add custom actions (e.g. save to file, push to database, notify me, get human input)
 - Self-correcting
 - Use any LLM supported by LangChain (e.g. gpt4o, gpt4o mini, claude 3.5 sonnet, llama 3.1 405b, etc.)
+- Parallelize as many agents as you want

 ## Register custom actions

@@ -126,6 +126,30 @@ agent = Agent(task=task, llm=model, controller=controller)
 await agent.run()
 ```

+## Parallelize agents
+
+In 99% of cases you should use one Browser instance and parallelize the agents with one context per agent.
+You can also reuse the context after the agent finishes.
+
+```python
+browser = Browser()
+```
+
+```python
+for i in range(10):
+    # This creates a new context and automatically closes it after the agent finishes (with `__aexit__`)
+    async with browser.new_context() as context:
+        agent = Agent(task=f"Task {i}", llm=model, browser_context=context)
+
+        # ... reuse context
+```
+
+To learn more about how this works under the hood, see the [Playwright BrowserContext documentation](https://playwright.dev/python/docs/api/class-browsercontext).
+
+### Context vs Browser
+
+If you don't specify a `browser` or `browser_context`, the agent will create a new browser instance and context.
+
 ## Get XPath history

 To get the entire history of everything the agent has done, you can use the output of the `run` method:
@@ -138,7 +162,7 @@ print(history)

 ## Browser configuration

-You can configure the browser using the `BrowserConfig` class.
+You can configure the browser using the `BrowserConfig` and `BrowserContextConfig` classes.

 The most important options are:

--- a/actions.json
+++ b/actions.json
@@ -1 +0,0 @@
-[{"search_google": {"query": "Elon Musk"}}, {"click_element": {"index": 40, "num_clicks": 1, "xpath": "//div[2]/div[3]/span[1]/div[1]/div[1]/div[1]/div[3]/div[1]/button[2]"}}, {"click_element": {"index": 101, "num_clicks": 1, "xpath": "//div[4]/div[1]/div[13]/div[4]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/span[1]/a[1]"}}, {"done": {"text": "Opened the Wikipedia page for Elon Musk."}}]
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -229,6 +229,7 @@ class Agent:
 			title=state.title,
 			tabs=state.tabs,
 			interacted_element=interacted_elements,
+			screenshot=state.screenshot,
 		)

 		history_item = AgentHistory(model_output=model_output, result=result, state=state_history)
--- a/browser_use/browser/context.py
+++ b/browser_use/browser/context.py
@@ -679,7 +679,7 @@ class BrowserContext:

 	def _enhanced_css_selector_for_element(self, element: DOMElementNode) -> str:
 		"""
-		Creates a CSS selector for a DOM element, handling various edge cases and special characters.
+		Creates a CSS selector for a DOM element, prioritizing unique identifiers.

 		Args:
 		        element: The DOM element to create a selector for
@@ -691,14 +691,44 @@ class BrowserContext:
 			# Get base selector from XPath
 			css_selector = self._convert_simple_xpath_to_css_selector(element.xpath)

-			# Handle class attributes
+			# First priority - unique identifiers
+			UNIQUE_IDENTIFIERS = {'id', 'data-testid', 'data-id', 'data-qa', 'data-cy', 'data-test'}
+
+			# Second priority - attributes that often contain unique values
+			SEMI_UNIQUE_ATTRIBUTES = {
+				'href',
+				'src',
+				'value',
+				'name',
+				'for',
+				'aria-controls',
+				'action',
+				'data-url',
+				'data-href',
+			}
+
+			# Third priority - descriptive attributes
+			DESCRIPTIVE_ATTRIBUTES = {
+				'type',
+				'role',
+				'aria-label',
+				'title',
+				'placeholder',
+				'alt',
+				'aria-expanded',
+				'aria-haspopup',
+				'aria-selected',
+				'aria-current',
+				'aria-pressed',
+				'autocomplete',
+			}
+
+			# Handle class attributes first (keeping original logic)
 			if 'class' in element.attributes and element.attributes['class']:
 				classes = element.attributes['class'].split()
 				for class_name in classes:
-					# Skip empty class names
 					if not class_name:
 						continue
-
 					# Escape special characters in class names
 					if any(char in class_name for char in ':()[],>+~|.# '):
 						# Use attribute contains for special characters
@@ -706,32 +736,39 @@ class BrowserContext:
 					else:
 						css_selector += f'.{class_name}'

-			# Handle other attributes
-			for attribute, value in element.attributes.items():
-				if attribute == 'class':
-					continue
+			# Check for unique identifiers
+			for attr in UNIQUE_IDENTIFIERS:
+				if attr in element.attributes and element.attributes[attr]:
+					value = element.attributes[attr].strip()
+					if value:
+						value = value.replace('"', '\\"')
+						return f'{css_selector}[{attr}="{value}"]'

-				# Skip invalid attribute names
-				if not attribute.strip():
-					continue
+			# Then check semi-unique attributes
+			for attr in SEMI_UNIQUE_ATTRIBUTES:
+				if attr in element.attributes and element.attributes[attr]:
+					value = element.attributes[attr].strip()
+					if value and len(value) < 100:  # Avoid extremely long values
+						value = value.replace('"', '\\"')
+						css_selector += f'[{attr}="{value}"]'
+						return css_selector  # Return early as these are usually unique enough

-				# Escape special characters in attribute names
-				safe_attribute = attribute.replace(':', r'\:')
-
-				# Handle different value cases
-				if value == '':
-					css_selector += f'[{safe_attribute}]'
-				elif any(char in value for char in '"\'<>`'):
-					# Use contains for values with special characters
-					safe_value = value.replace('"', '\\"')
-					css_selector += f'[{safe_attribute}*="{safe_value}"]'
-				else:
-					css_selector += f'[{safe_attribute}="{value}"]'
+			# Finally, add descriptive attributes if selector isn't unique enough
+			attr_count = 0
+			for attr in DESCRIPTIVE_ATTRIBUTES:
+				if attr_count >= 2:  # Limit to 2 descriptive attributes
+					break
+				if attr in element.attributes and element.attributes[attr]:
+					value = element.attributes[attr].strip()
+					if value and len(value) < 50:  # Skip very long values
+						value = value.replace('"', '\\"')
+						css_selector += f'[{attr}="{value}"]'
+						attr_count += 1

 			return css_selector

 		except Exception:
-			# Fallback to a more basic selector if something goes wrong
+			# Fallback to a simple but unique selector
 			tag_name = element.tag_name or '*'
 			return f"{tag_name}[highlight_index='{element.highlight_index}']"

--- a/browser_use/browser/tests/test_clicks.py
+++ b/browser_use/browser/tests/test_clicks.py
@@ -19,7 +19,8 @@ async def test_highlight_elements():
 		# await page.goto('https://help.sap.com/docs/sap-ai-core/sap-ai-core-service-guide/service-plans')
 		# await page.goto('https://google.com/search?q=elon+musk')
 		# await page.goto('https://kayak.com')
-		await page.goto('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe')
+		# await page.goto('https://www.w3schools.com/tags/tryit.asp?filename=tryhtml_iframe')
+		await page.goto('https://dictionary.cambridge.org')

 		await asyncio.sleep(1)

--- a/examples/parallel_agents.py
+++ b/examples/parallel_agents.py
@@ -13,7 +13,7 @@ from browser_use.browser.context import BrowserContextConfig
 browser = Browser(
 	config=BrowserConfig(
 		disable_security=True,
-		headless=True,
+		headless=False,
 		new_context_config=BrowserContextConfig(save_recording_path='./tmp/recordings'),
 	)
 )
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ description = "Make websites accessible for AI agents"
 authors = [
    { name = "Gregor Zunic" }
 ]
-version = "0.1.15"
+version = "0.1.16"
 readme = "README.md"
 requires-python = ">=3.11"
 classifiers = [
				`@@ -1 +0,0 @@`
				[{"search_google": {"query": "Elon Musk"}}, {"click_element": {"index": 40, "num_clicks": 1, "xpath": "//div[2]/div[3]/span[1]/div[1]/div[1]/div[1]/div[3]/div[1]/button[2]"}}, {"click_element": {"index": 101, "num_clicks": 1, "xpath": "//div[4]/div[1]/div[13]/div[4]/div[1]/div[2]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[3]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/div[1]/span[1]/a[1]"}}, {"done": {"text": "Opened the Wikipedia page for Elon Musk."}}]