Fix small actions - improve prompt for multiaction - improve logging

This commit is contained in:
magmueller
2024-12-01 22:34:17 +01:00
parent 5128b6f0c4
commit 34ca52ea3a
6 changed files with 18 additions and 9 deletions

View File

@@ -20,7 +20,7 @@ class SystemPrompt:
1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format:
{
"current_state": {
"evaluation_previous_goal": "Success|Failed|Unknown-Brief description of why",
"evaluation_previous_goal": "Success|Failed|Unknown - Brief description of why",
"memory": "Brief description of what has been done-What you need to remember until the end of the task",
"next_goal": "What needs to be done with the next actions"
},
@@ -70,10 +70,11 @@ class SystemPrompt:
- Visual context helps verify element locations and relationships
7. ACTION SEQUENCING:
- Actions are executed in the order they appear in the list
- Actions are executed in the order they appear in the list
- Each action should logically follow from the previous one
- If the page changes between actions. The sequence is interrupted and you get the new page state.
- If the page changes between actions, the sequence is interrupted and you get the new page state.
- Only provide the action sequence until you think the DOM will change.
- Try to be efficient. If the dom changes a little bit we find the right element.
"""
def input_format(self) -> str:
@@ -152,11 +153,13 @@ Interactive elements:
if self.result:
for i, result in enumerate(self.result):
if result.extracted_content:
state_description += f'\nResult of action {i}: {result.extracted_content}'
state_description += (
f'\nResult of action {i + 1}/{len(self.result)}: {result.extracted_content}'
)
if result.error:
# only keep the last `max_error_length` characters of the error
error = result.error[-self.max_error_length :]
state_description += f'\nError of action {i}: ...{error}'
state_description += f'\nError of action {i + 1}/{len(self.result)}: ...{error}'
if self.state.screenshot:
# Format message for vision model

View File

@@ -240,7 +240,7 @@ class Agent:
logger.info(f'🎯 Next Goal: {response.current_state.next_goal}')
for i, action in enumerate(response.action):
logger.info(
f'🛠️ Action {i}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}'
f'🛠️ Action {i + 1}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}'
)
def _save_conversation(self, input_messages: list[BaseMessage], response: Any) -> None:

View File

@@ -73,7 +73,7 @@ class AgentHistory(BaseModel):
elements = []
for action in model_output.action:
index = action.get_index()
if index:
if index and index in selector_map:
el: DOMElementNode = selector_map[index]
elements.append(HistoryTreeProcessor.convert_dom_element_to_history_element(el))
else:

View File

@@ -70,6 +70,7 @@ class BrowserConfig:
minimum_wait_page_load_time: float = 0.5
wait_for_network_idle_page_load_time: float = 1
maximum_wait_page_load_time: float = 5
wait_between_actions: float = 1
extra_chromium_args: list[str] = field(default_factory=list)
browser_window_size: BrowserWindowSize = field(

View File

@@ -1,3 +1,4 @@
import asyncio
import logging
from main_content_extractor import MainContentExtractor
@@ -27,6 +28,7 @@ class Controller:
browser_config: BrowserConfig = BrowserConfig(),
):
self.browser = Browser(config=browser_config)
self.wait_between_actions = browser_config.wait_between_actions
self.registry = Registry()
self._register_default_actions()
@@ -79,7 +81,9 @@ class Controller:
await browser.switch_to_tab(-1)
return ActionResult(extracted_content=msg)
except Exception as e:
logger.warning(f'Element no longer available: {str(e)}')
logger.warning(
f'Element no longer available with index {params.index} - most likely the page changed'
)
return ActionResult(error=str(e))
@self.registry.action('Input text', param_model=InputTextAction, requires_browser=True)
@@ -182,6 +186,7 @@ class Controller:
results.append(await self.act(action))
if results[-1].is_done or results[-1].error:
break
await asyncio.sleep(self.wait_between_actions)
return results
@time_execution_sync('--act')

View File

@@ -91,7 +91,7 @@ async def close_file_dialog(browser: Browser):
async def main():
task = (
'Read my cv & find machine learning engineer jobs for me. '
'Read my cv & find machine learning engineer jobs in Bangalore for me. '
'Save them to a file'
'then start applying for them in new tabs - please not via job portals like linkedin, indeed, etc. '
'If you need more information or help, ask me.'