mirror of
https://github.com/browser-use/browser-use
synced 2026-05-13 17:56:35 +02:00
have library judge show up in logs
This commit is contained in:
@@ -1730,6 +1730,15 @@ class Agent(Generic[Context, AgentStructuredOutput]):
|
||||
# Modify the last action result (that should have is_done=True) to include the judgement
|
||||
if self.history.history[-1].result[-1].is_done:
|
||||
self.history.history[-1].result[-1].judgement = judgement
|
||||
# Log the judgement verdict
|
||||
if judgement:
|
||||
verdict_color = '\033[32m' if judgement.verdict else '\033[31m'
|
||||
verdict_text = '✅ PASS' if judgement.verdict else '❌ FAIL'
|
||||
judge_log = f'\n⚖️ {verdict_color}Judge Verdict: {verdict_text}\033[0m\n'
|
||||
if judgement.failure_reason:
|
||||
judge_log += f' Failure: {judgement.failure_reason}\n'
|
||||
judge_log += f' {judgement.reasoning}\n'
|
||||
self.logger.info(judge_log)
|
||||
|
||||
break
|
||||
else:
|
||||
|
||||
@@ -21,20 +21,15 @@ from browser_use.llm import ChatGoogle
|
||||
|
||||
async def main():
|
||||
llm = ChatBrowserUse()
|
||||
# Use Claude for judging since it supports vision + structured output
|
||||
# Use gemini flash for judging since it supports vision + structured output
|
||||
judge_llm = ChatGoogle(model='gemini-flash-latest')
|
||||
task = "Search Google for 'what is browser automation' and tell me the top 3 results"
|
||||
agent = Agent(task=task, llm=llm, judge_llm=judge_llm)
|
||||
agent = Agent(task=task, llm=llm, use_judge=True, judge_llm=judge_llm)
|
||||
history = await agent.run()
|
||||
|
||||
# Print the judgement result
|
||||
# Get the judgement result
|
||||
if history.is_judged():
|
||||
judgement = history.judgement()
|
||||
print('\n' + '=' * 80)
|
||||
print('JUDGE EVALUATION')
|
||||
print(judgement)
|
||||
else:
|
||||
print('\nNo judgement available (task may not have completed or use_judge=False)')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user