Log the library judge's verdict so it shows up in the agent logs

2026-05-13 17:56:35 +02:00 · 2025-10-31 20:30:03 -07:00
parent 15911c8c72
commit 10ec99dd26
2 changed files with 12 additions and 8 deletions
--- a/browser_use/agent/service.py
+++ b/browser_use/agent/service.py
@@ -1730,6 +1730,15 @@ class Agent(Generic[Context, AgentStructuredOutput]):
 						# Modify the last action result (that should have is_done=True) to include the judgement
 						if self.history.history[-1].result[-1].is_done:
 							self.history.history[-1].result[-1].judgement = judgement
+							# Log the judgement verdict
+							if judgement:
+								verdict_color = '\033[32m' if judgement.verdict else '\033[31m'
+								verdict_text = '✅ PASS' if judgement.verdict else '❌ FAIL'
+								judge_log = f'\n⚖️  {verdict_color}Judge Verdict: {verdict_text}\033[0m\n'
+								if judgement.failure_reason:
+									judge_log += f'   Failure: {judgement.failure_reason}\n'
+								judge_log += f'   {judgement.reasoning}\n'
+								self.logger.info(judge_log)

 					break
 			else:
--- a/examples/features/judge_trace.py
+++ b/examples/features/judge_trace.py
@@ -21,20 +21,15 @@ from browser_use.llm import ChatGoogle

 async def main():
 	llm = ChatBrowserUse()
-	# Use Claude for judging since it supports vision + structured output
+	# Use Gemini Flash for judging since it supports vision + structured output
 	judge_llm = ChatGoogle(model='gemini-flash-latest')
 	task = "Search Google for 'what is browser automation' and tell me the top 3 results"
-	agent = Agent(task=task, llm=llm, judge_llm=judge_llm)
+	agent = Agent(task=task, llm=llm, use_judge=True, judge_llm=judge_llm)
 	history = await agent.run()

-	# Print the judgement result
+	# Get the judgement result
 	if history.is_judged():
 		judgement = history.judgement()
-		print('\n' + '=' * 80)
-		print('JUDGE EVALUATION')
-		print(judgement)
-	else:
-		print('\nNo judgement available (task may not have completed or use_judge=False)')


 if __name__ == '__main__':