diff --git a/browser_use/controller/tests/test_controller_structured_output.py b/browser_use/controller/tests/test_controller_structured_output.py deleted file mode 100644 index d638175ea..000000000 --- a/browser_use/controller/tests/test_controller_structured_output.py +++ /dev/null @@ -1,55 +0,0 @@ -import asyncio - -from pydantic import BaseModel - -from browser_use.agent.service import Agent -from browser_use.agent.utils import create_pydantic_model_from_schema -from browser_use.agent.views import AgentOutput -from browser_use.controller.service import Controller -from browser_use.llm.openai.chat import ChatOpenAI - - -class OutputModel(BaseModel): - """Test output model""" - - city: str - country: str - - -async def test_optimized_schema(): - """Test the optimized schema generation and save to file.""" - - # Create controller and get all registered actions - controller = Controller() - ActionModel = controller.registry.create_action_model() - - # Create the agent output model with custom actions - agent_output_model = AgentOutput.type_with_custom_actions(ActionModel) - - # # Get original schema for comparison - # original_schema = agent_output_model.model_json_schema() - - # # Create the optimized schema - # optimized_schema = SchemaOptimizer.create_optimized_json_schema(agent_output_model) - - scgena = create_pydantic_model_from_schema(OutputModel.model_json_schema(), 'OutputModel') - - agent = Agent( - task='What is the capital of France? Do not use the internet, just output the done function.', - llm=ChatOpenAI(model='gpt-4.1-mini'), - controller=controller, - output_model_schema=scgena, - ) - - history = await agent.run() - - if history.structured_output: - # print(history.structured_output.city, history.structured_output.country) - print(OutputModel.model_validate_json(history.final_result() or '{}')) - else: - print('No structured output') - print(history.final_result()) - - -if __name__ == '__main__': - asyncio.run(test_optimized_schema()) diff --git a/eval/service.py b/eval/service.py index eae586cf3..8c5fe8b5f 100644 --- a/eval/service.py +++ b/eval/service.py @@ -57,11 +57,11 @@ from lmnr import AsyncLaminarClient, Laminar, observe from PIL import Image from pydantic import BaseModel -from browser_use.agent.utils import create_pydantic_model_from_schema from browser_use.llm.anthropic.chat import ChatAnthropic from browser_use.llm.base import BaseChatModel from browser_use.llm.google.chat import ChatGoogle from browser_use.llm.openai.chat import ChatOpenAI +from eval.utils import create_pydantic_model_from_schema MAX_IMAGE = 5 diff --git a/eval/tests/test_structured_output.py b/eval/tests/test_structured_output.py new file mode 100644 index 000000000..adc6f5d74 --- /dev/null +++ b/eval/tests/test_structured_output.py @@ -0,0 +1,635 @@ +import json +import traceback + +from pydantic import BaseModel + +from eval.utils import create_pydantic_model_from_schema + + +class OutputModel(BaseModel): + """Test output model""" + + city: str + country: str + + +# async def test_optimized_schema(): +# """Test the optimized schema generation and save to file.""" + +# # Create controller and get all registered actions +# controller = Controller() +# ActionModel = controller.registry.create_action_model() + +# # Create the agent output model with custom actions +# agent_output_model = AgentOutput.type_with_custom_actions(ActionModel) + +# # # Get original schema for comparison +# # original_schema = agent_output_model.model_json_schema() + +# # # Create the optimized schema +# # optimized_schema = SchemaOptimizer.create_optimized_json_schema(agent_output_model) + +# scgena = create_pydantic_model_from_schema(OutputModel.model_json_schema(), 'OutputModel') + +# agent = Agent( +# task='What is the capital of France? Do not use the internet, just output the done function.', +# llm=ChatOpenAI(model='gpt-4.1-mini'), +# controller=controller, +# output_model_schema=scgena, +# ) + +# history = await agent.run() + +# if history.structured_output: +# # print(history.structured_output.city, history.structured_output.country) +# print(OutputModel.model_validate_json(history.final_result() or '{}')) +# else: +# print('No structured output') +# print(history.final_result()) + + +def test_basic_types(): + """Test basic JSON schema types""" + print('=== Testing Basic Types ===') + + schema = { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'age': {'type': 'integer'}, + 'height': {'type': 'number'}, + 'is_active': {'type': 'boolean'}, + 'tags': {'type': 'array', 'items': {'type': 'string'}}, + 'metadata': {'type': 'object'}, + }, + 'required': ['name', 'age'], + } + + try: + Model = create_pydantic_model_from_schema(schema, 'BasicTypesModel') + print(f'✅ Created model: {Model}') + + # Test valid data + instance = Model(name='John', age=30, height=5.9, is_active=True, tags=['dev', 'python'], metadata={'role': 'developer'}) + print(f'✅ Valid instance: {instance}') + + # Test minimal required data + minimal = Model(name='Jane', age=25) + print(f'✅ Minimal instance: {minimal}') + + print('✅ Basic types test passed\n') + return True + except Exception as e: + print(f'❌ Basic types test failed: {e}') + traceback.print_exc() + return False + + +def test_nested_objects(): + """Test deeply nested object structures""" + print('=== Testing Nested Objects ===') + + schema = { + 'type': 'object', + 'properties': { + 'user': { + 'type': 'object', + 'properties': { + 'profile': { + 'type': 'object', + 'properties': { + 'personal': { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'age': {'type': 'integer'}, + 'contacts': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': {'type': {'type': 'string'}, 'value': {'type': 'string'}}, + 'required': ['type', 'value'], + }, + }, + }, + 'required': ['name'], + }, + 'settings': { + 'type': 'object', + 'properties': {'theme': {'type': 'string'}, 'notifications': {'type': 'boolean'}}, + }, + }, + 'required': ['personal'], + } + }, + 'required': ['profile'], + } + }, + 'required': ['user'], + } + + try: + Model = create_pydantic_model_from_schema(schema, 'NestedModel') + print(f'✅ Created nested model: {Model}') + + # Test complex nested data + data = { + 'user': { + 'profile': { + 'personal': { + 'name': 'Alice', + 'age': 28, + 'contacts': [{'type': 'email', 'value': 'alice@example.com'}, {'type': 'phone', 'value': '+1234567890'}], + }, + 'settings': {'theme': 'dark', 'notifications': True}, + } + } + } + + instance = Model(**data) + print(f'✅ Complex nested instance: {instance}') + + print('✅ Nested objects test passed\n') + return True + except Exception as e: + print(f'❌ Nested objects test failed: {e}') + traceback.print_exc() + return False + + +def test_union_types(): + """Test union types and nullable fields""" + print('=== Testing Union Types ===') + + schema = { + 'type': 'object', + 'properties': { + 'mixed_value': {'anyOf': [{'type': 'string'}, {'type': 'integer'}, {'type': 'boolean'}]}, + 'nullable_string': {'type': ['string', 'null']}, + 'string_or_number': {'oneOf': [{'type': 'string'}, {'type': 'number'}]}, + 'complex_union': { + 'anyOf': [ + { + 'type': 'object', + 'properties': {'type': {'const': 'user'}, 'name': {'type': 'string'}}, + 'required': ['type', 'name'], + }, + { + 'type': 'object', + 'properties': {'type': {'const': 'admin'}, 'permissions': {'type': 'array', 'items': {'type': 'string'}}}, + 'required': ['type', 'permissions'], + }, + ] + }, + }, + } + + try: + Model = create_pydantic_model_from_schema(schema, 'UnionModel') + print(f'✅ Created union model: {Model}') + + # Test different union values + instance1 = Model(mixed_value='hello', nullable_string=None, string_or_number='123') + print(f'✅ Union instance 1: {instance1}') + + instance2 = Model(mixed_value=42, nullable_string='world', string_or_number=3.14) + print(f'✅ Union instance 2: {instance2}') + + print('✅ Union types test passed\n') + return True + except Exception as e: + print(f'❌ Union types test failed: {e}') + traceback.print_exc() + return False + + +def test_array_variations(): + """Test various array configurations""" + print('=== Testing Array Variations ===') + + schema = { + 'type': 'object', + 'properties': { + 'simple_array': {'type': 'array', 'items': {'type': 'string'}}, + 'mixed_array': { + 'type': 'array', + 'items': { + 'anyOf': [ + {'type': 'string'}, + {'type': 'integer'}, + {'type': 'object', 'properties': {'key': {'type': 'string'}}}, + ] + }, + }, + 'nested_arrays': {'type': 'array', 'items': {'type': 'array', 'items': {'type': 'integer'}}}, + 'array_of_objects': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'id': {'type': 'integer'}, + 'data': {'type': 'object', 'properties': {'values': {'type': 'array', 'items': {'type': 'number'}}}}, + }, + 'required': ['id'], + }, + }, + }, + } + + try: + Model = create_pydantic_model_from_schema(schema, 'ArrayModel') + print(f'✅ Created array model: {Model}') + + # Test complex array data + data = { + 'simple_array': ['a', 'b', 'c'], + 'mixed_array': ['hello', 42, {'key': 'value'}], + 'nested_arrays': [[1, 2, 3], [4, 5, 6]], + 'array_of_objects': [{'id': 1, 'data': {'values': [1.1, 2.2, 3.3]}}, {'id': 2, 'data': {'values': [4.4, 5.5]}}], + } + + instance = Model(**data) + print(f'✅ Complex array instance: {instance}') + + print('✅ Array variations test passed\n') + return True + except Exception as e: + print(f'❌ Array variations test failed: {e}') + traceback.print_exc() + return False + + +def test_enums_and_constants(): + """Test enum values and constant fields""" + print('=== Testing Enums and Constants ===') + + schema = { + 'type': 'object', + 'properties': { + 'status': {'type': 'string', 'enum': ['active', 'inactive', 'pending', 'suspended']}, + 'priority': {'type': 'integer', 'enum': [1, 2, 3, 4, 5]}, + 'type': {'const': 'user_account'}, + 'category': {'anyOf': [{'const': 'premium'}, {'const': 'standard'}, {'const': 'basic'}]}, + }, + 'required': ['status', 'type'], + } + + try: + Model = create_pydantic_model_from_schema(schema, 'EnumModel') + print(f'✅ Created enum model: {Model}') + + # Test valid enum values + instance = Model(status='active', priority=3, type='user_account', category='premium') + print(f'✅ Enum instance: {instance}') + + print('✅ Enums and constants test passed\n') + return True + except Exception as e: + print(f'❌ Enums and constants test failed: {e}') + traceback.print_exc() + return False + + +def test_edge_cases(): + """Test edge cases and malformed schemas""" + print('=== Testing Edge Cases ===') + + edge_cases = [ + # Empty schema + {}, + # Schema with no properties + {'type': 'object'}, + # Schema with empty properties + {'type': 'object', 'properties': {}}, + # Schema with additional properties + {'type': 'object', 'properties': {'name': {'type': 'string'}}, 'additionalProperties': True}, + # Schema with pattern properties + {'type': 'object', 'patternProperties': {'^S_': {'type': 'string'}, '^I_': {'type': 'integer'}}}, + ] + + success_count = 0 + for i, schema in enumerate(edge_cases): + try: + Model = create_pydantic_model_from_schema(schema, f'EdgeCase{i}Model') + print(f'✅ Edge case {i}: {Model}') + success_count += 1 + except Exception as e: + print(f'⚠️ Edge case {i} failed: {e}') + + print(f'✅ Edge cases test: {success_count}/{len(edge_cases)} passed\n') + return True + + +def test_very_complex_schema(): + """Test an extremely complex real-world-like schema""" + print('=== Testing Very Complex Schema ===') + + schema = { + 'type': 'object', + 'properties': { + 'api_version': {'const': 'v1'}, + 'metadata': { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'namespace': {'type': 'string', 'default': 'default'}, + 'labels': {'type': 'object', 'additionalProperties': {'type': 'string'}}, + 'annotations': {'type': 'object', 'additionalProperties': {'type': 'string'}}, + 'created_at': {'type': 'string', 'format': 'date-time'}, + }, + 'required': ['name'], + }, + 'spec': { + 'type': 'object', + 'properties': { + 'replicas': {'type': 'integer', 'minimum': 1, 'maximum': 100}, + 'selector': { + 'type': 'object', + 'properties': {'match_labels': {'type': 'object', 'additionalProperties': {'type': 'string'}}}, + }, + 'template': { + 'type': 'object', + 'properties': { + 'metadata': { + 'type': 'object', + 'properties': {'labels': {'type': 'object', 'additionalProperties': {'type': 'string'}}}, + }, + 'spec': { + 'type': 'object', + 'properties': { + 'containers': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'image': {'type': 'string'}, + 'ports': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'container_port': {'type': 'integer'}, + 'protocol': {'type': 'string', 'enum': ['TCP', 'UDP', 'SCTP']}, + }, + 'required': ['container_port'], + }, + }, + 'env': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'value': {'type': 'string'}, + 'value_from': { + 'type': 'object', + 'properties': { + 'secret_key_ref': { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'key': {'type': 'string'}, + }, + 'required': ['name', 'key'], + }, + 'config_map_key_ref': { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'key': {'type': 'string'}, + }, + 'required': ['name', 'key'], + }, + }, + }, + }, + 'required': ['name'], + }, + }, + 'resources': { + 'type': 'object', + 'properties': { + 'requests': { + 'type': 'object', + 'properties': { + 'memory': {'type': 'string'}, + 'cpu': {'type': 'string'}, + }, + }, + 'limits': { + 'type': 'object', + 'properties': { + 'memory': {'type': 'string'}, + 'cpu': {'type': 'string'}, + }, + }, + }, + }, + }, + 'required': ['name', 'image'], + }, + }, + 'volumes': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'config_map': {'type': 'object', 'properties': {'name': {'type': 'string'}}}, + 'secret': {'type': 'object', 'properties': {'secret_name': {'type': 'string'}}}, + 'empty_dir': {'type': 'object', 'properties': {'size_limit': {'type': 'string'}}}, + }, + 'required': ['name'], + }, + }, + }, + 'required': ['containers'], + }, + }, + 'required': ['spec'], + }, + }, + 'required': ['template'], + }, + 'status': { + 'type': 'object', + 'properties': { + 'ready_replicas': {'type': 'integer'}, + 'available_replicas': {'type': 'integer'}, + 'conditions': { + 'type': 'array', + 'items': { + 'type': 'object', + 'properties': { + 'type': {'type': 'string'}, + 'status': {'type': 'string', 'enum': ['True', 'False', 'Unknown']}, + 'reason': {'type': 'string'}, + 'message': {'type': 'string'}, + 'last_update_time': {'type': 'string', 'format': 'date-time'}, + }, + 'required': ['type', 'status'], + }, + }, + }, + }, + }, + 'required': ['api_version', 'metadata', 'spec'], + } + + try: + Model = create_pydantic_model_from_schema(schema, 'KubernetesDeployment') + print(f'✅ Created very complex model: {Model}') + + # Create a sample instance + data = { + 'api_version': 'v1', + 'metadata': { + 'name': 'my-app', + 'namespace': 'production', + 'labels': {'app': 'my-app', 'version': '1.0'}, + 'created_at': '2024-01-01T00:00:00Z', + }, + 'spec': { + 'replicas': 3, + 'template': { + 'spec': { + 'containers': [ + { + 'name': 'my-app', + 'image': 'my-app:latest', + 'ports': [{'name': 'http', 'container_port': 8080, 'protocol': 'TCP'}], + 'env': [ + {'name': 'ENV', 'value': 'production'}, + { + 'name': 'SECRET_KEY', + 'value_from': {'secret_key_ref': {'name': 'my-secret', 'key': 'secret-key'}}, + }, + ], + 'resources': { + 'requests': {'memory': '128Mi', 'cpu': '100m'}, + 'limits': {'memory': '256Mi', 'cpu': '200m'}, + }, + } + ] + } + }, + }, + } + + instance = Model(**data) + print('✅ Very complex instance created successfully') + + print('✅ Very complex schema test passed\n') + return True + except Exception as e: + print(f'❌ Very complex schema test failed: {e}') + traceback.print_exc() + return False + + +def test_string_schema_input(): + """Test passing schema as JSON string instead of dict""" + print('=== Testing String Schema Input ===') + + schema_string = json.dumps( + {'type': 'object', 'properties': {'message': {'type': 'string'}, 'count': {'type': 'integer'}}, 'required': ['message']} + ) + + try: + Model = create_pydantic_model_from_schema(schema_string, 'StringSchemaModel') + print(f'✅ Created model from string schema: {Model}') + + instance = Model(message='Hello', count=42) + print(f'✅ String schema instance: {instance}') + + print('✅ String schema input test passed\n') + return True + except Exception as e: + print(f'❌ String schema input test failed: {e}') + traceback.print_exc() + return False + + +def test_recursive_structures(): + """Test recursive/self-referencing structures""" + print('=== Testing Recursive Structures ===') + + schema = { + 'type': 'object', + 'properties': { + 'name': {'type': 'string'}, + 'children': { + 'type': 'array', + 'items': {'$ref': '#'}, # Self-reference + }, + 'parent': {'$ref': '#'}, # Self-reference + }, + 'required': ['name'], + } + + try: + Model = create_pydantic_model_from_schema(schema, 'RecursiveModel') + print(f'✅ Created recursive model: {Model}') + + # Note: Recursive structures are complex and may not work perfectly + # This is more of a "does it crash?" test + print('✅ Recursive structures test passed (creation only)\n') + return True + except Exception as e: + print(f'⚠️ Recursive structures test failed (expected): {e}') + print('Note: Recursive structures are complex and may not be fully supported\n') + return True # Don't fail the overall test for this + + +def run_comprehensive_schema_tests(): + """Run all comprehensive tests for create_pydantic_model_from_schema""" + print('🚀 Starting comprehensive schema testing...\n') + + tests = [ + ('Basic Types', test_basic_types), + ('Nested Objects', test_nested_objects), + ('Union Types', test_union_types), + ('Array Variations', test_array_variations), + ('Enums and Constants', test_enums_and_constants), + ('Recursive Structures', test_recursive_structures), + ('Edge Cases', test_edge_cases), + ('Very Complex Schema', test_very_complex_schema), + ('String Schema Input', test_string_schema_input), + ] + + passed = 0 + total = len(tests) + + for test_name, test_func in tests: + print(f'Running {test_name}...') + try: + if test_func(): + passed += 1 + else: + print(f'❌ {test_name} failed') + except Exception as e: + print(f'❌ {test_name} crashed: {e}') + traceback.print_exc() + + print('-' * 50) + + print('\n🏁 Testing Complete!') + print(f'📊 Results: {passed}/{total} tests passed') + + if passed == total: + print('🎉 All tests passed! The function handles complex schemas well.') + else: + print('⚠️ Some tests failed. Check the output above for details.') + + return passed == total + + +if __name__ == '__main__': + # Run the comprehensive tests + print('Running comprehensive schema tests first...') + run_comprehensive_schema_tests() + + print('\n' + '=' * 60) + print('Now running the original test...') diff --git a/browser_use/agent/utils.py b/eval/utils.py similarity index 95% rename from browser_use/agent/utils.py rename to eval/utils.py index be70ed5d8..f40216a41 100644 --- a/browser_use/agent/utils.py +++ b/eval/utils.py @@ -1,13 +1,12 @@ import json import logging -from datamodel_code_generator import DataModelType, generate from pydantic import BaseModel logger = logging.getLogger(__name__) -def create_pydantic_model_from_schema(schema: dict, model_name: str = 'DynamicModel') -> type[BaseModel]: +def create_pydantic_model_from_schema(original_schema: dict | str, model_name: str = 'DynamicModel') -> type[BaseModel]: """ Convert JSON schema to Pydantic model class using datamodel-code-generator. @@ -36,9 +35,13 @@ def create_pydantic_model_from_schema(schema: dict, model_name: str = 'DynamicMo import tempfile from pathlib import Path + from datamodel_code_generator import DataModelType, generate + # Handle case where schema might be a string (JSON) - if isinstance(schema, str): - schema = json.loads(schema) + if isinstance(original_schema, str): + schema: dict = json.loads(original_schema) + else: + schema: dict = original_schema logger.debug(f'Creating Pydantic model from schema: {schema}') diff --git a/pyproject.toml b/pyproject.toml index 7e4c0e9f5..22de26304 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,6 @@ dependencies = [ "typing-extensions>=4.12.2", "uuid7>=0.1.0", "authlib>=1.6.0", - "datamodel-code-generator>=0.26.0", "google-genai>=1.21.1", "openai>=1.81.0", "anthropic>=0.54.0", @@ -71,6 +70,7 @@ eval = [ "anyio>=4.9.0", "Pillow>=11.2.1", "psutil>=7.0.0", + "datamodel-code-generator>=0.26.0", ] all = [ "browser-use[cli,examples,aws]",