# mirror of https://github.com/browser-use/browser-use
# synced 2026-05-06 17:52:15 +02:00
"""
|
|
Utilities for creating optimized Pydantic schemas for LLM usage.
|
|
"""

from typing import Any

from pydantic import BaseModel

class SchemaOptimizer:
	"""Produces flattened, provider-friendly JSON schemas from Pydantic models."""

	@staticmethod
	def create_optimized_json_schema(model: type[BaseModel]) -> dict[str, Any]:
		"""
		Create the most optimized schema by flattening all $ref/$defs while preserving
		FULL descriptions and ALL action definitions. Also ensures OpenAI strict mode compatibility.

		Args:
			model: The Pydantic model to optimize

		Returns:
			Optimized schema with all $refs resolved and strict mode compatibility
		"""
		# Generate original schema
		original_schema = model.model_json_schema()

		# Extract $defs for reference resolution, then flatten everything
		defs_lookup = original_schema.get('$defs', {})

		def optimize_schema(
			obj: Any,
			defs_lookup: dict[str, Any] | None = None,
			*,
			in_properties: bool = False,  # True while iterating the values of a `properties` map
		) -> Any:
			"""Apply all optimization techniques including flattening all $ref/$defs."""
			if isinstance(obj, dict):
				optimized: dict[str, Any] = {}
				flattened_ref: dict[str, Any] | None = None

				# Skip unnecessary fields AND $defs (everything referenced gets inlined instead)
				skip_fields = ['additionalProperties', '$defs']

				for key, value in obj.items():
					if key in skip_fields:
						continue

					# Skip metadata "title" unless we're iterating inside an actual
					# `properties` map, where "title" is a real property name, not metadata
					if key == 'title' and not in_properties:
						continue

					# Preserve FULL descriptions without truncation; drop empty ones
					elif key == 'description':
						if value:  # Only include non-empty descriptions
							optimized[key] = value

					# Handle type field
					elif key == 'type':
						optimized[key] = value

					# FLATTEN: resolve $ref by inlining the referenced definition
					elif key == '$ref' and defs_lookup:
						ref_name = value.split('/')[-1]  # "#/$defs/SomeName" -> "SomeName"
						if ref_name in defs_lookup:
							# Recursively flatten the referenced definition itself
							flattened_ref = optimize_schema(defs_lookup[ref_name], defs_lookup)
						# NOTE(review): an unresolvable $ref is silently dropped here —
						# presumably $defs always contains every referenced name; confirm.

					# Keep all anyOf structures (action unions) and resolve any $refs within
					elif key == 'anyOf' and isinstance(value, list):
						optimized[key] = [optimize_schema(item, defs_lookup) for item in value]

					# Recursively optimize nested structures; only the direct values of a
					# `properties` map may keep their "title" keys
					elif key in ['properties', 'items']:
						optimized[key] = optimize_schema(
							value,
							defs_lookup,
							in_properties=(key == 'properties'),
						)

					# Keep essential validation fields ('type' is already handled above,
					# so it is intentionally absent from this list)
					elif key in ['required', 'minimum', 'maximum', 'minItems', 'maxItems', 'pattern', 'default']:
						optimized[key] = value if not isinstance(value, (dict, list)) else optimize_schema(value, defs_lookup)

					# Recursively process all other fields
					else:
						optimized[key] = optimize_schema(value, defs_lookup) if isinstance(value, (dict, list)) else value

				# If we have a flattened reference, merge it with the optimized properties
				if flattened_ref is not None and isinstance(flattened_ref, dict):
					# Start with the flattened reference as the base
					result = flattened_ref.copy()

					# Merge in any sibling properties that were processed
					for key, value in optimized.items():
						# Preserve the original object's description only when the
						# flattened ref doesn't already carry one
						if key == 'description' and 'description' not in result:
							result[key] = value
						elif key != 'description':  # Don't overwrite description from flattened ref
							result[key] = value

					return result
				else:
					# No $ref: just return the optimized object.
					# CRITICAL: add additionalProperties: false to ALL objects for OpenAI strict mode
					if optimized.get('type') == 'object':
						optimized['additionalProperties'] = False

					return optimized

			elif isinstance(obj, list):
				return [optimize_schema(item, defs_lookup, in_properties=in_properties) for item in obj]
			return obj

		# Create optimized schema with flattening
		optimized_result = optimize_schema(original_schema, defs_lookup)

		# The schema root should always be a dict; guard against a malformed result anyway
		if not isinstance(optimized_result, dict):
			raise ValueError('Optimized schema result is not a dictionary')

		optimized_schema: dict[str, Any] = optimized_result

		# Safety-net pass: ensure ALL object schemas carry additionalProperties: false
		def ensure_additional_properties_false(obj: Any) -> None:
			"""Ensure all objects have additionalProperties: false (mutates in place)."""
			if isinstance(obj, dict):
				# If it's an object type, ensure additionalProperties is false
				if obj.get('type') == 'object':
					obj['additionalProperties'] = False

				# Recursively apply to all values
				for value in obj.values():
					if isinstance(value, (dict, list)):
						ensure_additional_properties_false(value)
			elif isinstance(obj, list):
				for item in obj:
					if isinstance(item, (dict, list)):
						ensure_additional_properties_false(item)

		ensure_additional_properties_false(optimized_schema)
		SchemaOptimizer._make_strict_compatible(optimized_schema)

		return optimized_schema

	@staticmethod
	def _make_strict_compatible(schema: dict[str, Any] | list[Any]) -> None:
		"""Mark every declared property as required, as OpenAI strict mode demands.

		Mutates *schema* in place, depth-first so nested objects are fixed before
		their parents.
		"""
		if isinstance(schema, dict):
			# First recursively apply to nested objects ('required' itself holds only
			# property names, so there is nothing to recurse into there)
			for key, value in schema.items():
				if isinstance(value, (dict, list)) and key != 'required':
					SchemaOptimizer._make_strict_compatible(value)

			# Then update required for this level: every property becomes required
			if 'properties' in schema and 'type' in schema and schema['type'] == 'object':
				schema['required'] = list(schema['properties'].keys())

		elif isinstance(schema, list):
			for item in schema:
				SchemaOptimizer._make_strict_compatible(item)

	@staticmethod
	def create_gemini_optimized_schema(model: type[BaseModel]) -> dict[str, Any]:
		"""
		Create Gemini-optimized schema that removes 'required' arrays to save tokens.
		Gemini can infer required fields from context since all fields are required.

		Args:
			model: The Pydantic model to optimize

		Returns:
			Optimized schema without required arrays
		"""
		# Start with the standard optimized (flattened, strict-mode) schema
		schema = SchemaOptimizer.create_optimized_json_schema(model)

		def remove_required_arrays(obj: Any) -> Any:
			"""Recursively strip every 'required' key from the schema tree."""
			if isinstance(obj, dict):
				return {k: remove_required_arrays(v) for k, v in obj.items() if k != 'required'}
			elif isinstance(obj, list):
				return [remove_required_arrays(item) for item in obj]
			return obj

		return remove_required_arrays(schema)