# Mirror of https://github.com/browser-use/browser-use
# Synced 2026-04-22 17:45:09 +02:00
# 632 lines, 20 KiB, Python
"""Tests for schema-enforced structured extraction."""
|
|
|
|
import asyncio
|
|
import json
|
|
import tempfile
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
from pydantic import ValidationError
|
|
from pytest_httpserver import HTTPServer
|
|
|
|
from browser_use.agent.views import ActionResult
|
|
from browser_use.browser import BrowserProfile, BrowserSession
|
|
from browser_use.filesystem.file_system import FileSystem
|
|
from browser_use.llm.base import BaseChatModel
|
|
from browser_use.llm.views import ChatInvokeCompletion
|
|
from browser_use.tools.extraction.schema_utils import schema_dict_to_pydantic_model
|
|
from browser_use.tools.extraction.views import ExtractionResult
|
|
from browser_use.tools.service import Tools
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Unit tests: schema_dict_to_pydantic_model
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestSchemaDictToPydanticModel:
    """Unit tests for the JSON-Schema → Pydantic model converter."""

    def test_flat_object(self):
        """A flat object schema with required primitives yields a model exposing those fields."""
        schema = {
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'age': {'type': 'integer'},
            },
            'required': ['name', 'age'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(name='Alice', age=30)
        assert instance.name == 'Alice'  # type: ignore[attr-defined]
        assert instance.age == 30  # type: ignore[attr-defined]

    def test_nested_object(self):
        """A nested object property is exposed with attribute access on the sub-object."""
        schema = {
            'type': 'object',
            'properties': {
                'person': {
                    'type': 'object',
                    'properties': {
                        'first': {'type': 'string'},
                        'last': {'type': 'string'},
                    },
                    'required': ['first'],
                },
            },
            'required': ['person'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(person={'first': 'Bob', 'last': 'Smith'})
        assert instance.person.first == 'Bob'  # type: ignore[attr-defined]

    def test_array_of_objects(self):
        """An array whose items are objects is validated into a list of sub-objects."""
        schema = {
            'type': 'object',
            'properties': {
                'items': {
                    'type': 'array',
                    'items': {
                        'type': 'object',
                        'properties': {
                            'id': {'type': 'integer'},
                            'label': {'type': 'string'},
                        },
                        'required': ['id', 'label'],
                    },
                },
            },
            'required': ['items'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(items=[{'id': 1, 'label': 'a'}, {'id': 2, 'label': 'b'}])
        assert len(instance.items) == 2  # type: ignore[attr-defined]
        assert instance.items[0].id == 1  # type: ignore[attr-defined]

    def test_array_of_primitives(self):
        """An array of strings round-trips as a plain list of strings."""
        schema = {
            'type': 'object',
            'properties': {
                'tags': {'type': 'array', 'items': {'type': 'string'}},
            },
            'required': ['tags'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(tags=['a', 'b', 'c'])
        assert instance.tags == ['a', 'b', 'c']  # type: ignore[attr-defined]

    def test_enum_field(self):
        """A required string enum accepts one of its listed values."""
        schema = {
            'type': 'object',
            'properties': {
                'status': {'type': 'string', 'enum': ['active', 'inactive']},
            },
            'required': ['status'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(status='active')
        assert instance.status == 'active'  # type: ignore[attr-defined]

    def test_optional_enum_defaults_to_none(self):
        """Non-required enum fields default to None, not an out-of-set empty string."""
        schema = {
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'priority': {'type': 'string', 'enum': ['low', 'medium', 'high']},
            },
            'required': ['name'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(name='task1')
        assert instance.priority is None  # type: ignore[attr-defined]
        # Serialized output must not contain an out-of-set value
        dumped = instance.model_dump(mode='json')
        assert dumped['priority'] is None

        # When provided, value still works
        instance2 = Model(name='task2', priority='high')
        assert instance2.priority == 'high'  # type: ignore[attr-defined]

    def test_optional_fields_get_type_appropriate_defaults(self):
        """Omitted non-required primitives and arrays default to '', 0.0, 0, False and []."""
        schema = {
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'nickname': {'type': 'string'},
                'score': {'type': 'number'},
                'rank': {'type': 'integer'},
                'active': {'type': 'boolean'},
                'tags': {'type': 'array', 'items': {'type': 'string'}},
            },
            'required': ['name'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(name='Alice')
        assert instance.name == 'Alice'  # type: ignore[attr-defined]
        assert instance.nickname == ''  # type: ignore[attr-defined]
        assert instance.score == 0.0  # type: ignore[attr-defined]
        assert instance.rank == 0  # type: ignore[attr-defined]
        assert instance.active is False  # type: ignore[attr-defined]
        assert instance.tags == []  # type: ignore[attr-defined]

    def test_optional_non_nullable_rejects_null(self):
        """Non-required fields that aren't nullable must reject explicit null."""
        schema = {
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'nickname': {'type': 'string'},
            },
            'required': ['name'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        with pytest.raises(ValidationError):
            Model(name='Alice', nickname=None)

    def test_optional_with_explicit_default(self):
        """A schema-level 'default' is used when the non-required field is omitted."""
        schema = {
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'color': {'type': 'string', 'default': 'blue'},
            },
            'required': ['name'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(name='Alice')
        assert instance.color == 'blue'  # type: ignore[attr-defined]

    def test_optional_nested_object_defaults_to_none(self):
        """Non-required nested objects fall back to None since constructing a default is not feasible."""
        schema = {
            'type': 'object',
            'properties': {
                'name': {'type': 'string'},
                'address': {
                    'type': 'object',
                    'properties': {'city': {'type': 'string'}},
                    'required': ['city'],
                },
            },
            'required': ['name'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(name='Alice')
        assert instance.address is None  # type: ignore[attr-defined]

    def test_model_name_from_title(self):
        """The schema's 'title' becomes the generated model's class name."""
        schema = {
            'title': 'ProductInfo',
            'type': 'object',
            'properties': {'sku': {'type': 'string'}},
            'required': ['sku'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        assert Model.__name__ == 'ProductInfo'

    def test_model_validate_json_roundtrip(self):
        """An instance dumped to JSON validates back to equal field values."""
        schema = {
            'type': 'object',
            'properties': {
                'x': {'type': 'number'},
                'y': {'type': 'boolean'},
            },
            'required': ['x', 'y'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(x=3.14, y=True)
        raw = instance.model_dump_json()
        restored = Model.model_validate_json(raw)
        assert restored.x == instance.x  # type: ignore[attr-defined]
        assert restored.y == instance.y  # type: ignore[attr-defined]

    def test_rejects_ref(self):
        """A schema using '$ref'/'$defs' raises ValueError for an unsupported keyword."""
        schema = {
            'type': 'object',
            'properties': {'item': {'$ref': '#/$defs/Item'}},
            '$defs': {'Item': {'type': 'object', 'properties': {'name': {'type': 'string'}}}},
        }
        with pytest.raises(ValueError, match='Unsupported JSON Schema keyword'):
            schema_dict_to_pydantic_model(schema)

    def test_rejects_allOf(self):
        """A property using 'allOf' raises ValueError for an unsupported keyword."""
        schema = {
            'type': 'object',
            'properties': {'x': {'allOf': [{'type': 'string'}]}},
        }
        with pytest.raises(ValueError, match='Unsupported JSON Schema keyword'):
            schema_dict_to_pydantic_model(schema)

    def test_rejects_non_object_toplevel(self):
        """A top-level schema that is not of type 'object' raises ValueError."""
        with pytest.raises(ValueError, match='type "object"'):
            schema_dict_to_pydantic_model({'type': 'array', 'items': {'type': 'string'}})

    def test_rejects_empty_properties(self):
        """An object schema with no properties raises ValueError."""
        with pytest.raises(ValueError, match='at least one property'):
            schema_dict_to_pydantic_model({'type': 'object', 'properties': {}})

    def test_extra_fields_forbidden(self):
        """Passing a field not declared in the schema fails validation."""
        schema = {
            'type': 'object',
            'properties': {'name': {'type': 'string'}},
            'required': ['name'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        with pytest.raises(ValidationError):
            Model(name='ok', bogus='nope')

    def test_nullable_field(self):
        """A required field marked 'nullable' accepts an explicit None."""
        schema = {
            'type': 'object',
            'properties': {
                'value': {'type': 'string', 'nullable': True},
            },
            'required': ['value'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        instance = Model(value=None)
        assert instance.value is None  # type: ignore[attr-defined]

    def test_field_descriptions_preserved(self):
        """A property's 'description' is carried onto the generated model field."""
        schema = {
            'type': 'object',
            'properties': {
                'price': {'type': 'number', 'description': 'The price in USD'},
            },
            'required': ['price'],
        }
        Model = schema_dict_to_pydantic_model(schema)
        field_info = Model.model_fields['price']
        assert field_info.description == 'The price in USD'
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Unit tests: ExtractionResult
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestExtractionResult:
    """Unit tests for the ExtractionResult model."""

    def test_construction(self):
        """Building with only data and schema_used leaves is_partial False and source_url None."""
        er = ExtractionResult(
            data={'name': 'Alice'},
            schema_used={'type': 'object', 'properties': {'name': {'type': 'string'}}},
        )
        assert er.data == {'name': 'Alice'}
        assert er.is_partial is False
        assert er.source_url is None

    def test_serialization_roundtrip(self):
        """A fully populated result survives a JSON dump/validate round trip unchanged."""
        er = ExtractionResult(
            data={'items': [1, 2]},
            schema_used={'type': 'object', 'properties': {'items': {'type': 'array'}}},
            is_partial=True,
            source_url='http://example.com',
            content_stats={'original_html_chars': 5000},
        )
        raw = er.model_dump_json()
        restored = ExtractionResult.model_validate_json(raw)
        assert restored == er
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Integration tests: extract action via Tools
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _make_extraction_llm(structured_response: dict | None = None, freetext_response: str = 'free text result') -> BaseChatModel:
    """Create a mock LLM that handles both structured and freetext extraction calls.

    Args:
        structured_response: Payload validated through the requested ``output_format``
            model when a structured call is made. When None, every call takes the
            free-text path.
        freetext_response: String returned as the completion on the free-text path.

    Returns:
        An ``AsyncMock`` spec'd on ``BaseChatModel`` whose ``ainvoke`` returns a
        ``ChatInvokeCompletion`` with ``usage=None``.
    """
    llm = AsyncMock(spec=BaseChatModel)
    llm.model = 'mock-extraction-llm'
    llm._verified_api_keys = True
    llm.provider = 'mock'
    llm.name = 'mock-extraction-llm'
    llm.model_name = 'mock-extraction-llm'

    async def mock_ainvoke(messages, output_format=None, **kwargs):
        if output_format is not None and structured_response is not None:
            # Structured path: parse the dict through the model
            instance = output_format.model_validate(structured_response)
            return ChatInvokeCompletion(completion=instance, usage=None)
        # Freetext path
        return ChatInvokeCompletion(completion=freetext_response, usage=None)

    llm.ainvoke.side_effect = mock_ainvoke
    return llm
|
|
|
|
|
|
@pytest.fixture(scope='module')
async def browser_session():
    """Start one headless BrowserSession per module and tear it down afterwards."""
    session = BrowserSession(browser_profile=BrowserProfile(headless=True, user_data_dir=None, keep_alive=True))
    await session.start()
    yield session
    try:
        await session.kill()
    finally:
        # Stop the event bus even when kill() raises, so a failed shutdown
        # does not leave it running for later test modules.
        await session.event_bus.stop(clear=True, timeout=5)
|
|
|
|
|
|
@pytest.fixture(scope='session')
def http_server():
    """Serve a static ``/products`` HTML page from a local HTTPServer for the whole test session."""
    server = HTTPServer()
    server.start()
    try:
        server.expect_request('/products').respond_with_data(
            '<html><body>\n'
            '<h1>Products</h1>\n'
            '<ul>\n'
            '<li>Widget A - $9.99</li>\n'
            '<li>Widget B - $19.99</li>\n'
            '</ul>\n'
            '</body></html>',
            content_type='text/html',
        )
        yield server
    finally:
        # Runs even if registering the route fails after start(), so the
        # listening server is never leaked.
        server.stop()
|
|
|
|
|
|
@pytest.fixture(scope='session')
def base_url(http_server):
    """Return the ``http://host:port`` root of the ``http_server`` fixture (no trailing slash)."""
    return f'http://{http_server.host}:{http_server.port}'
|
|
|
|
|
|
class TestExtractStructured:
    """Integration tests for the extract action's structured extraction path."""

    @staticmethod
    async def _open_products_page(browser_session, base_url):
        """Navigate the session to ``{base_url}/products`` and return the Tools instance used."""
        tools = Tools()
        await tools.navigate(url=f'{base_url}/products', new_tab=False, browser_session=browser_session)
        # Fixed pause kept from the original tests; presumably lets the page finish loading.
        await asyncio.sleep(0.5)
        return tools

    @staticmethod
    def _structured_payload(content):
        """Decode the JSON found between the <structured_result> tags of *content*.

        Raises ValueError (from ``str.index``) when either tag is missing.
        """
        open_tag = '<structured_result>'
        start = content.index(open_tag) + len(open_tag)
        end = content.index('</structured_result>')
        return json.loads(content[start:end].strip())

    async def test_structured_extraction_returns_json(self, browser_session, base_url):
        """When output_schema is provided, extract returns structured JSON in <structured_result> tags."""
        tools = await self._open_products_page(browser_session, base_url)

        output_schema = {
            'type': 'object',
            'properties': {
                'products': {
                    'type': 'array',
                    'items': {
                        'type': 'object',
                        'properties': {
                            'name': {'type': 'string'},
                            'price': {'type': 'number'},
                        },
                        'required': ['name', 'price'],
                    },
                },
            },
            'required': ['products'],
        }

        mock_data = {'products': [{'name': 'Widget A', 'price': 9.99}, {'name': 'Widget B', 'price': 19.99}]}
        extraction_llm = _make_extraction_llm(structured_response=mock_data)

        with tempfile.TemporaryDirectory() as tmp:
            fs = FileSystem(tmp)
            result = await tools.extract(
                query='List all products with prices',
                output_schema=output_schema,
                browser_session=browser_session,
                page_extraction_llm=extraction_llm,
                file_system=fs,
            )

        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None
        assert '<structured_result>' in result.extracted_content
        assert '</structured_result>' in result.extracted_content

        # Parse the JSON out of the tags
        parsed = self._structured_payload(result.extracted_content)
        assert parsed == mock_data

        # Metadata
        assert result.metadata is not None
        assert result.metadata['structured_extraction'] is True
        meta = result.metadata['extraction_result']
        assert meta['data'] == mock_data
        assert meta['schema_used'] == output_schema

    async def test_freetext_extraction_unchanged(self, browser_session, base_url):
        """When output_schema is None, extract returns free-text in <result> tags (backward compat)."""
        tools = await self._open_products_page(browser_session, base_url)

        extraction_llm = _make_extraction_llm(freetext_response='Widget A costs $9.99, Widget B costs $19.99')

        with tempfile.TemporaryDirectory() as tmp:
            fs = FileSystem(tmp)
            result = await tools.extract(
                query='What products are listed?',
                browser_session=browser_session,
                page_extraction_llm=extraction_llm,
                file_system=fs,
            )

        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None
        assert '<result>' in result.extracted_content
        assert '</result>' in result.extracted_content
        assert '<structured_result>' not in result.extracted_content
        assert result.metadata is None

    async def test_invalid_schema_falls_back_to_freetext(self, browser_session, base_url):
        """When output_schema contains unsupported keywords, fall back to free-text gracefully."""
        tools = await self._open_products_page(browser_session, base_url)

        bad_schema = {
            'type': 'object',
            'properties': {'item': {'$ref': '#/$defs/Item'}},
            '$defs': {'Item': {'type': 'object', 'properties': {'name': {'type': 'string'}}}},
        }

        extraction_llm = _make_extraction_llm(freetext_response='fallback text')

        with tempfile.TemporaryDirectory() as tmp:
            fs = FileSystem(tmp)
            result = await tools.extract(
                query='Get products',
                output_schema=bad_schema,
                browser_session=browser_session,
                page_extraction_llm=extraction_llm,
                file_system=fs,
            )

        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None
        # Should have used the free-text path
        assert '<result>' in result.extracted_content
        assert '<structured_result>' not in result.extracted_content
        assert result.metadata is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Integration tests: extraction_schema injection via special parameter
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Schema injected as `extraction_schema` by the tests below: an object holding
# a required `products` array of {name: string, price: number} entries.
PRODUCT_SCHEMA = {
    'type': 'object',
    'properties': {
        'products': {
            'type': 'array',
            'items': {
                'type': 'object',
                'properties': {
                    'name': {'type': 'string'},
                    'price': {'type': 'number'},
                },
                'required': ['name', 'price'],
            },
        },
    },
    'required': ['products'],
}

# Canned structured payload conforming to PRODUCT_SCHEMA; mirrors the two
# items served on the /products fixture page.
MOCK_PRODUCTS = {'products': [{'name': 'Widget A', 'price': 9.99}, {'name': 'Widget B', 'price': 19.99}]}
|
|
|
|
|
|
class TestExtractionSchemaInjection:
    """Tests that extraction_schema injected as a special parameter triggers structured extraction."""

    @staticmethod
    async def _open_products_page(browser_session, base_url):
        """Navigate the session to ``{base_url}/products`` and return the Tools instance used."""
        tools = Tools()
        await tools.navigate(url=f'{base_url}/products', new_tab=False, browser_session=browser_session)
        # Fixed pause kept from the original tests; presumably lets the page finish loading.
        await asyncio.sleep(0.5)
        return tools

    @staticmethod
    def _structured_payload(content):
        """Decode the JSON found between the <structured_result> tags of *content*.

        Raises ValueError (from ``str.index``) when either tag is missing.
        """
        open_tag = '<structured_result>'
        start = content.index(open_tag) + len(open_tag)
        end = content.index('</structured_result>')
        return json.loads(content[start:end].strip())

    async def test_injected_extraction_schema_triggers_structured_path(self, browser_session, base_url):
        """extraction_schema passed via act() triggers structured extraction even without output_schema in params."""
        tools = await self._open_products_page(browser_session, base_url)

        extraction_llm = _make_extraction_llm(structured_response=MOCK_PRODUCTS)

        with tempfile.TemporaryDirectory() as tmp:
            fs = FileSystem(tmp)
            result = await tools.extract(
                query='List all products with prices',
                browser_session=browser_session,
                page_extraction_llm=extraction_llm,
                file_system=fs,
                extraction_schema=PRODUCT_SCHEMA,
            )

        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None
        assert '<structured_result>' in result.extracted_content

        # Parse and verify JSON
        parsed = self._structured_payload(result.extracted_content)
        assert parsed == MOCK_PRODUCTS

        assert result.metadata is not None
        assert result.metadata['structured_extraction'] is True

    async def test_output_schema_takes_precedence_over_extraction_schema(self, browser_session, base_url):
        """When the LLM provides output_schema in params, it should take precedence over injected extraction_schema."""
        tools = await self._open_products_page(browser_session, base_url)

        # Different schema than the injected one — just a name list
        param_schema = {
            'type': 'object',
            'properties': {
                'names': {'type': 'array', 'items': {'type': 'string'}},
            },
            'required': ['names'],
        }
        param_response = {'names': ['Widget A', 'Widget B']}
        extraction_llm = _make_extraction_llm(structured_response=param_response)

        with tempfile.TemporaryDirectory() as tmp:
            fs = FileSystem(tmp)
            result = await tools.extract(
                query='List product names',
                output_schema=param_schema,
                browser_session=browser_session,
                page_extraction_llm=extraction_llm,
                file_system=fs,
                extraction_schema=PRODUCT_SCHEMA,  # should be ignored
            )

        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None
        assert '<structured_result>' in result.extracted_content

        parsed = self._structured_payload(result.extracted_content)
        # Should match param_schema response, NOT PRODUCT_SCHEMA
        assert parsed == param_response
        assert result.metadata is not None
        assert result.metadata['extraction_result']['schema_used'] == param_schema

    async def test_no_schema_uses_freetext_path(self, browser_session, base_url):
        """When neither output_schema nor extraction_schema is provided, free-text path is used (backward compat)."""
        tools = await self._open_products_page(browser_session, base_url)

        extraction_llm = _make_extraction_llm(freetext_response='Widget A costs $9.99')

        with tempfile.TemporaryDirectory() as tmp:
            fs = FileSystem(tmp)
            result = await tools.extract(
                query='What products are listed?',
                browser_session=browser_session,
                page_extraction_llm=extraction_llm,
                file_system=fs,
                # No extraction_schema, no output_schema
            )

        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None
        assert '<result>' in result.extracted_content
        assert '<structured_result>' not in result.extracted_content
        assert result.metadata is None

    async def test_extraction_schema_threads_through_act(self, browser_session, base_url):
        """extraction_schema passed to act() reaches extract() via the registry's special parameter injection."""
        tools = await self._open_products_page(browser_session, base_url)

        extraction_llm = _make_extraction_llm(structured_response=MOCK_PRODUCTS)

        with tempfile.TemporaryDirectory() as tmp:
            fs = FileSystem(tmp)

            # Build an ActionModel for the extract action
            action_model = tools.registry.create_action_model()
            action = action_model.model_validate({'extract': {'query': 'List products'}})

            result = await tools.act(
                action=action,
                browser_session=browser_session,
                page_extraction_llm=extraction_llm,
                file_system=fs,
                extraction_schema=PRODUCT_SCHEMA,
            )

        assert isinstance(result, ActionResult)
        assert result.extracted_content is not None
        assert '<structured_result>' in result.extracted_content
|