Fix 9 query-schema alignment bugs + 3 AI-review findings

Entity ID alignment: add 6 missing ID fields (cnes_code, finance_id,
embargo_id, school_id, convenio_id, stats_id) to entity_by_id and all
6 investigation Cypher queries (WHERE + coalesce chains).

entity_by_element_id: add missing PublicOffice label.

pattern_self_dealing: fix Amendment field reads with dual-source
coalesce fallbacks (TransfereGov + Transparencia).

init.cypher + schema_init.cypher: replace dead indexes
(amendment_object→function, amendment_date→value_committed,
convenio_date→date_published), expand fulltext index to 9 node types
with 11 search fields including n.function.

seed-dev.cypher: fix all property names (id→contract_id/sanction_id,
value→valor, PublicOffice id→cpf), add Amendment node, fix
AUTOR_EMENDA target to Amendment.

search.py: add name extraction for Contract/Amendment/Convenio/Embargo
/PublicOffice types in search results.

21 new tests, 570 total green. Triple-AI validated (Claude + Codex).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
bruno cesar
2026-02-24 10:31:30 -03:00
parent a4967ae1da
commit 271671ff3a
14 changed files with 206 additions and 58 deletions

View File

@@ -14,6 +14,8 @@ EXPECTED_LABELS = {
# Entity ID property names that lookup/coalesce chains are expected to cover.
EXPECTED_ID_FIELDS = {
    "cpf",
    "cnpj",
    "contract_id",
    "sanction_id",
    "amendment_id",
    "cnes_code",
    "finance_id",
    "embargo_id",
    "school_id",
    "convenio_id",
    "stats_id",
}
@@ -109,3 +111,110 @@ async def test_connections_rejects_invalid_depth(client: AsyncClient) -> None:
async def test_connections_rejects_zero_depth(client: AsyncClient) -> None:
    """Requesting connections with ``depth=0`` must yield HTTP 422."""
    zero_depth_url = "/api/v1/entity/test-id/connections?depth=0"
    resp = await client.get(zero_depth_url)
    assert resp.status_code == 422
# ── entity_by_id: all 11 ID fields are present in WHERE clause ──────────
def test_entity_by_id_has_all_11_id_fields() -> None:
    """Verify entity_by_id.cypher references each of the 11 entity ID properties.

    The check is a plain substring search for ``e.<field>`` over the whole
    query text.
    """
    query_text = _load_cypher("entity_by_id")
    required_fields = {
        "cpf",
        "cnpj",
        "contract_id",
        "sanction_id",
        "amendment_id",
        "cnes_code",
        "finance_id",
        "embargo_id",
        "school_id",
        "convenio_id",
        "stats_id",
    }
    for name in required_fields:
        property_ref = f"e.{name}"
        assert property_ref in query_text, (
            f"entity_by_id.cypher missing ID field: e.{name}"
        )
# ── entity_by_element_id: PublicOffice label ────────────────────────────
def test_entity_by_element_id_has_public_office_label() -> None:
    """Verify entity_by_element_id.cypher mentions the ``e:PublicOffice`` label."""
    query_text = _load_cypher("entity_by_element_id")
    label_present = "e:PublicOffice" in query_text
    assert label_present, (
        "entity_by_element_id.cypher missing PublicOffice label"
    )
def test_entity_by_element_id_has_all_labels() -> None:
    """Verify entity_by_element_id.cypher mentions every label in EXPECTED_LABELS.

    Each label is searched for as the substring ``e:<label>`` in the query text.
    """
    query_text = _load_cypher("entity_by_element_id")
    for entity_label in EXPECTED_LABELS:
        label_ref = f"e:{entity_label}"
        assert label_ref in query_text, (
            f"entity_by_element_id.cypher missing label: {entity_label}"
        )
# ── Investigation coalesce chains: all 6 queries include all 11 ID fields ──
# Query files fed to the parametrized coalesce-chain test.
INVESTIGATION_COALESCE_QUERIES = [
    f"investigation_{suffix}"
    for suffix in ("get", "list", "update", "by_token", "add_entity")
]
# The 11 entity ID properties, written as they appear on the `e` variable
# in the Cypher text.
ALL_ID_FIELDS = [
    "e.cpf",
    "e.cnpj",
    "e.contract_id",
    "e.sanction_id",
    "e.amendment_id",
    "e.cnes_code",
    "e.finance_id",
    "e.embargo_id",
    "e.school_id",
    "e.convenio_id",
    "e.stats_id",
]
@pytest.mark.parametrize("query_name", INVESTIGATION_COALESCE_QUERIES)
def test_investigation_coalesce_has_all_id_fields(query_name: str) -> None:
    """Verify each listed investigation query mentions all 11 entity ID fields.

    Runs once per entry of INVESTIGATION_COALESCE_QUERIES and searches the
    loaded query text for every item of ALL_ID_FIELDS as a substring.
    """
    query_text = _load_cypher(query_name)
    for property_ref in ALL_ID_FIELDS:
        found = property_ref in query_text
        assert found, (
            f"{query_name}.cypher coalesce chain missing {property_ref}"
        )
# Query files fed to the parametrized WHERE-clause test.
INVESTIGATION_WHERE_QUERIES = [
    f"investigation_{action}_entity" for action in ("add", "remove")
]
@pytest.mark.parametrize("query_name", INVESTIGATION_WHERE_QUERIES)
def test_investigation_where_has_all_id_fields(query_name: str) -> None:
    """Verify the add/remove investigation queries mention all 11 entity ID fields.

    Runs once per entry of INVESTIGATION_WHERE_QUERIES. The check is a
    substring search over the whole query text; it does not isolate the
    WHERE clause from the rest of the query.
    """
    query_text = _load_cypher(query_name)
    for property_ref in ALL_ID_FIELDS:
        found = property_ref in query_text
        assert found, (
            f"{query_name}.cypher WHERE clause missing {property_ref}"
        )
# ── pattern_self_dealing: uses correct Amendment field names ─────────
def test_self_dealing_uses_value_committed_or_value_paid() -> None:
    """pattern_self_dealing.cypher must read Amendment fields from both sources.

    Asserts that the query text mentions ``a.value_committed`` and
    ``a.value_paid``, and that every line mentioning ``amendment_value`` or
    ``amendment_object`` also carries the fallback reads: a bare ``a.value``
    for the former, and both ``a.function`` and ``a.object`` for the latter.
    """
    import re  # local import so this block does not depend on file-level imports

    # "a.value" is a prefix of "a.value_committed" and "a.value_paid", so a
    # plain substring test would pass even when the bare fallback is absent.
    # Match `a.value` only as a complete property name.
    bare_value_read = re.compile(r"\ba\.value(?!\w)")

    cypher = _load_cypher("pattern_self_dealing")

    # Must use coalesce with both TransfereGov fields AND Transparencia fallback
    assert "a.value_committed" in cypher, (
        "pattern_self_dealing.cypher missing a.value_committed (TransfereGov)"
    )
    assert "a.value_paid" in cypher, (
        "pattern_self_dealing.cypher missing a.value_paid (TransfereGov)"
    )

    # Transparencia Amendments use a.value and a.object — must be in coalesce fallback
    for line in cypher.splitlines():
        if "amendment_value" in line:
            assert bare_value_read.search(line), (
                f"amendment_value line missing a.value fallback for Transparencia: {line.strip()}"
            )
        if "amendment_object" in line:
            assert "a.function" in line and "a.object" in line, (
                f"amendment_object line missing dual-source fallback: {line.strip()}"
            )