sync: upstream convergence 2026-03-02

Co-authored-by: bruno cesar <brunoclz@brunos-MacBook-Pro.local>
2026-04-25 17:15:02 +02:00 · 2026-03-02 03:51:26 -03:00
parent d00d150f93
commit add44821e8
175 changed files with 2569 additions and 4713 deletions
--- a/.env.example
+++ b/.env.example
@@ -18,29 +18,24 @@ API_PORT=8000
 LOG_LEVEL=info
 APP_ENV=dev
 JWT_SECRET_KEY=change-me-generate-with-openssl-rand-hex-32
-AUTH_COOKIE_NAME=bracc_session
-AUTH_COOKIE_SECURE=false
-AUTH_COOKIE_SAMESITE=lax
-TRUST_PROXY_HEADERS=false
 INVITE_CODE=
 CORS_ORIGINS=http://localhost:3000
 PRODUCT_TIER=community
 PATTERNS_ENABLED=false
-PUBLIC_MODE=true
+PUBLIC_MODE=false
 PUBLIC_ALLOW_PERSON=false
 PUBLIC_ALLOW_ENTITY_LOOKUP=false
 PUBLIC_ALLOW_INVESTIGATIONS=false
 PATTERN_SPLIT_THRESHOLD_VALUE=80000
 PATTERN_SPLIT_MIN_COUNT=3
-PATTERN_SHARE_THRESHOLD=0.60
+PATTERN_SHARE_THRESHOLD=0.6
 PATTERN_SRP_MIN_ORGS=5
 PATTERN_INEXIG_MIN_RECURRENCE=3
 PATTERN_MAX_EVIDENCE_REFS=50
-SHARE_TOKEN_TTL_HOURS=168

 # Frontend (dev only — production uses Caddy reverse proxy with relative paths)
 VITE_API_URL=http://localhost:8000
-VITE_PUBLIC_MODE=true
+VITE_PUBLIC_MODE=false
 VITE_PATTERNS_ENABLED=false

 # Optional: Google Cloud (for Base dos Dados / TSE BigQuery)
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +1,5 @@
 blank_issues_enabled: false
 contact_links:
  - name: Security vulnerability report
-    url: https://github.com/World-Open-Graph/br-acc/security/advisories/new
+    url: https://github.com/brunoclz/world-transparency-graph/security/advisories/new
    about: Use GitHub Security Advisories for private vulnerability disclosure.
--- a/.github/claude-automerge-policy.json
+++ b/.github/claude-automerge-policy.json
@@ -10,8 +10,8 @@
    "README.md",
    "CONTRIBUTING.md",
    "frontend/src/**",
-    "api/src/icarus/queries/**",
-    "api/src/icarus/models/**",
+    "api/src/bracc/queries/**",
+    "api/src/bracc/models/**",
    "api/tests/**",
    "etl/tests/**",
    "frontend/src/**/*.test.*"
--- a/.github/workflows/publish-release.yml
+++ b/.github/workflows/publish-release.yml
@@ -23,26 +23,6 @@ on:
        description: "Release title (EN)"
        required: true
        type: string
-      highlights_pt:
-        description: "PT highlights (separate bullets with |)"
-        required: true
-        type: string
-      highlights_en:
-        description: "EN highlights (separate bullets with |)"
-        required: true
-        type: string
-      patterns_included:
-        description: "Comma-separated pattern IDs included in this release (use 'none' if not applicable)"
-        required: true
-        type: string
-      technical_changes_pt:
-        description: "PT technical changes (separate bullets with |)"
-        required: true
-        type: string
-      technical_changes_en:
-        description: "EN technical changes (separate bullets with |)"
-        required: true
-        type: string

 permissions:
  contents: write
@@ -124,116 +104,63 @@ jobs:
          COMPARE_URL: ${{ steps.validate.outputs.compare_url }}
          TITLE_PT: ${{ inputs.title_pt }}
          TITLE_EN: ${{ inputs.title_en }}
-          HIGHLIGHTS_PT: ${{ inputs.highlights_pt }}
-          HIGHLIGHTS_EN: ${{ inputs.highlights_en }}
-          PATTERNS_INCLUDED: ${{ inputs.patterns_included }}
-          TECHNICAL_CHANGES_PT: ${{ inputs.technical_changes_pt }}
-          TECHNICAL_CHANGES_EN: ${{ inputs.technical_changes_en }}
        run: |
          set -euo pipefail
          DATE_UTC="$(date -u +"%Y-%m-%d")"
          export DATE_UTC

-          python - <<'PY'
-          import json
-          import os
-          from textwrap import dedent
-
-          def split_pipe(raw: str) -> list[str]:
-              normalized = raw.replace("\r\n", "\n").replace("\n", "|")
-              return [item.strip(" -\t") for item in normalized.split("|") if item.strip()]
-
-          def split_csv(raw: str) -> list[str]:
-              value = raw.strip()
-              if value.lower() in {"none", "n/a", "na", "-"}:
-                  return []
-              return [item.strip() for item in value.split(",") if item.strip()]
-
-          def bullets(items: list[str], fallback: str) -> str:
-              if not items:
-                  return f"- {fallback}"
-              return "\n".join(f"- {item}" for item in items)
-
-          highlights_pt = split_pipe(os.environ["HIGHLIGHTS_PT"])
-          highlights_en = split_pipe(os.environ["HIGHLIGHTS_EN"])
-          technical_changes_pt = split_pipe(os.environ["TECHNICAL_CHANGES_PT"])
-          technical_changes_en = split_pipe(os.environ["TECHNICAL_CHANGES_EN"])
-          patterns = split_csv(os.environ["PATTERNS_INCLUDED"])
-
-          release_notes = dedent(
-              f"""
+          cat > release_notes.md <<NOTES
          ## PT-BR

-              {os.environ["TITLE_PT"]}
+          ${TITLE_PT}

          ### Escopo
          - Release publicada por marco.
-              - Mudanças listadas de forma específica para facilitar auditoria pública.
-
-              ### Destaques
-              {bullets(highlights_pt, "Sem destaques declarados.")}
-
-              ### Padrões incluídos
-              {bullets(patterns, "Sem novos padrões nesta release.")}
-
-              ### Mudanças técnicas
-              {bullets(technical_changes_pt, "Sem mudanças técnicas declaradas.")}
+          - Mudanças detalhadas por categorias no histórico desta versão.

          ### Integridade pública
          Os sinais e padrões refletem coocorrências em bases públicas e não constituem prova legal.

          ## EN

-              {os.environ["TITLE_EN"]}
+          ${TITLE_EN}

          ### Scope
          - Milestone-based release publication.
-              - Changes are listed explicitly for public traceability.
-
-              ### Highlights
-              {bullets(highlights_en, "No highlights declared.")}
-
-              ### Included patterns
-              {bullets(patterns, "No new patterns in this release.")}
-
-              ### Technical changes
-              {bullets(technical_changes_en, "No technical changes declared.")}
+          - Detailed changes grouped by category in this version history.

          ### Public integrity
          Signals and patterns reflect co-occurrence in public records and are not legal proof.

          ## Compatibility

-              - Breaking changes: none declared.
-              - Migration required: no.
+          - Breaking changes: declare explicitly when applicable.
+          - Migration required: declare explicitly when applicable.

          ## Compare

-              {os.environ.get("COMPARE_URL", "")}
+          ${COMPARE_URL}

          ## Metadata

-              - Version: {os.environ["VERSION"]}
-              - Target SHA: {os.environ["TARGET_SHA"]}
-              - Previous tag: {os.environ["PREVIOUS_TAG"]}
-              - Date (UTC): {os.environ.get("DATE_UTC", "")}
-              """
-          ).strip() + "\n"
+          - Version: ${VERSION}
+          - Target SHA: ${TARGET_SHA}
+          - Previous tag: ${PREVIOUS_TAG}
+          - Date (UTC): ${DATE_UTC}
+          NOTES

-          with open("release_notes.md", "w", encoding="utf-8") as fh:
-              fh.write(release_notes)
+          python - <<'PY'
+          import json
+          import os

          payload = {
              "version": os.environ["VERSION"],
              "date": os.environ.get("DATE_UTC", ""),
-              "highlights_pt": highlights_pt,
-              "highlights_en": highlights_en,
+              "highlights_pt": [os.environ["TITLE_PT"]],
+              "highlights_en": [os.environ["TITLE_EN"]],
              "api_changes": [],
              "data_changes": [],
              "privacy_compliance_changes": [],
-              "patterns_included": patterns,
-              "technical_changes_pt": technical_changes_pt,
-              "technical_changes_en": technical_changes_en,
              "breaking_changes": False,
              "migration_required": False,
              "compare_url": os.environ.get("COMPARE_URL", ""),
--- a/.github/workflows/security.yml
+++ b/.github/workflows/security.yml
@@ -6,18 +6,10 @@ on:
  pull_request:
    branches: [main]

-concurrency:
-  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
-  cancel-in-progress: true
-
-permissions:
-  contents: read
-
 jobs:
  gitleaks:
    name: Gitleaks
    runs-on: ubuntu-latest
-    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4
        with:
@@ -37,7 +29,6 @@ jobs:
  bandit:
    name: Bandit (Python)
    runs-on: ubuntu-latest
-    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4

@@ -57,7 +48,6 @@ jobs:
  pip-audit:
    name: Pip Audit (Python deps)
    runs-on: ubuntu-latest
-    timeout-minutes: 20
    steps:
      - uses: actions/checkout@v4

@@ -69,14 +59,6 @@ jobs:
        with:
          python-version: "3.12"

-      - name: Cache uv
-        uses: actions/cache@v4
-        with:
-          path: ~/.cache/uv
-          key: ${{ runner.os }}-uv-security-${{ hashFiles('api/uv.lock', 'etl/uv.lock') }}
-          restore-keys: |
-            ${{ runner.os }}-uv-security-
-
      - name: Export lock-compatible requirement sets
        run: |
          cd api
@@ -93,7 +75,6 @@ jobs:
  public-privacy-gate:
    name: Public Privacy Gate
    runs-on: ubuntu-latest
-    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4

@@ -107,7 +88,6 @@ jobs:
  compliance-pack-gate:
    name: Compliance Pack Gate
    runs-on: ubuntu-latest
-    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4

@@ -120,9 +100,8 @@ jobs:

  public-boundary-gate:
    name: Public Boundary Gate
-    if: vars.PUBLIC_BOUNDARY_GATE_ENABLED == 'true'
+    if: github.repository == 'brunoclz/world-transparency-graph'
    runs-on: ubuntu-latest
-    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4

@@ -136,7 +115,6 @@ jobs:
  internal-instruction-boundary:
    name: Internal Instruction Boundary
    runs-on: ubuntu-latest
-    timeout-minutes: 15
    steps:
      - uses: actions/checkout@v4

--- a/.gitignore
+++ b/.gitignore
@@ -75,7 +75,6 @@ scripts/audit-prompts/
 # Local report artifacts in repository root
 /*.pdf
 /*.html
-gitleaks-report*.json

 # Playwright MCP cache
 .playwright-mcp/
@@ -91,7 +90,7 @@ data/tse/
 # Local MCP runtime config (keep example only)
 .mcp.json

-# Internal assistant instructions (must never be published)
+# Internal assistant instruction files (must never be published)
 CLAUDE.md
 AGENTS.md
 AGENTS*.md
--- a/142
+++ b/142
@@ -1,14 +1,125 @@
-.PHONY: dev stop seed bootstrap-demo bootstrap-full bootstrap-all bootstrap-all-noninteractive bootstrap-all-report check-public-claims check-source-urls check-pipeline-contracts check-pipeline-inputs generate-pipeline-status generate-source-summary generate-reference-metrics check neutrality
+.PHONY: dev stop api etl frontend lint type-check test test-api test-etl test-frontend test-integration-api test-integration-etl test-integration check neutrality seed clean download-cnpj download-cnpj-all download-tse download-transparencia download-sanctions download-all etl-cnpj etl-cnpj-dev etl-cnpj-stream etl-tse etl-tse-dev etl-transparencia etl-transparencia-dev etl-sanctions etl-all link-persons bootstrap-demo bootstrap-full bootstrap-all bootstrap-all-noninteractive bootstrap-all-report check-public-claims check-source-urls check-pipeline-contracts check-pipeline-inputs generate-pipeline-status generate-source-summary generate-reference-metrics

+# ── Development ─────────────────────────────────────────
 dev:
-	docker compose -f infra/docker-compose.yml up -d
+	docker compose up -d

 stop:
-	docker compose -f infra/docker-compose.yml down
+	docker compose down
+
+# ── API ─────────────────────────────────────────────────
+api:
+	cd api && uv run uvicorn bracc.main:app --reload --host 0.0.0.0 --port 8000
+
+# ── ETL ─────────────────────────────────────────────────
+etl:
+	cd etl && uv run bracc-etl --help

 seed:
 	bash infra/scripts/seed-dev.sh

+# ── CNPJ Data ──────────────────────────────────────────
+download-cnpj:
+	cd etl && uv run python scripts/download_cnpj.py --reference-only
+	cd etl && uv run python scripts/download_cnpj.py --files 1
+
+download-cnpj-all:
+	cd etl && uv run python scripts/download_cnpj.py --files 10
+
+etl-cnpj:
+	cd etl && uv run bracc-etl run --source cnpj --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data
+
+etl-cnpj-dev:
+	cd etl && uv run bracc-etl run --source cnpj --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data --limit 10000
+
+etl-cnpj-stream:
+	cd etl && uv run bracc-etl run --source cnpj --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data --streaming
+
+# ── TSE Data ──────────────────────────────────────────
+download-tse:
+	cd etl && uv run python scripts/download_tse.py --years 2024
+
+etl-tse:
+	cd etl && uv run bracc-etl run --source tse --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data
+
+etl-tse-dev:
+	cd etl && uv run bracc-etl run --source tse --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data --limit 10000
+
+# ── Transparencia Data ────────────────────────────────
+download-transparencia:
+	cd etl && uv run python scripts/download_transparencia.py --year 2025
+
+etl-transparencia:
+	cd etl && uv run bracc-etl run --source transparencia --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data
+
+etl-transparencia-dev:
+	cd etl && uv run bracc-etl run --source transparencia --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data --limit 10000
+
+# ── Sanctions Data ────────────────────────────────────
+download-sanctions:
+	cd etl && uv run python scripts/download_sanctions.py
+
+etl-sanctions:
+	cd etl && uv run bracc-etl run --source sanctions --neo4j-password "$${NEO4J_PASSWORD}" --data-dir ../data
+
+# ── All Data ──────────────────────────────────────────
+download-all: download-cnpj download-tse download-transparencia download-sanctions
+
+etl-all: etl-cnpj etl-tse etl-transparencia etl-sanctions
+
+# ── Entity Resolution ────────────────────────────────────
+link-persons:
+	docker compose exec neo4j cypher-shell -u neo4j -p "$${NEO4J_PASSWORD}" -f /scripts/link_persons.cypher
+
+# ── Frontend ────────────────────────────────────────────
+frontend:
+	cd frontend && npm run dev
+
+# ── Quality ─────────────────────────────────────────────
+lint:
+	cd api && uv run ruff check src/ tests/
+	cd etl && uv run ruff check src/ tests/
+	cd frontend && npm run lint
+
+type-check:
+	cd api && uv run mypy src/
+	cd etl && uv run mypy src/
+	cd frontend && npm run type-check
+
+test-api:
+	cd api && uv run pytest
+
+test-etl:
+	cd etl && uv run pytest
+
+test-frontend:
+	cd frontend && npm test
+
+test: test-api test-etl test-frontend
+
+# ── Integration tests ─────────────────────────────────
+test-integration-api:
+	cd api && uv run pytest -m integration
+
+test-integration-etl:
+	cd etl && uv run pytest -m integration
+
+test-integration: test-integration-api test-integration-etl
+
+# ── Full check (run before commit) ─────────────────────
+check: lint type-check test
+	@echo "All checks passed."
+
+# ── Neutrality audit ───────────────────────────────────
+neutrality:
+	@! grep -rn \
+		"suspicious\|corrupt\|criminal\|fraudulent\|illegal\|guilty\|CRITICAL\|HIGH.*severity\|MEDIUM.*severity\|LOW.*severity" \
+		api/src/ etl/src/ frontend/src/ \
+		--include="*.py" --include="*.ts" --include="*.tsx" --include="*.json" \
+		|| (echo "NEUTRALITY VIOLATION FOUND" && exit 1)
+	@echo "Neutrality check passed."
+
+# ── Bootstrap ─────────────────────────────────────────────
 bootstrap-demo:
 	bash scripts/bootstrap_public_demo.sh --profile demo

@@ -24,6 +135,7 @@ bootstrap-all-noninteractive:
 bootstrap-all-report:
 	python3 scripts/run_bootstrap_all.py --repo-root . --report-latest

+# ── Quality checks ────────────────────────────────────────
 check-public-claims:
 	python3 scripts/check_public_claims.py --repo-root .

@@ -36,22 +148,20 @@ check-pipeline-contracts:
 check-pipeline-inputs:
 	python3 scripts/check_pipeline_inputs.py

+# ── Generators ────────────────────────────────────────────
 generate-pipeline-status:
-	python3 scripts/generate_pipeline_status.py --registry-path docs/source_registry_br_v1.csv --output docs/pipeline_status.md
+	python3 scripts/generate_pipeline_status.py

 generate-source-summary:
-	python3 scripts/generate_data_sources_summary.py --registry-path docs/source_registry_br_v1.csv --docs-path docs/data-sources.md
+	python3 scripts/generate_data_sources_summary.py

 generate-reference-metrics:
-	python3 scripts/generate_reference_metrics.py --json-output audit-results/public-trust/latest/neo4j-reference-metrics.json --doc-output docs/reference_metrics.md
+	python3 scripts/generate_reference_metrics.py

-check:
-	cd api && bash ../scripts/ci/python_quality.sh
-	cd etl && bash ../scripts/ci/python_quality.sh
-	cd frontend && bash ../scripts/ci/frontend_quality.sh
-
-neutrality:
-	@! grep -rn "suspicious\|corrupt\|criminal\|fraudulent\|illegal\|guilty" \
-		api/src/ etl/src/ frontend/src/ \
-		--include="*.py" --include="*.ts" --include="*.tsx" --include="*.json" \
-		|| (echo "NEUTRALITY VIOLATION: banned words found in source" && exit 1)
+# ── Cleanup ─────────────────────────────────────────────
+clean:
+	find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
+	find . -type d -name .pytest_cache -exec rm -rf {} + 2>/dev/null || true
+	find . -type d -name .mypy_cache -exec rm -rf {} + 2>/dev/null || true
+	find . -type d -name .ruff_cache -exec rm -rf {} + 2>/dev/null || true
+	rm -rf frontend/dist
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "bracc-api"
 version = "0.1.0"
-description = "BRACC API — Brazilian public data anti-corruption graph tool"
+description = "BR-ACC API — Brazilian public data anti-corruption graph tool"
 requires-python = ">=3.12"
 license = "AGPL-3.0-or-later"
 dependencies = [
--- a/api/src/bracc/config.py
+++ b/api/src/bracc/config.py
@@ -1,5 +1,6 @@
 from typing import Literal

+from pydantic import Field
 from pydantic_settings import BaseSettings


@@ -17,14 +18,15 @@ class Settings(BaseSettings):
    jwt_secret_key: str = "change-me-in-production"
    jwt_algorithm: str = "HS256"
    jwt_expire_minutes: int = 1440
-    auth_cookie_name: str = "bracc_session"
-    auth_cookie_secure: bool = False
-    auth_cookie_samesite: Literal["lax", "strict", "none"] = "lax"
-    trust_proxy_headers: bool = False
    rate_limit_anon: str = "60/minute"
    rate_limit_auth: str = "300/minute"
    invite_code: str = ""
    cors_origins: str = "http://localhost:3000"
+    auth_cookie_name: str = "bracc_session"
+    auth_cookie_secure: bool = False
+    auth_cookie_samesite: Literal["lax", "strict", "none"] = "lax"
+    trust_proxy_headers: bool = False
+    share_token_ttl_hours: int = 168  # 7 days
    product_tier: str = "community"
    patterns_enabled: bool = False
    public_mode: bool = False
@@ -37,7 +39,16 @@ class Settings(BaseSettings):
    pattern_srp_min_orgs: int = 5
    pattern_inexig_min_recurrence: int = 3
    pattern_max_evidence_refs: int = 50
-    share_token_ttl_hours: int = 168
+
+    # Pattern hardening defaults (decision-complete contract)
+    pattern_temporal_window_years: int = Field(default=4, ge=1, le=20)
+    pattern_min_contract_value: float = Field(default=100000.0, ge=0)
+    pattern_min_contract_count: int = Field(default=2, ge=1)
+    pattern_min_debt_value: float = Field(default=50000.0, ge=0)
+    pattern_same_as_min_confidence: float = Field(default=0.85, ge=0, le=1)
+    pattern_pep_min_confidence: float = Field(default=0.85, ge=0, le=1)
+    pattern_min_recurrence: int = Field(default=2, ge=1)
+    pattern_min_discrepancy_ratio: float = Field(default=0.30, ge=0, le=1)

    model_config = {"env_prefix": "", "env_file": ".env"}

--- a/api/src/bracc/dependencies.py
+++ b/api/src/bracc/dependencies.py
@@ -35,7 +35,12 @@ async def close_driver() -> None:


 async def get_driver(request: Request) -> AsyncDriver:
-    driver: AsyncDriver = request.app.state.neo4j_driver
+    driver: AsyncDriver | None = getattr(request.app.state, "neo4j_driver", None)
+    if driver is None:
+        raise HTTPException(
+            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
+            detail="Database connection not available",
+        )
    return driver


--- a/api/src/bracc/main.py
+++ b/api/src/bracc/main.py
@@ -2,7 +2,7 @@ import logging
 from collections.abc import AsyncIterator
 from contextlib import asynccontextmanager

-from fastapi import FastAPI, Request
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from slowapi import _rate_limit_exceeded_handler
 from slowapi.errors import RateLimitExceeded
@@ -51,7 +51,7 @@ async def lifespan(app: FastAPI) -> AsyncIterator[None]:


 app = FastAPI(
-    title="BRACC API",
+    title="BR-ACC API",
    description="Brazilian public data graph analysis tool",
    version="0.1.0",
    lifespan=lifespan,
@@ -85,5 +85,5 @@ app.include_router(investigation.shared_router)


@app.get("/health")
-async def health(request: Request) -> dict[str, str]:
-    return {"status": "ok", "version": request.app.version}
+async def health() -> dict[str, str]:
+    return {"status": "ok"}
--- a/api/src/bracc/middleware/cpf_masking.py
+++ b/api/src/bracc/middleware/cpf_masking.py
@@ -53,7 +53,7 @@ def _is_pep_record(record: dict[str, Any]) -> bool:

    for field in ("role", "cargo"):
        value = record.get(field)
-        if isinstance(value, str) and value.strip().lower() in PEP_ROLES:
+        if isinstance(value, str) and any(kw in value.strip().lower() for kw in PEP_ROLES):
            return True

    return False
--- a/api/src/bracc/middleware/rate_limit.py
+++ b/api/src/bracc/middleware/rate_limit.py
@@ -6,37 +6,20 @@ from bracc.config import settings
 from bracc.services.auth_service import decode_access_token


-def _extract_token(request: Request) -> str | None:
+def _get_rate_limit_key(request: Request) -> str:
+    """Extract user_id from JWT (Bearer or cookie) for rate limiting, fallback to IP."""
    auth = request.headers.get("authorization", "")
    if auth.startswith("Bearer "):
-        return auth[7:].strip()
-    cookie_token = request.cookies.get(settings.auth_cookie_name)
-    if isinstance(cookie_token, str) and cookie_token.strip():
-        return cookie_token.strip()
-    return None
-
-
-def _resolve_client_ip(request: Request) -> str:
-    if settings.trust_proxy_headers:
-        forwarded = request.headers.get("x-forwarded-for", "")
-        if forwarded:
-            first_hop = forwarded.split(",", 1)[0].strip()
-            if first_hop:
-                return first_hop
-        real_ip = request.headers.get("x-real-ip", "").strip()
-        if real_ip:
-            return real_ip
-    return get_remote_address(request)
-
-
-def _get_rate_limit_key(request: Request) -> str:
-    """Extract user_id from JWT for rate limiting, fallback to IP."""
-    token = _extract_token(request)
-    if token:
+        token = auth[7:]
        user_id = decode_access_token(token)
        if user_id:
            return f"user:{user_id}"
-    return _resolve_client_ip(request)
+    cookie_token = request.cookies.get(settings.auth_cookie_name)
+    if isinstance(cookie_token, str) and cookie_token.strip():
+        user_id = decode_access_token(cookie_token.strip())
+        if user_id:
+            return f"user:{user_id}"
+    return get_remote_address(request)


 limiter = Limiter(
--- a/api/src/bracc/queries/entity_connections.cypher
+++ b/api/src/bracc/queries/entity_connections.cypher
@@ -1,27 +1,15 @@
-MATCH (center) WHERE elementId(center) = $entity_id
+MATCH (center)
+WHERE elementId(center) = $entity_id
  AND (center:Person OR center:Partner OR center:Company OR center:Contract OR center:Sanction OR center:Election
       OR center:Amendment OR center:Finance OR center:Embargo OR center:Health OR center:Education
       OR center:Convenio OR center:LaborStats OR center:PublicOffice)
-WITH center,
-     CASE
-       WHEN coalesce($include_probable, false) THEN
-         "SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS|POSSIBLE_SAME_AS"
-       ELSE
-         "SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS"
-     END AS relationship_filter
-CALL apoc.path.subgraphAll(center, {
-  relationshipFilter: relationship_filter,
-  labelFilter: "-User|-Investigation|-Annotation|-Tag",
-  maxLevel: $depth,
-  limit: 200
-})
-YIELD nodes, relationships
-WITH center, nodes, relationships
-UNWIND relationships AS r
-WITH center,
-     startNode(r) AS src,
-     endNode(r) AS tgt,
-     r
+OPTIONAL MATCH p=(center)-[:SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS|POSSIBLE_SAME_AS*1..4]-(connected)
+WHERE length(p) <= $depth
+  AND all(x IN nodes(p) WHERE NOT (x:User OR x:Investigation OR x:Annotation OR x:Tag))
+  AND (coalesce($include_probable, false) OR none(rel IN relationships(p) WHERE type(rel) = "POSSIBLE_SAME_AS")) // reject whole paths that cross a probable link, so edges reachable only through one are not returned
+WITH center, p
+UNWIND CASE WHEN p IS NULL THEN [] ELSE relationships(p) END AS r
+WITH DISTINCT center, r, startNode(r) AS src, endNode(r) AS tgt
 RETURN center AS e,
       r,
       CASE WHEN elementId(src) = elementId(center) THEN tgt ELSE src END AS connected,
--- a/api/src/bracc/queries/graph_expand.cypher
+++ b/api/src/bracc/queries/graph_expand.cypher
@@ -1,14 +1,21 @@
-MATCH (center) WHERE elementId(center) = $entity_id
+MATCH (center)
+WHERE elementId(center) = $entity_id
  AND (center:Person OR center:Company OR center:Contract OR center:Sanction OR center:Election
       OR center:Amendment OR center:Finance OR center:Embargo OR center:Health OR center:Education
       OR center:Convenio OR center:LaborStats OR center:PublicOffice
       OR center:OffshoreEntity OR center:OffshoreOfficer OR center:GlobalPEP
       OR center:CVMProceeding OR center:Expense)
-CALL apoc.path.subgraphAll(center, {
-  relationshipFilter: "SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS|POSSIBLY_SAME_AS|OFFICER_OF|INTERMEDIARY_OF|GLOBAL_PEP_MATCH|CVM_SANCIONADA|GASTOU|FORNECEU",
-  labelFilter: $label_filter,
-  maxLevel: $depth,
-  limit: 200
-})
-YIELD nodes, relationships
-RETURN nodes, relationships, elementId(center) AS center_id
+OPTIONAL MATCH p=(center)-[:SOCIO_DE|DOOU|CANDIDATO_EM|VENCEU|AUTOR_EMENDA|SANCIONADA|OPERA_UNIDADE|DEVE|RECEBEU_EMPRESTIMO|EMBARGADA|MANTEDORA_DE|BENEFICIOU|GEROU_CONVENIO|SAME_AS|POSSIBLY_SAME_AS|OFFICER_OF|INTERMEDIARY_OF|GLOBAL_PEP_MATCH|CVM_SANCIONADA|GASTOU|FORNECEU*1..4]-(n)
+WHERE length(p) <= $depth
+  AND all(x IN nodes(p) WHERE NOT (x:User OR x:Investigation OR x:Annotation OR x:Tag))
+WITH center, collect(p) AS paths
+WITH center,
+     reduce(ns = [center], p IN paths | ns + CASE WHEN p IS NULL THEN [] ELSE nodes(p) END) AS raw_nodes,
+     reduce(rs = [], p IN paths | rs + CASE WHEN p IS NULL THEN [] ELSE relationships(p) END) AS raw_rels
+UNWIND raw_nodes AS n
+WITH center, collect(DISTINCT n) AS nodes, raw_rels
+UNWIND CASE WHEN size(raw_rels) = 0 THEN [NULL] ELSE raw_rels END AS r
+WITH center, nodes, collect(DISTINCT r) AS rels
+RETURN nodes,
+       [x IN rels WHERE x IS NOT NULL] AS relationships,
+       elementId(center) AS center_id
--- a/api/src/bracc/queries/investigation_by_token.cypher
+++ b/api/src/bracc/queries/investigation_by_token.cypher
@@ -1,6 +1,4 @@
-MATCH (i:Investigation)
-WHERE i.share_token = $token
-  AND (i.share_expires_at IS NULL OR i.share_expires_at > datetime())
+MATCH (i:Investigation {share_token: $token})
 OPTIONAL MATCH (i)-[:INCLUDES]->(e)
 WITH i, collect(coalesce(e.cpf, e.cnpj, e.contract_id, e.sanction_id, e.amendment_id, e.cnes_code, e.finance_id, e.embargo_id, e.school_id, e.convenio_id, e.stats_id, elementId(e))) AS eids
 RETURN i.id AS id,
@@ -9,5 +7,4 @@ RETURN i.id AS id,
       i.created_at AS created_at,
       i.updated_at AS updated_at,
       i.share_token AS share_token,
-       i.share_expires_at AS share_expires_at,
       [x IN eids WHERE x IS NOT NULL] AS entity_ids
--- a/api/src/bracc/queries/investigation_create.cypher
+++ b/api/src/bracc/queries/investigation_create.cypher
@@ -4,8 +4,7 @@ CREATE (i:Investigation {
  description: $description,
  created_at: datetime(),
  updated_at: datetime(),
-  share_token: null,
-  share_expires_at: null
+  share_token: null
 })
 WITH i
 MATCH (u:User {id: $user_id})
@@ -16,5 +15,4 @@ RETURN i.id AS id,
       i.created_at AS created_at,
       i.updated_at AS updated_at,
       i.share_token AS share_token,
-       i.share_expires_at AS share_expires_at,
       [] AS entity_ids
--- a/api/src/bracc/queries/investigation_get.cypher
+++ b/api/src/bracc/queries/investigation_get.cypher
@@ -7,5 +7,4 @@ RETURN i.id AS id,
       i.created_at AS created_at,
       i.updated_at AS updated_at,
       i.share_token AS share_token,
-       i.share_expires_at AS share_expires_at,
       [x IN eids WHERE x IS NOT NULL] AS entity_ids
--- a/api/src/bracc/queries/investigation_list.cypher
+++ b/api/src/bracc/queries/investigation_list.cypher
@@ -13,5 +13,4 @@ RETURN total,
       i.created_at AS created_at,
       i.updated_at AS updated_at,
       i.share_token AS share_token,
-       i.share_expires_at AS share_expires_at,
       [x IN eids WHERE x IS NOT NULL] AS entity_ids
--- a/api/src/bracc/queries/investigation_share.cypher
+++ b/api/src/bracc/queries/investigation_share.cypher
@@ -1,7 +1,5 @@
 MATCH (u:User {id: $user_id})-[:OWNS]->(i:Investigation {id: $id})
 SET i.share_token = $share_token,
-    i.share_expires_at = $share_expires_at,
    i.updated_at = datetime()
 RETURN i.id AS id,
-       i.share_token AS share_token,
-       i.share_expires_at AS share_expires_at
+       i.share_token AS share_token
--- a/api/src/bracc/queries/investigation_update.cypher
+++ b/api/src/bracc/queries/investigation_update.cypher
@@ -11,5 +11,4 @@ RETURN i.id AS id,
       i.created_at AS created_at,
       i.updated_at AS updated_at,
       i.share_token AS share_token,
-       i.share_expires_at AS share_expires_at,
       [x IN eids WHERE x IS NOT NULL] AS entity_ids
--- a/api/src/bracc/queries/node_degree.cypher
+++ b/api/src/bracc/queries/node_degree.cypher
@@ -1,5 +1,6 @@
-MATCH (n) WHERE elementId(n) = $entity_id
+MATCH (n)
+WHERE elementId(n) = $entity_id
  AND (n:Person OR n:Company OR n:Contract OR n:Sanction OR n:Election
       OR n:Amendment OR n:Finance OR n:Embargo OR n:Health OR n:Education
       OR n:Convenio OR n:LaborStats OR n:PublicOffice)
-RETURN apoc.node.degree(n) AS degree
+RETURN COUNT { (n)--() } AS degree
--- a/api/src/bracc/queries/public_graph_company.cypher
+++ b/api/src/bracc/queries/public_graph_company.cypher
@@ -2,11 +2,31 @@ MATCH (center:Company)
 WHERE elementId(center) = $company_id
   OR center.cnpj = $company_identifier
   OR center.cnpj = $company_identifier_formatted
-CALL apoc.path.subgraphAll(center, {
-  relationshipFilter: "SOCIO_DE|VENCEU|SANCIONADA|DEVE|RECEBEU_EMPRESTIMO|BENEFICIOU|GEROU_CONVENIO|MUNICIPAL_VENCEU|MUNICIPAL_LICITOU",
-  labelFilter: "+Company|+Contract|+Sanction|+Finance|+Amendment|+Convenio|+Bid|+MunicipalContract|+MunicipalBid|-Person|-Partner|-User|-Investigation|-Annotation|-Tag",
-  maxLevel: $depth,
-  limit: 200
-})
-YIELD nodes, relationships
-RETURN nodes, relationships, elementId(center) AS center_id
+OPTIONAL MATCH p=(center)-[:SOCIO_DE|VENCEU|SANCIONADA|DEVE|RECEBEU_EMPRESTIMO|BENEFICIOU|GEROU_CONVENIO|MUNICIPAL_VENCEU|MUNICIPAL_LICITOU*1..4]-(n)
+WHERE length(p) <= $depth
+  AND all(
+    x IN nodes(p)
+    WHERE NOT (
+      "Person" IN labels(x)
+      OR "Partner" IN labels(x)
+      OR "User" IN labels(x)
+      OR "Investigation" IN labels(x)
+      OR "Annotation" IN labels(x)
+      OR "Tag" IN labels(x)
+    )
+  )
+  AND (
+    n:Company OR n:Contract OR n:Sanction OR n:Finance OR n:Amendment OR n:Convenio
+    OR n:Bid OR n:MunicipalContract OR n:MunicipalBid OR n IS NULL
+  )
+WITH center, collect(p) AS paths
+WITH center,
+     reduce(ns = [center], p IN paths | ns + CASE WHEN p IS NULL THEN [] ELSE nodes(p) END) AS raw_nodes,
+     reduce(rs = [], p IN paths | rs + CASE WHEN p IS NULL THEN [] ELSE relationships(p) END) AS raw_rels
+UNWIND raw_nodes AS n
+WITH center, collect(DISTINCT n) AS nodes, raw_rels
+UNWIND CASE WHEN size(raw_rels) = 0 THEN [NULL] ELSE raw_rels END AS r
+WITH center, nodes, collect(DISTINCT r) AS rels
+RETURN nodes,
+       [x IN rels WHERE x IS NOT NULL] AS relationships,
+       elementId(center) AS center_id
--- a/api/src/bracc/queries/schema_init.cypher
+++ b/api/src/bracc/queries/schema_init.cypher
@@ -1,4 +1,4 @@
-// BRACC Neo4j Schema — Constraints and Indexes
+// BR-ACC Neo4j Schema — Constraints and Indexes
 // Applied on database initialization

 // ── Uniqueness Constraints ──────────────────────────────
--- a/api/src/bracc/routers/baseline.py
+++ b/api/src/bracc/routers/baseline.py
@@ -6,6 +6,7 @@ from neo4j import AsyncSession
 from bracc.dependencies import get_session
 from bracc.models.baseline import BaselineResponse
 from bracc.services.baseline_service import BASELINE_QUERIES, run_all_baselines, run_baseline
+from bracc.services.public_guard import enforce_entity_lookup_enabled

 router = APIRouter(prefix="/api/v1/baseline", tags=["baseline"])

@@ -16,6 +17,7 @@ async def get_baseline_for_entity(
    session: Annotated[AsyncSession, Depends(get_session)],
    dimension: Annotated[str | None, Query()] = None,
 ) -> BaselineResponse:
+    enforce_entity_lookup_enabled()
    if dimension:
        if dimension not in BASELINE_QUERIES:
            available = list(BASELINE_QUERIES.keys())
--- a/api/src/bracc/routers/entity.py
+++ b/api/src/bracc/routers/entity.py
@@ -182,7 +182,7 @@ async def get_entity_timeline(
            date=event_date,
            label=str(label),
            entity_type=entity_type,
-            properties=sanitize_props(props),
+            properties=sanitize_public_properties(sanitize_props(props)),
            sources=[SourceAttribution(database="neo4j_graph")],
        ))

--- a/api/src/bracc/routers/investigation.py
+++ b/api/src/bracc/routers/investigation.py
@@ -311,7 +311,7 @@ async def export_investigation_pdf(
            cpf_val = node.get("cpf")
            if cpf_val and isinstance(cpf_val, str):
                role = str(node.get("role", node.get("cargo", ""))).lower()
-                is_pep = role in PEP_ROLES
+                is_pep = any(kw in role for kw in PEP_ROLES)
                if not is_pep:
                    if "." in document and "-" in document:
                        document = mask_formatted_cpf(document)
--- a/api/src/bracc/routers/meta.py
+++ b/api/src/bracc/routers/meta.py
@@ -6,6 +6,7 @@ from neo4j import AsyncSession

 from bracc.dependencies import get_session
 from bracc.services.neo4j_service import execute_query_single
+from bracc.services.public_guard import should_hide_person_entities
 from bracc.services.source_registry import load_source_registry, source_registry_summary

 router = APIRouter(prefix="/api/v1/meta", tags=["meta"])
@@ -40,7 +41,9 @@ async def database_stats(
    result = {
        "total_nodes": record["total_nodes"] if record else 0,
        "total_relationships": record["total_relationships"] if record else 0,
-        "person_count": record["person_count"] if record else 0,
+        "person_count": (
+            0 if should_hide_person_entities() else (record["person_count"] if record else 0)
+        ),
        "company_count": record["company_count"] if record else 0,
        "health_count": record["health_count"] if record else 0,
        "finance_count": record["finance_count"] if record else 0,
--- a/api/src/bracc/routers/public.py
+++ b/api/src/bracc/routers/public.py
@@ -57,12 +57,6 @@ async def public_meta(
    return {
        "product": "World Transparency Graph",
        "mode": "public_safe",
-        "dataset_scope": {
-            "local_default": "demo_local",
-            "ingestion_mode": "byo_ingestion",
-            "reference_metrics": "reference_production_snapshot",
-        },
-        "metrics_as_of_utc": "2026-03-01T23:05:00Z",
        "total_nodes": record["total_nodes"] if record else 0,
        "total_relationships": record["total_relationships"] if record else 0,
        "company_count": record["company_count"] if record else 0,
--- a/api/src/bracc/routers/search.py
+++ b/api/src/bracc/routers/search.py
@@ -61,9 +61,9 @@ async def search_entities(
        {
            "query": _escape_lucene(q),
            "entity_type": type_filter,
-            "hide_person_entities": hide_person_entities,
            "skip": skip,
            "limit": size,
+            "hide_person_entities": hide_person_entities,
        },
    )
    total_record = await execute_query_single(
--- a/api/tests/integration/conftest.py
+++ b/api/tests/integration/conftest.py
@@ -9,6 +9,17 @@ from testcontainers.neo4j import Neo4jContainer
 from bracc.main import app


+def _iter_cypher_statements(path: Path) -> list[str]:
+    # Strip comment-only lines before splitting to avoid dropping statements
+    # that are preceded by section headers.
+    filtered_lines = [
+        line for line in path.read_text().splitlines()
+        if line.strip() and not line.strip().startswith("//")
+    ]
+    text = "\n".join(filtered_lines)
+    return [stmt.strip() for stmt in text.split(";") if stmt.strip()]
+
+
@pytest.fixture(scope="session")
 def neo4j_container() -> Neo4jContainer:  # type: ignore[misc]
    """Start a Neo4j container for integration tests."""
@@ -25,21 +36,34 @@ def neo4j_uri(neo4j_container: Neo4jContainer) -> str:

@pytest.fixture(scope="session")
 def neo4j_auth(neo4j_container: Neo4jContainer) -> tuple[str, str]:
-    return ("neo4j", neo4j_container.NEO4J_ADMIN_PASSWORD)
+    # testcontainers.neo4j API changed: older versions exposed NEO4J_ADMIN_PASSWORD,
+    # newer versions expose username/password attributes.
+    username = getattr(neo4j_container, "username", "neo4j")
+    password = getattr(
+        neo4j_container,
+        "password",
+        getattr(neo4j_container, "NEO4J_ADMIN_PASSWORD", None),
+    )
+    if password is None:
+        msg = "Could not resolve Neo4j testcontainer password"
+        raise RuntimeError(msg)
+    return (username, password)


-@pytest.fixture(scope="session")
+@pytest.fixture
 async def neo4j_driver(
    neo4j_uri: str, neo4j_auth: tuple[str, str]
 ) -> AsyncIterator[AsyncDriver]:
+    # Function-scoped driver avoids loop affinity issues between async tests.
    driver = AsyncGraphDatabase.driver(neo4j_uri, auth=neo4j_auth)
+    async with driver.session() as session:
+        # Keep tests deterministic across function scope by resetting test data.
+        await session.run("MATCH (n) DETACH DELETE n")
    # Apply schema
    schema_path = Path(__file__).parent.parent.parent.parent / "infra" / "neo4j" / "init.cypher"
    if schema_path.exists():
        async with driver.session() as session:
-            for statement in schema_path.read_text().split(";"):
-                stmt = statement.strip()
-                if stmt and not stmt.startswith("//"):
+            for stmt in _iter_cypher_statements(schema_path):
                await session.run(stmt)
    # Seed dev data
    seed_path = (
@@ -47,9 +71,7 @@ async def neo4j_driver(
    )
    if seed_path.exists():
        async with driver.session() as session:
-            for statement in seed_path.read_text().split(";"):
-                stmt = statement.strip()
-                if stmt and not stmt.startswith("//"):
+            for stmt in _iter_cypher_statements(seed_path):
                await session.run(stmt)
    yield driver
    await driver.close()
--- a/api/tests/unit/test_auth.py
+++ b/api/tests/unit/test_auth.py
@@ -34,7 +34,11 @@ def _setup_mock_session(driver: MagicMock, records: list[MagicMock]) -> AsyncMoc


@pytest.mark.anyio
-async def test_register_success(client: AsyncClient) -> None:
+async def test_register_success(client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None:
+    from bracc.config import settings
+
+    monkeypatch.setattr(settings, "invite_code", "")
+
    record = _mock_record({
        "id": "user-uuid",
        "email": "test@example.com",
@@ -56,19 +60,15 @@ async def test_register_success(client: AsyncClient) -> None:


@pytest.mark.anyio
-async def test_register_bad_invite(client: AsyncClient) -> None:
+async def test_register_bad_invite(client: AsyncClient, monkeypatch: pytest.MonkeyPatch) -> None:
    from bracc.config import settings

-    original = settings.invite_code
-    try:
-        settings.invite_code = "secret-code"
+    monkeypatch.setattr(settings, "invite_code", "secret-code")
    response = await client.post(
        "/api/v1/auth/register",
        json={"email": "test@example.com", "password": "password123", "invite_code": "wrong"},
    )
    assert response.status_code == 403
-    finally:
-        settings.invite_code = original


@pytest.mark.anyio
@@ -155,16 +155,25 @@ async def test_me_invalid_token(client: AsyncClient) -> None:


@pytest.mark.anyio
-async def test_register_duplicate_email(client: AsyncClient) -> None:
+async def test_register_duplicate_email(
+    client: AsyncClient,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from neo4j.exceptions import ConstraintError
+
+    from bracc.config import settings
    from bracc.main import app

+    monkeypatch.setattr(settings, "invite_code", "")
+
    driver = app.state.neo4j_driver
    mock_session = AsyncMock()
-    mock_session.run = AsyncMock(side_effect=Exception("Constraint violation"))
+    mock_session.run = AsyncMock(side_effect=ConstraintError("Node already exists"))
    driver.session.return_value.__aenter__ = AsyncMock(return_value=mock_session)

-    with pytest.raises(Exception, match="Constraint violation"):
-        await client.post(
+    response = await client.post(
        "/api/v1/auth/register",
        json={"email": "duplicate@example.com", "password": "password123"},
    )
+    assert response.status_code == 409
+    assert response.json()["detail"] == "Email already registered"
--- a/api/tests/unit/test_auth_service.py
+++ b/api/tests/unit/test_auth_service.py
@@ -61,7 +61,9 @@ def test_decode_access_token_invalid() -> None:


@pytest.mark.anyio
-async def test_register_user_success() -> None:
+async def test_register_user_success(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(settings, "invite_code", "")
+
    mock_record = _mock_record({
        "id": "user-uuid",
        "email": "test@example.com",
@@ -80,15 +82,11 @@ async def test_register_user_success() -> None:


@pytest.mark.anyio
-async def test_register_user_bad_invite() -> None:
-    original = settings.invite_code
-    try:
-        settings.invite_code = "secret-code"
+async def test_register_user_bad_invite(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(settings, "invite_code", "secret-code")
    session = AsyncMock()
    with pytest.raises(ValueError, match="Invalid invite code"):
        await register_user(session, "test@example.com", "password123", "wrong-code")
-    finally:
-        settings.invite_code = original


@pytest.mark.anyio
--- a/api/tests/unit/test_cpf_masking.py
+++ b/api/tests/unit/test_cpf_masking.py
@@ -68,6 +68,28 @@ class TestIsPepRecord:
    def test_cargo_field(self) -> None:
        assert _is_pep_record({"name": "X", "cpf": "11111111111", "cargo": "Deputado"})

+    @pytest.mark.parametrize(
+        "role",
+        [
+            "Deputado Federal",
+            "deputado federal",
+            "DEPUTADO FEDERAL",
+            "Senador da Republica",
+            "senadora da republica",
+            "Vereador Suplente",
+            "Ministro de Estado",
+            "Governadora do Estado de Sao Paulo",
+            "Presidente da Republica",
+        ],
+    )
+    def test_compound_role_detected_as_pep(self, role: str) -> None:
+        """Compound PEP roles like 'deputado federal' must be detected via substring match."""
+        assert _is_pep_record({"name": "X", "cpf": "11111111111", "role": role})
+
+    def test_compound_cargo_detected_as_pep(self) -> None:
+        """Compound PEP cargo like 'Deputado Federal' must be detected via substring match."""
+        assert _is_pep_record({"name": "X", "cpf": "11111111111", "cargo": "Deputado Federal"})
+
    def test_non_pep_role(self) -> None:
        assert not _is_pep_record({"name": "X", "cpf": "11111111111", "role": "assessor"})

@@ -99,6 +121,18 @@ class TestCollectPepCpfs:
        data = {"a": {"b": {"c": [{"cpf": "33333333333", "is_pep": True}]}}}
        assert "33333333333" in _collect_pep_cpfs(data)

+    def test_compound_role_collected(self) -> None:
+        """Compound roles like 'Deputado Federal' must be recognized in the walk."""
+        data = {
+            "results": [
+                {"cpf": "11111111111", "role": "Deputado Federal"},
+                {"cpf": "22222222222", "role": "assessor parlamentar"},
+            ]
+        }
+        peps = _collect_pep_cpfs(data)
+        assert "11111111111" in peps
+        assert "22222222222" not in peps
+

 # ---------------------------------------------------------------------------
 # Unit tests for mask_cpfs_in_json
@@ -205,4 +239,4 @@ async def test_health_not_masked(client: AsyncClient) -> None:
    """Non-CPF JSON responses pass through unchanged."""
    resp = await client.get("/health")
    assert resp.status_code == 200
-    assert resp.json()["status"] == "ok" and "version" in resp.json()
+    assert resp.json() == {"status": "ok"}
--- a/api/tests/unit/test_health.py
+++ b/api/tests/unit/test_health.py
@@ -8,9 +8,7 @@ from httpx import AsyncClient
 async def test_health_returns_ok(client: AsyncClient) -> None:
    response = await client.get("/health")
    assert response.status_code == 200
-    data = response.json()
-    assert data["status"] == "ok"
-    assert "version" in data
+    assert response.json() == {"status": "ok"}
    assert response.headers["x-content-type-options"] == "nosniff"
    assert response.headers["x-frame-options"] == "DENY"
    assert response.headers["referrer-policy"] == "no-referrer"
--- a/api/tests/unit/test_patterns.py
+++ b/api/tests/unit/test_patterns.py
@@ -1,120 +0,0 @@
-from unittest.mock import AsyncMock, patch
-
-import pytest
-from httpx import AsyncClient
-
-from bracc.config import settings
-from bracc.models.pattern import PATTERN_METADATA
-from bracc.services.intelligence_provider import COMMUNITY_PATTERN_IDS, COMMUNITY_PATTERN_QUERIES
-from bracc.services.neo4j_service import CypherLoader
-
-
-@pytest.fixture(autouse=True)
-def _enable_patterns(monkeypatch: pytest.MonkeyPatch) -> None:
-    monkeypatch.setattr(settings, "patterns_enabled", True)
-
-
-def test_all_community_patterns_have_metadata() -> None:
-    for pattern_id in COMMUNITY_PATTERN_IDS:
-        assert pattern_id in PATTERN_METADATA, f"Missing metadata for {pattern_id}"
-
-
-def test_all_community_patterns_have_query_files() -> None:
-    for query_name in COMMUNITY_PATTERN_QUERIES.values():
-        try:
-            CypherLoader.load(query_name)
-        except FileNotFoundError:
-            pytest.fail(f"Missing .cypher file for query {query_name}.cypher")
-        finally:
-            CypherLoader.clear_cache()
-
-
-def test_pattern_metadata_has_required_fields() -> None:
-    for pid, meta in PATTERN_METADATA.items():
-        assert "name_pt" in meta, f"{pid} missing name_pt"
-        assert "name_en" in meta, f"{pid} missing name_en"
-        assert "desc_pt" in meta, f"{pid} missing desc_pt"
-        assert "desc_en" in meta, f"{pid} missing desc_en"
-
-
-@pytest.mark.anyio
-async def test_list_patterns_endpoint(client: AsyncClient) -> None:
-    response = await client.get("/api/v1/patterns/")
-    assert response.status_code == 200
-    data = response.json()
-    assert "patterns" in data
-    assert len(data["patterns"]) == 8
-
-    ids = {row["id"] for row in data["patterns"]}
-    assert ids == set(COMMUNITY_PATTERN_IDS)
-
-
-@pytest.mark.anyio
-async def test_patterns_endpoint_returns_503_when_disabled(
-    client: AsyncClient,
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
-    monkeypatch.setattr(settings, "patterns_enabled", False)
-    response = await client.get("/api/v1/patterns/")
-    assert response.status_code == 503
-    assert "temporarily unavailable" in response.json()["detail"]
-
-
-@pytest.mark.anyio
-async def test_invalid_pattern_returns_404(client: AsyncClient) -> None:
-    response = await client.get("/api/v1/patterns/test-id/nonexistent_pattern")
-    assert response.status_code == 404
-    assert "Pattern not found" in response.json()["detail"]
-
-
-@pytest.mark.anyio
-async def test_patterns_endpoint_forwards_include_probable(client: AsyncClient) -> None:
-    with patch("bracc.routers.patterns.run_all_patterns", new_callable=AsyncMock) as mock_run_all:
-        mock_run_all.return_value = []
-        response = await client.get("/api/v1/patterns/test-id?include_probable=true")
-
-    assert response.status_code == 200
-    mock_run_all.assert_awaited_once()
-    _driver, entity_id, _lang = mock_run_all.await_args.args
-    assert entity_id == "test-id"
-    assert mock_run_all.await_args.kwargs["include_probable"] is True
-
-
-@pytest.mark.anyio
-async def test_specific_pattern_endpoint_forwards_include_probable(client: AsyncClient) -> None:
-    with patch("bracc.routers.patterns.run_pattern", new_callable=AsyncMock) as mock_run_one:
-        mock_run_one.return_value = []
-        response = await client.get(
-            "/api/v1/patterns/test-id/debtor_contracts?include_probable=true",
-        )
-
-    assert response.status_code == 200
-    mock_run_one.assert_awaited_once()
-    _session, pattern_name, entity_id, _lang = mock_run_one.await_args.args
-    assert pattern_name == "debtor_contracts"
-    assert entity_id == "test-id"
-    assert mock_run_one.await_args.kwargs["include_probable"] is True
-
-
-def test_community_queries_use_bind_params() -> None:
-    for query_name in COMMUNITY_PATTERN_QUERIES.values():
-        try:
-            cypher = CypherLoader.load(query_name)
-        finally:
-            CypherLoader.clear_cache()
-        assert "$company_id" in cypher, f"{query_name}.cypher missing $company_id"
-        assert "$company_identifier" in cypher, f"{query_name}.cypher missing $company_identifier"
-        assert "$company_identifier_formatted" in cypher, (
-            f"{query_name}.cypher missing $company_identifier_formatted"
-        )
-        assert "${" not in cypher, f"{query_name}.cypher uses unsafe string interpolation"
-
-
-def test_no_banned_words_in_pattern_metadata() -> None:
-    banned = {"suspicious", "corrupt", "criminal", "fraudulent", "illegal", "guilty"}
-    for pid, meta in PATTERN_METADATA.items():
-        for key, value in meta.items():
-            for word in banned:
-                assert word not in value.lower(), (
-                    f"Banned word '{word}' in {pid}.{key}: {value}"
-                )
--- a/api/tests/unit/test_patterns_new.py
+++ b/api/tests/unit/test_patterns_new.py
@@ -1,79 +0,0 @@
-"""Community public-safe pattern registry and query contract tests."""
-
-import pytest
-
-from bracc.models.pattern import PATTERN_METADATA
-from bracc.services.intelligence_provider import COMMUNITY_PATTERN_IDS, COMMUNITY_PATTERN_QUERIES
-from bracc.services.neo4j_service import CypherLoader
-
-
-def test_community_pattern_registry_exact_ids() -> None:
-    assert len(COMMUNITY_PATTERN_IDS) == 8
-    assert set(COMMUNITY_PATTERN_IDS) == {
-        "sanctioned_still_receiving",
-        "amendment_beneficiary_contracts",
-        "split_contracts_below_threshold",
-        "contract_concentration",
-        "embargoed_receiving",
-        "debtor_contracts",
-        "srp_multi_org_hitchhiking",
-        "inexigibility_recurrence",
-    }
-
-
-def test_community_pattern_query_mapping_is_complete() -> None:
-    assert set(COMMUNITY_PATTERN_QUERIES.keys()) == set(COMMUNITY_PATTERN_IDS)
-    for query_name in COMMUNITY_PATTERN_QUERIES.values():
-        assert query_name.startswith("public_pattern_")
-
-
-@pytest.mark.parametrize("query_name", COMMUNITY_PATTERN_QUERIES.values())
-def test_public_pattern_query_files_load(query_name: str) -> None:
-    try:
-        CypherLoader.load(query_name)
-    finally:
-        CypherLoader.clear_cache()
-
-
-@pytest.mark.parametrize("query_name", COMMUNITY_PATTERN_QUERIES.values())
-def test_public_pattern_query_required_return_aliases(query_name: str) -> None:
-    try:
-        cypher = CypherLoader.load(query_name)
-    finally:
-        CypherLoader.clear_cache()
-
-    for required_alias in (
-        " AS pattern_id",
-        " AS risk_signal",
-        " AS amount_total",
-        " AS window_start",
-        " AS window_end",
-        " AS evidence_refs",
-        " AS evidence_count",
-    ):
-        assert required_alias in cypher, f"{query_name}.cypher missing alias: {required_alias}"
-
-
-@pytest.mark.parametrize("pattern_id", COMMUNITY_PATTERN_IDS)
-def test_community_pattern_metadata_is_present(pattern_id: str) -> None:
-    meta = PATTERN_METADATA.get(pattern_id)
-    assert meta is not None
-    assert meta.get("name_pt")
-    assert meta.get("name_en")
-    assert meta.get("desc_pt")
-    assert meta.get("desc_en")
-
-
-def test_threshold_params_used_in_threshold_patterns() -> None:
-    query_params = {
-        "public_pattern_split_contracts_below_threshold": "$pattern_split_threshold_value",
-        "public_pattern_contract_concentration": "$pattern_share_threshold",
-        "public_pattern_srp_multi_org_hitchhiking": "$pattern_srp_min_orgs",
-        "public_pattern_inexigibility_recurrence": "$pattern_inexig_min_recurrence",
-    }
-    for query_name, required_param in query_params.items():
-        try:
-            cypher = CypherLoader.load(query_name)
-        finally:
-            CypherLoader.clear_cache()
-        assert required_param in cypher, f"{query_name}.cypher missing {required_param}"
--- a/api/tests/unit/test_public_mode.py
+++ b/api/tests/unit/test_public_mode.py
@@ -225,6 +225,135 @@ async def test_public_graph_company_filters_person_nodes(client: AsyncClient) ->
    assert len(payload["edges"]) == 0


+@pytest.mark.anyio
+async def test_baseline_disabled_in_public_mode(
+    client: AsyncClient,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(settings, "public_mode", True)
+    monkeypatch.setattr(settings, "public_allow_entity_lookup", False)
+    response = await client.get("/api/v1/baseline/test-id")
+    assert response.status_code == 403
+    assert "disabled in public mode" in response.json()["detail"]
+
+
+@pytest.mark.anyio
+async def test_stats_hides_person_count_in_public_mode(
+    client: AsyncClient,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(settings, "public_mode", True)
+    monkeypatch.setattr(settings, "public_allow_person", False)
+    # Clear stats cache to ensure fresh computation
+    import bracc.routers.meta as meta_mod
+    monkeypatch.setattr(meta_mod, "_stats_cache", None)
+
+    fake_record = {
+        "total_nodes": 100,
+        "total_relationships": 200,
+        "person_count": 999,
+        "company_count": 50,
+        "health_count": 10,
+        "finance_count": 5,
+        "contract_count": 20,
+        "sanction_count": 3,
+        "election_count": 7,
+        "amendment_count": 4,
+        "embargo_count": 2,
+        "education_count": 6,
+        "convenio_count": 8,
+        "laborstats_count": 9,
+        "offshore_entity_count": 1,
+        "offshore_officer_count": 2,
+        "global_pep_count": 3,
+        "cvm_proceeding_count": 4,
+        "expense_count": 11,
+        "pep_record_count": 12,
+        "expulsion_count": 13,
+        "leniency_count": 14,
+        "international_sanction_count": 15,
+        "gov_card_expense_count": 16,
+        "gov_travel_count": 17,
+        "bid_count": 18,
+        "fund_count": 19,
+        "dou_act_count": 20,
+        "tax_waiver_count": 21,
+        "municipal_finance_count": 22,
+        "declared_asset_count": 23,
+        "party_membership_count": 24,
+        "barred_ngo_count": 25,
+        "bcb_penalty_count": 26,
+        "labor_movement_count": 27,
+        "legal_case_count": 28,
+        "judicial_case_count": 29,
+        "source_document_count": 30,
+        "ingestion_run_count": 31,
+        "temporal_violation_count": 32,
+        "cpi_count": 33,
+        "inquiry_requirement_count": 34,
+        "inquiry_session_count": 35,
+        "municipal_bid_count": 36,
+        "municipal_contract_count": 37,
+        "municipal_gazette_act_count": 38,
+    }
+    with patch(
+        "bracc.routers.meta.execute_query_single",
+        new_callable=AsyncMock,
+        return_value=fake_record,
+    ), patch(
+        "bracc.routers.meta.load_source_registry",
+        return_value=[],
+    ), patch(
+        "bracc.routers.meta.source_registry_summary",
+        return_value={
+            "universe_v1_sources": 0,
+            "implemented_sources": 0,
+            "loaded_sources": 0,
+            "healthy_sources": 0,
+            "stale_sources": 0,
+            "blocked_external_sources": 0,
+            "quality_fail_sources": 0,
+            "discovered_uningested_sources": 0,
+        },
+    ):
+        response = await client.get("/api/v1/meta/stats")
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert payload["person_count"] == 0
+    assert payload["company_count"] == 50  # non-person counts preserved
+
+
+@pytest.mark.anyio
+async def test_timeline_sanitizes_properties_in_public_mode(
+    client: AsyncClient,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(settings, "public_mode", True)
+    monkeypatch.setattr(settings, "public_allow_entity_lookup", True)
+    mock_records = [
+        {
+            "lbls": ["Contract"],
+            "props": {"type": "licitacao", "cpf": "12345678900", "value": 50000.0},
+            "event_date": "2024-01-15",
+            "id": "evt-1",
+        },
+    ]
+    with patch(
+        "bracc.routers.entity.execute_query",
+        new_callable=AsyncMock,
+        return_value=mock_records,
+    ):
+        response = await client.get("/api/v1/entity/test-id/timeline")
+
+    assert response.status_code == 200
+    payload = response.json()
+    assert len(payload["events"]) == 1
+    event_props = payload["events"][0]["properties"]
+    assert "cpf" not in event_props
+    assert event_props["value"] == 50000.0
+
+
@pytest.mark.anyio
 async def test_investigations_disabled_in_public_mode(
    client: AsyncClient,
--- a/api/tests/unit/test_rate_limit.py
+++ b/api/tests/unit/test_rate_limit.py
@@ -1,24 +1,15 @@
 from unittest.mock import MagicMock

-from bracc.config import settings
 from bracc.middleware.rate_limit import _get_rate_limit_key, limiter
 from bracc.services.auth_service import create_access_token


-def _make_request(
-    auth_header: str | None = None,
-    client_ip: str = "127.0.0.1",
-    cookie_token: str | None = None,
-    x_forwarded_for: str | None = None,
-) -> MagicMock:
+def _make_request(auth_header: str | None = None, client_ip: str = "127.0.0.1") -> MagicMock:
    request = MagicMock()
    headers: dict[str, str] = {}
    if auth_header:
        headers["authorization"] = auth_header
-    if x_forwarded_for:
-        headers["x-forwarded-for"] = x_forwarded_for
    request.headers = headers
-    request.cookies = {settings.auth_cookie_name: cookie_token} if cookie_token else {}
    request.client = MagicMock()
    request.client.host = client_ip
    return request
@@ -43,23 +34,5 @@ def test_key_func_invalid_token_fallback() -> None:
    assert key == "10.0.0.1"


-def test_key_func_extracts_user_from_cookie_token() -> None:
-    token = create_access_token("cookie-user-1")
-    request = _make_request(cookie_token=token)
-    key = _get_rate_limit_key(request)
-    assert key == "user:cookie-user-1"
-
-
-def test_key_func_uses_forwarded_ip_when_enabled() -> None:
-    original = settings.trust_proxy_headers
-    try:
-        settings.trust_proxy_headers = True
-        request = _make_request(client_ip="127.0.0.1", x_forwarded_for="203.0.113.9, 10.0.0.4")
-        key = _get_rate_limit_key(request)
-        assert key == "203.0.113.9"
-    finally:
-        settings.trust_proxy_headers = original
-
-
 def test_limiter_instance_exists() -> None:
    assert limiter is not None
--- a/api/tests/unit/test_search.py
+++ b/api/tests/unit/test_search.py
@@ -1,21 +1,6 @@
 import pytest
 from httpx import AsyncClient

-from bracc.routers.search import _escape_lucene
-
-
-def test_escape_lucene_cnpj() -> None:
-    assert _escape_lucene("00.000.000/0001-00") == "00.000.000\\/0001\\-00"
-
-
-def test_escape_lucene_plain_text() -> None:
-    assert _escape_lucene("silva construcoes") == "silva construcoes"
-
-
-def test_escape_lucene_all_special_chars() -> None:
-    for ch in r'+-&|!(){}[]^"~*?:\/':
-        assert f"\\{ch}" in _escape_lucene(ch)
-

@pytest.mark.anyio
 async def test_search_rejects_short_query(client: AsyncClient) -> None:
--- a/api/uv.lock
+++ b/api/uv.lock
@@ -103,6 +103,56 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" },
 ]

+[[package]]
+name = "bracc-api"
+version = "0.1.0"
+source = { editable = "." }
+dependencies = [
+    { name = "bcrypt" },
+    { name = "fastapi" },
+    { name = "jinja2" },
+    { name = "neo4j" },
+    { name = "pydantic" },
+    { name = "pydantic-settings" },
+    { name = "pyjwt", extra = ["crypto"] },
+    { name = "python-multipart" },
+    { name = "slowapi" },
+    { name = "uvicorn", extra = ["standard"] },
+    { name = "weasyprint" },
+]
+
+[package.optional-dependencies]
+dev = [
+    { name = "httpx" },
+    { name = "mypy" },
+    { name = "pytest" },
+    { name = "pytest-asyncio" },
+    { name = "ruff" },
+    { name = "testcontainers", extra = ["neo4j"] },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "bcrypt", specifier = ">=4.0.0" },
+    { name = "fastapi", specifier = ">=0.115.0" },
+    { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" },
+    { name = "jinja2", specifier = ">=3.1.0" },
+    { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.14.0" },
+    { name = "neo4j", specifier = ">=5.27.0" },
+    { name = "pydantic", specifier = ">=2.10.0" },
+    { name = "pydantic-settings", specifier = ">=2.7.0" },
+    { name = "pyjwt", extras = ["crypto"], specifier = ">=2.9.0" },
+    { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" },
+    { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" },
+    { name = "python-multipart", specifier = ">=0.0.18" },
+    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.0" },
+    { name = "slowapi", specifier = ">=0.1.9" },
+    { name = "testcontainers", extras = ["neo4j"], marker = "extra == 'dev'", specifier = ">=4.0" },
+    { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" },
+    { name = "weasyprint", specifier = ">=62.0" },
+]
+provides-extras = ["dev"]
+
 [[package]]
 name = "brotli"
 version = "1.2.0"
@@ -523,56 +573,6 @@ wheels = [
    { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
 ]

-[[package]]
-name = "bracc-api"
-version = "0.1.0"
-source = { editable = "." }
-dependencies = [
-    { name = "bcrypt" },
-    { name = "fastapi" },
-    { name = "jinja2" },
-    { name = "neo4j" },
-    { name = "pydantic" },
-    { name = "pydantic-settings" },
-    { name = "pyjwt", extra = ["crypto"] },
-    { name = "python-multipart" },
-    { name = "slowapi" },
-    { name = "uvicorn", extra = ["standard"] },
-    { name = "weasyprint" },
-]
-
-[package.optional-dependencies]
-dev = [
-    { name = "httpx" },
-    { name = "mypy" },
-    { name = "pytest" },
-    { name = "pytest-asyncio" },
-    { name = "ruff" },
-    { name = "testcontainers", extra = ["neo4j"] },
-]
-
-[package.metadata]
-requires-dist = [
-    { name = "bcrypt", specifier = ">=4.0.0" },
-    { name = "fastapi", specifier = ">=0.115.0" },
-    { name = "httpx", marker = "extra == 'dev'", specifier = ">=0.28.0" },
-    { name = "jinja2", specifier = ">=3.1.0" },
-    { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.14.0" },
-    { name = "neo4j", specifier = ">=5.27.0" },
-    { name = "pydantic", specifier = ">=2.10.0" },
-    { name = "pydantic-settings", specifier = ">=2.7.0" },
-    { name = "pyjwt", extras = ["crypto"], specifier = ">=2.9.0" },
-    { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0" },
-    { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.24.0" },
-    { name = "python-multipart", specifier = ">=0.0.18" },
-    { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.9.0" },
-    { name = "slowapi", specifier = ">=0.1.9" },
-    { name = "testcontainers", extras = ["neo4j"], marker = "extra == 'dev'", specifier = ">=4.0" },
-    { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" },
-    { name = "weasyprint", specifier = ">=62.0" },
-]
-provides-extras = ["dev"]
-
 [[package]]
 name = "idna"
 version = "3.11"
--- a/data/.gitkeep
+++ b/data/.gitkeep
--- a/data/cnpj/extracted/.gitkeep
+++ b/data/cnpj/extracted/.gitkeep
--- a/data/cnpj/raw/.gitkeep
+++ b/data/cnpj/raw/.gitkeep
--- a/data/cnpj/reference/.gitkeep
+++ b/data/cnpj/reference/.gitkeep
--- a/docs/brand/bracc-header.png
+++ b/docs/brand/bracc-header.png
--- a/docs/brand/wtg-header.png
+++ b/docs/brand/wtg-header.png
--- a/docs/data-sources.md
+++ b/docs/data-sources.md
@@ -1,24 +1,13 @@
-# BRACC Data Source Catalog
+# ICARUS Data Source Catalog

-<!-- SOURCE_SUMMARY_START -->
-**Generated from `docs/source_registry_br_v1.csv` (as-of UTC: 2026-03-01T23:05:00Z)**
-
- Universe v1 sources: 108
- Implemented pipelines: 45
- Loaded sources (load_state=loaded): 36
- Partial sources (load_state=partial): 8
- Not loaded sources (load_state=not_loaded): 64
- Status counts: loaded=36, partial=5, stale=3, blocked_external=1, not_built=63
-<!-- SOURCE_SUMMARY_END -->
-
-Catalog note: counts and status labels are generated from the public registry (`docs/source_registry_br_v1.csv`).
-This document includes reference production inventory context and backlog discovery; it is not a guarantee that every listed source is currently loaded in your local environment.
+**38 loaded | 3 pipelines pending data | 60+ not yet built**
+Last updated: 2026-02-26

 ---

-## 1. Reference Production Snapshot (Loaded/Implemented Inventory)
+## 1. LOADED (38 sources)

-The table below is a timestamped reference snapshot and should be interpreted together with the generated summary block above.
+All sources below have working ETL pipelines in `etl/src/icarus_etl/pipelines/` and are loaded into production Neo4j.

 | # | Source | Pipeline | Nodes Created | Rels Created | Notes |
 |---|--------|----------|---------------|--------------|-------|
--- a/docs/demo/dataset-contract.md
+++ b/docs/demo/dataset-contract.md
@@ -1,29 +0,0 @@
-# Demo Dataset Contract (WTG Open)
-
-## Objective
-Provide a reproducible, public-safe demo graph with synthetic records only.
-
-## Safety rules
- Synthetic data only. No real CPF, no real personal names, no real personal addresses.
- Company identifiers may use synthetic CNPJ-like values reserved for demonstration.
- Demo graph cannot include `Person` or `Partner` labels.
- Demo exports must never include private or operational metadata.
-
-## Required files
- `data/demo/synthetic_graph.json`
- `data/demo/README.md`
- `scripts/generate_demo_dataset.py`
-
-## JSON schema (minimum)
- `nodes[]`: `{id, label, type, properties}`
- `edges[]`: `{id, source, target, type, properties}`
- `meta`: `{generated_at_utc, generator_version, source: "synthetic"}`
-
-## Acceptance checks
- No field name contains `cpf`, `doc_partial`, or `doc_raw`.
- No node label equals `Person` or `Partner`.
- CI privacy gate passes.
-
-## Runtime target
- Dedicated demo Neo4j instance (non-production).
- Public API served with `PUBLIC_MODE=true`.
--- a/docs/release/community_announcement_template.md
+++ b/docs/release/community_announcement_template.md
@@ -14,7 +14,6 @@ Resumo:
 Release notes: {release_url}

 Observação de integridade: os sinais refletem coocorrências em bases públicas e não constituem prova legal.
-Divulgação obrigatória: o repositório público entrega engine + demo + fluxo BYO-data; métricas de escala são snapshots de referência com timestamp.

 ## Short post (EN)

@@ -28,7 +27,6 @@ Summary:
 Release notes: {release_url}

 Integrity note: signals reflect co-occurrence in public records and are not legal proof.
-Mandatory disclosure: the public repo ships engine + demo + BYO-data workflow; production-scale metrics are timestamped reference snapshots.

 ## Discord/Telegram long form (PT+EN)

@@ -44,11 +42,6 @@ Mandatory disclosure: the public repo ships engine + demo + BYO-data workflow; p
 **Compatibilidade**
 - {pt_compat}

-**Reproducibility Reality Check**
- Funciona agora: {pt_works_now}
- Requer ingestão de dados: {pt_requires_ingestion}
- Não incluído por padrão: {pt_not_included}
-
 **Link**
 - {release_url}

@@ -64,10 +57,5 @@ Mandatory disclosure: the public repo ships engine + demo + BYO-data workflow; p
 **Compatibility**
 - {en_compat}

-**Reproducibility Reality Check**
- Works now: {en_works_now}
- Requires data ingestion: {en_requires_ingestion}
- Not included by default: {en_not_included}
-
 **Link**
 - {release_url}
--- a/docs/release/public_boundary_matrix.csv
+++ b/docs/release/public_boundary_matrix.csv
@@ -7,8 +7,8 @@ docs/**,PUBLIC with review,Keep public documentation and legal pack,include revi
 .github/workflows/**,PUBLIC,CI and security transparency,include
 scripts/**,PUBLIC with review,Keep public utilities and gates,include reviewed subset
 data/demo/**,PUBLIC,Synthetic demo dataset only,include
-api/src/bracc/services/pattern_service.py,REMOVE_FROM_PUBLIC,Pattern engine disabled pending validation,exclude
-api/src/bracc/queries/pattern_*.cypher,REMOVE_FROM_PUBLIC,Pattern query engine disabled pending validation,exclude
+api/src/icarus/services/pattern_service.py,REMOVE_FROM_PUBLIC,Pattern engine disabled pending validation,exclude
+api/src/icarus/queries/pattern_*.cypher,REMOVE_FROM_PUBLIC,Pattern query engine disabled pending validation,exclude
 scripts/auto_finalize_pncp_backfill.sh,REMOVE_FROM_PUBLIC,Production operational script with server-specific assumptions,exclude
 docs/shadow_rollout_runbook.md,REMOVE_FROM_PUBLIC,Production operational runbook details,exclude
 docs/ingestion_priority_runbook.md,REMOVE_FROM_PUBLIC,Production operational runbook details,exclude
--- a/docs/release/public_repo_release_checklist.md
+++ b/docs/release/public_repo_release_checklist.md
@@ -1,56 +1,78 @@
-# Public Repo Release Checklist — `World-Open-Graph/br-acc`
-
-## 1) Pre-release gate
-
-1. Confirm target merge commit exists on `main`.
-2. Confirm CI + Security + Public gates are green on that commit.
-3. Confirm PR is merged with exactly one release label.
-
-## 2) Public boundary checks
+# Public Repo Release Checklist — World Transparency Graph

+## 1) Prepare sanitized snapshot
 ```bash
-python scripts/check_public_privacy.py --repo-root .
-python scripts/check_compliance_pack.py --repo-root .
-python scripts/check_open_core_boundary.py --repo-root .
+bash scripts/prepare_public_snapshot.sh . /tmp/world-transparency-graph-public
 ```

-Expected: all `PASS`.
-
-## 3) Snapshot hygiene (optional verification)
-
+## 2) Initialize clean-history repo from snapshot
 ```bash
-bash scripts/prepare_public_snapshot.sh . /tmp/br-acc-public
-python /tmp/br-acc-public/scripts/check_public_privacy.py --repo-root /tmp/br-acc-public
-python /tmp/br-acc-public/scripts/check_compliance_pack.py --repo-root /tmp/br-acc-public
-python /tmp/br-acc-public/scripts/check_open_core_boundary.py --repo-root /tmp/br-acc-public
+cd /tmp/world-transparency-graph-public
+git init
+git add .
+git commit -m "Initial public release (WTG)"
 ```

-Expected in snapshot:
+## 3) Create GitHub repository (manual)
+- Owner: `brunoclz`
+- Name: `world-transparency-graph`
+- Visibility: Public
+- Do not auto-add README/License (already present)

- No `CLAUDE.md`.
- No `AGENTS.md` or `AGENTS*.md`.
- No private operational runbooks outside public scope.
+## 4) Push initial release
+```bash
+git branch -M main
+git remote add origin https://github.com/brunoclz/world-transparency-graph.git
+git push -u origin main
+```

-## 4) Publish release (manual workflow)
+## 5) Configure branch protection (GitHub UI)
+Require all checks:
+- `API (Python)`
+- `ETL (Python)`
+- `Frontend (TypeScript)`
+- `Neutrality Audit`
+- `Gitleaks`
+- `Bandit (Python)`
+- `Pip Audit (Python deps)`
+- `Public Privacy Gate`
+- `Compliance Pack Gate`
+- `Public Boundary Gate`

-In GitHub Actions, run **Publish Release** with:
+## 6) Configure environment defaults
+- Set public deployment environment vars:
+  - `PRODUCT_TIER=community`
+  - `PUBLIC_MODE=true`
+  - `PUBLIC_ALLOW_PERSON=false`
+  - `PUBLIC_ALLOW_ENTITY_LOOKUP=false`
+  - `PUBLIC_ALLOW_INVESTIGATIONS=false`
+  - `PATTERNS_ENABLED=false`
+  - `VITE_PUBLIC_MODE=true`
+  - `VITE_PATTERNS_ENABLED=false`

- `version`: SemVer tag (e.g. `v0.3.0`, `v0.3.1-rc.1`)
- `target_sha`: merge commit on `main`
- `prerelease`: `false` (stable) or `true` (RC)
- `title_pt`: release title PT-BR
- `title_en`: release title EN
+## 7) Final checks before launch
+- `python scripts/check_public_privacy.py --repo-root .` => `PASS`
+- `python scripts/check_compliance_pack.py --repo-root .` => `PASS`
+- `python scripts/check_open_core_boundary.py --repo-root .` => `PASS`
+- Confirm no internal runbooks in public repo
+- Confirm demo data is synthetic (`data/demo/synthetic_graph.json`)
+- Confirm all legal docs exist in root:
+  - `ETHICS.md`
+  - `LGPD.md`
+  - `PRIVACY.md`
+  - `TERMS.md`
+  - `DISCLAIMER.md`
+  - `SECURITY.md`
+  - `ABUSE_RESPONSE.md`

-## 5) Verify outputs
+## 8) Launch communication split
+- Publish product announcement as **WTG**
+- Publish movement announcement as **BRCC**
+- Mention methodology limits and non-accusatory policy

-1. Tag exists in repository.
-2. Release page published under `/releases`.
-3. Notes include PT+EN and non-accusatory disclaimer.
-4. `release_manifest.json` asset is attached.
-5. Compare link is valid (`previous_tag...new_tag`).
-
-## 6) Community communication
-
-1. Use `docs/release/community_announcement_template.md`.
-2. Publish short PT+EN summary with release URL.
-3. Keep wording factual: “signals/co-occurrence”, never accusatory language.
+## 9) Release system bootstrap
+- Ensure `.github/release.yml` exists for auto-notes categories.
+- Ensure `.github/release-drafter.yml` + workflow are active.
+- Ensure `publish-release.yml` workflow is present and dispatchable.
+- Ensure release label taxonomy is documented and applied to PRs.
+- Publish first policy-compliant tag from this stream (`v0.3.0`).
--- a/docs/release/release_policy.md
+++ b/docs/release/release_policy.md
@@ -48,11 +48,10 @@ A release can only be published from a commit on `main` where all required gates
 Every release must include PT-BR and EN sections with:

 1. Scope summary.
-2. Notable changes (explicit bullet points).
-3. Included pattern IDs when release contains pattern/signal changes.
-4. Compatibility/breaking notes.
-5. Privacy/compliance notes when applicable.
-6. Non-accusatory disclaimer.
+2. Notable changes.
+3. Compatibility/breaking notes.
+4. Privacy/compliance notes when applicable.
+5. Non-accusatory disclaimer.

 ## Artifacts

--- a/docs/release/release_runbook.md
+++ b/docs/release/release_runbook.md
@@ -37,19 +37,6 @@ For validation cycles use RC:
 - `prerelease`: `true` for RC, `false` for stable
 - `title_pt`: short PT-BR title
 - `title_en`: short EN title
- `highlights_pt`: PT highlights separated by `|`
- `highlights_en`: EN highlights separated by `|`
- `patterns_included`: comma-separated pattern IDs (use `none` when not applicable)
- `technical_changes_pt`: PT technical changes separated by `|`
- `technical_changes_en`: EN technical changes separated by `|`
-
-Example inputs for a pattern release:
-
- `highlights_pt`: `Port de 8 padrões públicos factuais | Padronização de payload público`
- `highlights_en`: `Port of 8 factual public-safe patterns | Public payload standardization`
- `patterns_included`: `sanctioned_still_receiving,amendment_beneficiary_contracts,split_contracts_below_threshold,contract_concentration,embargoed_receiving,debtor_contracts,srp_multi_org_hitchhiking,inexigibility_recurrence`
- `technical_changes_pt`: `Provider community de 4 para 8 padrões | ETL criou relação Contract-REFERENTE_A-Bid`
- `technical_changes_en`: `Community provider expanded from 4 to 8 patterns | ETL created Contract-REFERENTE_A-Bid linkage`

 ## 4) Workflow validations performed

@@ -65,7 +52,7 @@ The workflow blocks publication when:
 On success the workflow:

 1. Creates and pushes an annotated tag.
-2. Creates GitHub Release (PT+EN notes) with explicit highlights, patterns, and technical changes.
+2. Creates GitHub Release (PT+EN notes).
 3. Uploads `release_manifest.json` asset.

 ## 6) Post-release checklist
@@ -73,7 +60,6 @@ On success the workflow:
 1. Open the release page and confirm:
 - version tag is correct,
 - PT+EN notes are present,
- included patterns are explicitly listed (or marked as none),
 - non-accusatory disclaimer line is present,
 - `release_manifest.json` is attached.
 2. Share release link in community channels.
--- a/docs/source_onboarding_contract.md
+++ b/docs/source_onboarding_contract.md
@@ -1,67 +0,0 @@
-# Source Onboarding Contract (Brazil Coverage v1)
-
-This contract is mandatory for every new source before `shadow -> promote`.
-
-## 1. Source Identity
- `source_id`:
- `name`:
- `category`:
- `tier`:
- `owner_agent`:
- `primary_url`:
- `access_mode` (`file|api|bigquery|web`):
- `public_access_mode` (`open|open_with_rate_limit|registration|credentialed_public`):
- `discovery_status` (`discovered|discovered_uningested|monitored|unreachable`):
- `last_seen_url`:
- `cadence_expected`:
- `cadence_observed`:
- `quality_status` (`healthy|stale|quality_fail|blocked_external|not_built|partial|loaded`):
-
-## 2. Access and Legal
- Credential required:
- Secret name/path:
- License or usage restriction:
- LGPD/privacy considerations:
- `blocked_external` criteria:
-
-## 3. Data Contract
- Downloader script: `etl/scripts/download_<source>.py`
- Canonical output files:
- Manifest file:
- Manifest mandatory fields (`run_id`, `source_id`, `window_start`, `window_end`, `rows`, `error`, `checksum`, `retrieved_at_utc`):
- Update cadence:
- Expected row volume:
- Partition/window strategy:
-
-## 4. Graph Contract
- Node labels introduced:
- Relationship types introduced:
- Natural key(s) per node:
- Merge key strategy:
- Relationship quality tier (`strong|probable`):
- Provenance fields (`method`, `confidence`, `source_ref`, `run_id`):
-
-## 5. Index and Constraint Contract
- Required uniqueness constraints:
- Required date indexes:
- Required lookup indexes:
- Required fulltext indexes (if text-heavy):
-
-## 6. Quality Gates (Hard Stop/Go)
- Identity integrity preserved (`Person.cpf` masked = 0, 14-digit = 0):
- Freshness SLA threshold:
- Temporal sanity (`<= now + 365d`):
- Null/duplicate key thresholds:
- Mandatory non-zero nodes/rels:
-
-## 7. Operational Flow
- Shadow load command:
- Gate runner commands:
- API smoke checks:
- Promote command:
- Rollback command:
-
-## 8. Acceptance
- Evidence bundle path in `audit-results/`:
- Final status: `resolved | resolved_full | blocked_external | quality_fail`
- Reviewer sign-off:
--- a/docs/source_registry_br_v1.csv
+++ b/docs/source_registry_br_v1.csv
@@ -1,109 +1,109 @@
-source_id,name,category,tier,status,implementation_state,load_state,frequency,in_universe_v1,primary_url,pipeline_id,owner_agent,access_mode,notes,public_access_mode,discovery_status,last_seen_url,cadence_expected,cadence_observed,quality_status,last_verified_utc,verification_status
-cnpj,Receita Federal CNPJ,identity,P0,loaded,implemented,loaded,monthly,true,https://dadosabertos.rfb.gov.br/CNPJ/,cnpj,Agent A,file,http://dadosabertos.rfb.gov.br,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,transient_error
-tse,TSE elections and donations,electoral,P0,loaded,implemented,loaded,biennial,true,https://dadosabertos.tse.jus.br/,tse,Agent E,file,Core electoral data loaded,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-transparencia,Portal da Transparencia contracts,contracts,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados,transparencia,Agent C,file,Federal contracts and servants,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-sanctions,CEIS CNEP sanctions,sanctions,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/sancoes/consulta,sanctions,Agent C,file,Administrative sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-pep_cgu,CGU PEP list,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/pep,pep_cgu,Agent A,file,PEP baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-bndes,BNDES financings,finance,P1,loaded,implemented,loaded,monthly,true,https://www.bndes.gov.br/wps/portal/site/home/transparencia/dados,bndes,Agent G,file,Loan relationships,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-pgfn,PGFN divida ativa,fiscal,P0,loaded,implemented,loaded,monthly,true,https://www.regularize.pgfn.gov.br/dados-abertos,pgfn,Agent C,file,Debt risk core,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-ibama,IBAMA embargos,environment,P1,loaded,implemented,loaded,monthly,true,https://servicos.ibama.gov.br/ctf/publico/areasembargadas/,ibama,Agent F,file,Environmental enforcement,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-comprasnet,ComprasNet contracts,contracts,P0,stale,implemented,partial,monthly,true,https://dados.gov.br/dados/conjuntos-dados/comprasnet-contratos,comprasnet,Agent C,file,Needs freshness backfill,,monitored,,,,stale,2026-03-01T23:11:31.444615+00:00,ok
-tcu,TCU sanctions,audit,P1,loaded,implemented,loaded,monthly,true,https://contas.tcu.gov.br/ords/f?p=INIDONEAS:INIDONEAS,tcu,Agent C,file,Inidoneidade sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-transferegov,TransfereGov emendas e convenios,transfers,P0,loaded,implemented,loaded,monthly,true,https://www.transferegov.sistema.gov.br/portal/download-de-dados,transferegov,Agent C,file,Transfer relationships,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,transient_error
-rais,RAIS aggregated labor,labor,P1,loaded,implemented,loaded,annual,true,https://basedosdados.org/dataset/br-me-rais,rais,Agent H,bigquery,Aggregate mode only,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-inep,INEP school census,education,P2,loaded,implemented,loaded,annual,true,https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/microdados/censo-escolar,inep,Agent H,file,Education coverage,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited
-dou,Diario Oficial da Uniao,gazette,P0,loaded,implemented,loaded,daily,true,https://www.in.gov.br/leiturajornal,dou,Agent E,bigquery,National acts ingestion,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited
-datasus,DATASUS CNES,health,P1,loaded,implemented,loaded,monthly,true,https://opendatasus.saude.gov.br/,datasus,Agent H,file,Health establishments,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-icij,ICIJ offshore leaks,offshore,P1,loaded,implemented,loaded,yearly,true,https://offshoreleaks.icij.org/pages/database,icij,Agent G,file,Offshore entities and officers,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-opensanctions,OpenSanctions global PEP,sanctions,P1,loaded,implemented,loaded,monthly,true,https://www.opensanctions.org/datasets/peps/,opensanctions,Agent G,file,Global PEP matching,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-cvm,CVM proceedings,market,P1,loaded,implemented,loaded,monthly,true,https://dados.cvm.gov.br/,cvm,Agent G,file,Proceedings loaded,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-cvm_funds,CVM fund registry,market,P1,loaded,implemented,loaded,monthly,true,https://dados.cvm.gov.br/dados/FI/,cvm_funds,Agent G,file,Fund baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-camara,Camara CEAP expenses,legislative,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.camara.leg.br/,camara,Agent E,api,Expense reimbursement,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-camara_inquiries,Camara inquiries and requirements,legislative,P0,partial,implemented,partial,daily,true,https://dadosabertos.camara.leg.br/,camara_inquiries,Agent E,api,Sessions still low,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,ok
-senado,Senado CEAPS expenses,legislative,P1,loaded,implemented,loaded,monthly,true,https://www12.senado.leg.br/dados-abertos,senado,Agent E,api,Expense data loaded,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-ceaf,CEAF expelled servants,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/ceaf,ceaf,Agent A,file,Expulsion evidence,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-cepim,CEPIM barred NGOs,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/cepim,cepim,Agent A,file,NGO restrictions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-cpgf,CPGF gov card expenses,spending,P2,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/cpgf,cpgf,Agent H,file,Masked CPF source,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-leniency,Acordos de leniencia,integrity,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/acordos-leniencia,leniency,Agent A,file,High signal low volume,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-ofac,OFAC sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://home.treasury.gov/policy-issues/financial-sanctions/sdn-list-data-files,ofac,Agent G,file,International sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-holdings,Brasil IO holdings,ownership,P1,loaded,implemented,loaded,monthly,true,https://brasil.io/dataset/socios-brasil/,holdings,Agent G,file,Ownership enrichment,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-viagens,Viagens a servico,spending,P2,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/viagens,viagens,Agent H,file,Travel spend baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-siop,SIOP emendas,budget,P0,partial,implemented,partial,annual,true,https://www.siop.planejamento.gov.br/,siop,Agent C,api,Author linkage limited,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited
-pncp,PNCP bids and contracts,contracts,P0,stale,implemented,partial,monthly,true,https://pncp.gov.br/api/consulta/v1/contratacoes/publicacao,pncp,Agent C,api,Freshness SLA pending,,monitored,,,,stale,2026-03-01T23:11:31.444615+00:00,transient_error
-renuncias,Renuncias fiscais,fiscal,P1,loaded,implemented,loaded,annual,true,https://www.gov.br/receitafederal/pt-br/acesso-a-informacao/dados-abertos,renuncias,Agent G,file,Tax waiver baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited
-siconfi,SICONFI municipal finance,fiscal,P1,partial,implemented,partial,annual,true,https://apidatalake.tesouro.gov.br/docs/siconfi/,siconfi,Agent C,api,No CNPJ direct links,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,ok
-tse_bens,TSE candidate assets,electoral,P1,loaded,implemented,loaded,biennial,true,https://dadosabertos.tse.jus.br/api/3/action/package_search?q=bens,tse_bens,Agent E,file,Patrimony baseline,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-tse_filiados,TSE party memberships,electoral,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.tse.jus.br/api/3/action/package_search?q=filiacao,tse_filiados,Agent E,file,Party network,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-bcb,BCB penalties,finance,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.bcb.gov.br/,bcb,Agent G,file,Bank penalties loaded,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-stf,STF court data,judiciary,P1,loaded,implemented,loaded,monthly,true,https://basedosdados.org/dataset/br-stf-corte-aberta,stf,Agent D,bigquery,Supreme court coverage,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-caged,CAGED labor movements,labor,P1,stale,implemented,partial,monthly,true,https://ftp.mtps.gov.br/pdet/microdados/NOVO%20CAGED/,caged,Agent H,file,Aggregate-only implementation,,monitored,,,,stale,2026-03-01T23:11:31.444615+00:00,transient_error
-eu_sanctions,EU sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://data.europa.eu/data/datasets/consolidated-list-of-persons-groups-and-entities-subject-to-eu-financial-sanctions,eu_sanctions,Agent G,file,International sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-un_sanctions,UN sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://scsanctions.un.org/resources/xml/en/consolidated.xml,un_sanctions,Agent G,file,International sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,transient_error
-world_bank,World Bank debarment,sanctions,P1,loaded,implemented,loaded,monthly,true,https://www.worldbank.org/en/projects-operations/procurement/debarred-firms,world_bank,Agent G,file,International sanctions,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-senado_cpis,Senado CPIs,legislative,P0,partial,implemented,partial,yearly,true,https://www12.senado.leg.br/dados-abertos,senado_cpis,Agent E,api,Needs richer sessions and requirements,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,ok
-mides,MiDES municipal procurement,municipal,P0,loaded,implemented,loaded,daily,true,https://basedosdados.org/dataset/world-wb-mides,mides,Agent H,bigquery,Operational after access fix,,monitored,,,,healthy,2026-03-01T23:11:31.444615+00:00,ok
-querido_diario,Querido Diario gazettes,municipal,P1,partial,implemented,partial,daily,true,https://queridodiario.ok.org.br/api,querido_diario,Agent H,api,Text availability gap,,monitored,,,,partial,2026-03-01T23:11:31.444615+00:00,ok
-datajud,CNJ DataJud,judiciary,P0,blocked_external,implemented,not_loaded,monthly,true,https://api-publica.datajud.cnj.jus.br/,datajud,Agent D,api,Credentials not fully operational in prod,,monitored,,,,blocked_external,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited
-bolsa_familia_bpc,Bolsa Familia and BPC,social,P3,not_built,not_implemented,not_loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/bolsa-familia-pagamentos,,Agent H,file,High volume masked identities,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-estban,BCB ESTBAN balances,finance,P3,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.bcb.gov.br/,,Agent G,file,Banking aggregates,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-if_data,BCB IF data indicators,finance,P3,not_built,not_implemented,not_loaded,quarterly,true,https://dadosabertos.bcb.gov.br/,,Agent G,file,Institution KPIs,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-bcb_liquidacao,BCB bank liquidation acts,finance,P2,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.bcb.gov.br/,,Agent G,file,Regulatory actions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-stj_dados_abertos,STJ open data,judiciary,P1,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.stj.jus.br/,,Agent D,api,Superior court decisions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error
-cnciai_improbidade,CNIAI improbidade,judiciary,P1,not_built,not_implemented,not_loaded,monthly,true,https://www.cnj.jus.br/sistemas/datajud/,,Agent D,api,Misconduct convictions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-carf_tax_appeals,CARF tax appeals,judiciary,P2,not_built,not_implemented,not_loaded,monthly,true,https://carf.economia.gov.br/dados-abertos,,Agent D,file,Tax litigation,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-anp_royalties,ANP royalties and fuel,regulatory,P2,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anp,,Agent F,api,Oil and gas royalties,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-aneel_concessions,ANEEL concessions,regulatory,P2,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.aneel.gov.br/,,Agent F,api,Energy concessions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-anm_mining_rights,ANM mining rights,regulatory,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anm,,Agent F,api,Mining rights and permits,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-antt_transport_concessions,ANTT concessions,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/antt,,Agent F,api,Transport concessions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-ans_health_plans,ANS operators,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/ans,,Agent H,api,Health insurance operators,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-anvisa_registrations,ANVISA products,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anvisa,,Agent H,api,Regulatory registrations,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-anac_aviation_concessions,ANAC concessions,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anac,,Agent F,api,Aviation contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-antaq_port_contracts,ANTAQ contracts,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/antaq,,Agent F,api,Port concessions,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-ana_water_grants,ANA water grants,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/ana,,Agent F,api,Water use rights,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-anatel_telecom_licenses,ANATEL licenses,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anatel,,Agent G,api,Telecom operators,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-susep_insurance_market,SUSEP insurance market,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/susep,,Agent G,file,Insurance entities,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-cvm_full_ownership_chain,CVM ownership chains,market,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.cvm.gov.br/,,Agent G,file,Shareholder graph expansion,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-receita_dirbi,Receita DIRBI,tax,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/declaracao-dirbi,,Agent G,file,Tax benefit declarations,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-mapbiomas_alertas,MapBiomas Alerta,environment,P1,not_built,not_implemented,not_loaded,monthly,true,https://alerta.mapbiomas.org/api,,Agent F,api,Deforestation alerts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-sicar_rural_registry,SiCAR rural registry,environment,P1,not_built,not_implemented,not_loaded,quarterly,true,https://www.car.gov.br/publico/municipios/downloads,,Agent F,file,Property boundaries and owners,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error
-icmbio_cnuc,ICMBio CNUC units,environment,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.gov.br/icmbio/pt-br,,Agent F,file,Protected areas,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited
-tesouro_emendas,Tesouro emendas,budget,P0,not_built,not_implemented,not_loaded,monthly,true,https://www.tesourotransparente.gov.br/,,Agent C,file,Budget execution,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-siga_brasil,SIGA Brasil,budget,P0,not_built,not_implemented,not_loaded,monthly,true,https://www12.senado.leg.br/orcamento/sigabrasil,,Agent C,file,Federal budget traces,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-camara_votes_bills,Camara votes and bills,legislative,P1,not_built,not_implemented,not_loaded,daily,true,https://dadosabertos.camara.leg.br/api/v2,,Agent E,api,Legislative behavior,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-senado_votes_bills,Senado votes and bills,legislative,P1,not_built,not_implemented,not_loaded,daily,true,https://legis.senado.leg.br/dadosabertos,,Agent E,api,Legislative behavior,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-interpol_red_notices,Interpol red notices,international,P2,not_built,not_implemented,not_loaded,weekly,true,https://www.interpol.int/How-we-work/Notices/Red-Notices,,Agent G,api,Requires key,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_sp,TCE Sao Paulo,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://transparencia.tce.sp.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_pe,TCE Pernambuco,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://sistemas.tce.pe.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,auth_or_rate_limited
-tce_rj,TCE Rio de Janeiro,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://dados.tce.rj.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error
-tce_rs,TCE Rio Grande do Sul,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://tcers.tc.br/fiscalizado/,,Agent H,file,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_mg,TCE Minas Gerais,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.mg.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error
-tce_ba,TCE Bahia,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ba.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_ce,TCE Ceara,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ce.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_go,TCE Goias,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://portal.tce.go.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_pr,TCE Parana,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www1.tce.pr.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error
-tce_sc,TCE Santa Catarina,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcesc.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_es,TCE Espirito Santo,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcees.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_mt,TCE Mato Grosso,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.mt.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_ms,TCE Mato Grosso do Sul,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ms.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_am,TCE Amazonas,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.am.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_pa,TCE Para,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcepa.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_ro,TCE Rondonia,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ro.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_rr,TCE Roraima,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcerr.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_ap,TCE Amapa,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ap.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error
-tce_to,TCE Tocantins,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tceto.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_ma,TCE Maranhao,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcema.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_pi,TCE Piaui,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.pi.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_rn,TCE Rio Grande do Norte,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.rn.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_pb,TCE Paraiba,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://tce.pb.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_al,TCE Alagoas,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tceal.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-tce_se,TCE Sergipe,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.se.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_sp,Sao Paulo transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.sp.gov.br/,,Agent H,api,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_mg,Minas Gerais transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.mg.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_ba,Bahia transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.ba.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_ce,Ceara transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.ce.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error
-state_portal_go,Goias transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.go.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_pr,Parana transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.pr.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_sc,Santa Catarina transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.sc.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_rs,Rio Grande do Sul transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.rs.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_pe,Pernambuco transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.pe.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,ok
-state_portal_rj,Rio de Janeiro transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.rj.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built,2026-03-01T23:11:31.444615+00:00,transient_error
+source_id,name,category,tier,status,implementation_state,load_state,frequency,in_universe_v1,primary_url,pipeline_id,owner_agent,access_mode,notes,public_access_mode,discovery_status,last_seen_url,cadence_expected,cadence_observed,quality_status
+cnpj,Receita Federal CNPJ,identity,P0,loaded,implemented,loaded,monthly,true,https://dadosabertos.rfb.gov.br/CNPJ/dados_abertos_cnpj/,cnpj,Agent A,file,http://dadosabertos.rfb.gov.br,,monitored,,,,healthy
+tse,TSE elections and donations,electoral,P0,loaded,implemented,loaded,biennial,true,https://dadosabertos.tse.jus.br/,tse,Agent E,file,Core electoral data loaded,,monitored,,,,healthy
+transparencia,Portal da Transparencia contracts,contracts,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados,transparencia,Agent C,file,Federal contracts and servants,,monitored,,,,healthy
+sanctions,CEIS CNEP sanctions,sanctions,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/sancoes/consulta,sanctions,Agent C,file,Administrative sanctions,,monitored,,,,healthy
+pep_cgu,CGU PEP list,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/pep,pep_cgu,Agent A,file,PEP baseline,,monitored,,,,healthy
+bndes,BNDES financings,finance,P1,loaded,implemented,loaded,monthly,true,https://www.bndes.gov.br/wps/portal/site/home/transparencia/dados,bndes,Agent G,file,Loan relationships,,monitored,,,,healthy
+pgfn,PGFN divida ativa,fiscal,P0,loaded,implemented,loaded,monthly,true,https://www.regularize.pgfn.gov.br/dados-abertos,pgfn,Agent C,file,Debt risk core,,monitored,,,,healthy
+ibama,IBAMA embargos,environment,P1,loaded,implemented,loaded,monthly,true,https://servicos.ibama.gov.br/ctf/publico/areasembargadas/,ibama,Agent F,file,Environmental enforcement,,monitored,,,,healthy
+comprasnet,ComprasNet contracts,contracts,P0,stale,implemented,partial,monthly,true,https://dados.gov.br/dados/conjuntos-dados/comprasnet-contratos,comprasnet,Agent C,file,Needs freshness backfill,,monitored,,,,stale
+tcu,TCU sanctions,audit,P1,loaded,implemented,loaded,monthly,true,https://contas.tcu.gov.br/ords/f?p=INIDONEAS:INIDONEAS,tcu,Agent C,file,Inidoneidade sanctions,,monitored,,,,healthy
+transferegov,TransfereGov emendas e convenios,transfers,P0,loaded,implemented,loaded,monthly,true,https://www.transferegov.sistema.gov.br/portal/download-de-dados,transferegov,Agent C,file,Transfer relationships,,monitored,,,,healthy
+rais,RAIS aggregated labor,labor,P1,loaded,implemented,loaded,annual,true,https://basedosdados.org/dataset/br-me-rais,rais,Agent H,bigquery,Aggregate mode only,,monitored,,,,healthy
+inep,INEP school census,education,P2,loaded,implemented,loaded,annual,true,https://www.gov.br/inep/pt-br/acesso-a-informacao/dados-abertos/microdados/censo-escolar,inep,Agent H,file,Education coverage,,monitored,,,,healthy
+dou,Diario Oficial da Uniao,gazette,P0,loaded,implemented,loaded,daily,true,https://www.in.gov.br/leiturajornal,dou,Agent E,bigquery,National acts ingestion,,monitored,,,,healthy
+datasus,DATASUS CNES,health,P1,loaded,implemented,loaded,monthly,true,https://opendatasus.saude.gov.br/,datasus,Agent H,file,Health establishments,,monitored,,,,healthy
+icij,ICIJ offshore leaks,offshore,P1,loaded,implemented,loaded,yearly,true,https://offshoreleaks.icij.org/pages/database,icij,Agent G,file,Offshore entities and officers,,monitored,,,,healthy
+opensanctions,OpenSanctions global PEP,sanctions,P1,loaded,implemented,loaded,monthly,true,https://www.opensanctions.org/datasets/peps/,opensanctions,Agent G,file,Global PEP matching,,monitored,,,,healthy
+cvm,CVM proceedings,market,P1,loaded,implemented,loaded,monthly,true,https://dados.cvm.gov.br/,cvm,Agent G,file,Proceedings loaded,,monitored,,,,healthy
+cvm_funds,CVM fund registry,market,P1,loaded,implemented,loaded,monthly,true,https://dados.cvm.gov.br/dados/FI/,cvm_funds,Agent G,file,Fund baseline,,monitored,,,,healthy
+camara,Camara CEAP expenses,legislative,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.camara.leg.br/,camara,Agent E,api,Expense reimbursement,,monitored,,,,healthy
+camara_inquiries,Camara inquiries and requirements,legislative,P0,partial,implemented,partial,daily,true,https://dadosabertos.camara.leg.br/,camara_inquiries,Agent E,api,Sessions still low,,monitored,,,,partial
+senado,Senado CEAPS expenses,legislative,P1,loaded,implemented,loaded,monthly,true,https://www12.senado.leg.br/dados-abertos,senado,Agent E,api,Expense data loaded,,monitored,,,,healthy
+ceaf,CEAF expelled servants,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/ceaf,ceaf,Agent A,file,Expulsion evidence,,monitored,,,,healthy
+cepim,CEPIM barred NGOs,integrity,P1,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/cepim,cepim,Agent A,file,NGO restrictions,,monitored,,,,healthy
+cpgf,CPGF gov card expenses,spending,P2,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/cpgf,cpgf,Agent H,file,Masked CPF source,,monitored,,,,healthy
+leniency,Acordos de leniencia,integrity,P0,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/acordos-leniencia,leniency,Agent A,file,High signal low volume,,monitored,,,,healthy
+ofac,OFAC sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://home.treasury.gov/policy-issues/financial-sanctions/sdn-list-data-files,ofac,Agent G,file,International sanctions,,monitored,,,,healthy
+holdings,Brasil IO holdings,ownership,P1,loaded,implemented,loaded,monthly,true,https://brasil.io/dataset/socios-brasil/holding/,holdings,Agent G,file,Ownership enrichment,,monitored,,,,healthy
+viagens,Viagens a servico,spending,P2,loaded,implemented,loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/viagens,viagens,Agent H,file,Travel spend baseline,,monitored,,,,healthy
+siop,SIOP emendas,budget,P0,partial,implemented,partial,annual,true,https://www.siop.planejamento.gov.br/,siop,Agent C,api,Author linkage limited,,monitored,,,,partial
+pncp,PNCP bids and contracts,contracts,P0,stale,implemented,partial,monthly,true,https://pncp.gov.br/api/consulta/v1/,pncp,Agent C,api,Freshness SLA pending,,monitored,,,,stale
+renuncias,Renuncias fiscais,fiscal,P1,loaded,implemented,loaded,annual,true,https://www.gov.br/receitafederal/pt-br/acesso-a-informacao/dados-abertos,renuncias,Agent G,file,Tax waiver baseline,,monitored,,,,healthy
+siconfi,SICONFI municipal finance,fiscal,P1,partial,implemented,partial,annual,true,https://apidatalake.tesouro.gov.br/docs/siconfi/,siconfi,Agent C,api,No CNPJ direct links,,monitored,,,,partial
+tse_bens,TSE candidate assets,electoral,P1,loaded,implemented,loaded,biennial,true,https://dadosabertos.tse.jus.br/dataset/bens-candidato,tse_bens,Agent E,file,Patrimony baseline,,monitored,,,,healthy
+tse_filiados,TSE party memberships,electoral,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.tse.jus.br/dataset/filiados-partidos,tse_filiados,Agent E,file,Party network,,monitored,,,,healthy
+bcb,BCB penalties,finance,P1,loaded,implemented,loaded,monthly,true,https://dadosabertos.bcb.gov.br/dataset/penalidades,bcb,Agent G,file,Bank penalties loaded,,monitored,,,,healthy
+stf,STF court data,judiciary,P1,loaded,implemented,loaded,monthly,true,https://basedosdados.org/dataset/br-stf-corte-aberta,stf,Agent D,bigquery,Supreme court coverage,,monitored,,,,healthy
+caged,CAGED labor movements,labor,P1,stale,implemented,partial,monthly,true,https://ftp.mtps.gov.br/pdet/microdados/NOVO%20CAGED/,caged,Agent H,file,Aggregate-only implementation,,monitored,,,,stale
+eu_sanctions,EU sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://data.europa.eu/data/datasets/consolidated-list-of-persons-groups-and-entities-subject-to-eu-financial-sanctions,eu_sanctions,Agent G,file,International sanctions,,monitored,,,,healthy
+un_sanctions,UN sanctions,sanctions,P1,loaded,implemented,loaded,monthly,true,https://scsanctions.un.org/resources/xml/en/consolidated.xml,un_sanctions,Agent G,file,International sanctions,,monitored,,,,healthy
+world_bank,World Bank debarment,sanctions,P1,loaded,implemented,loaded,monthly,true,https://www.worldbank.org/en/projects-operations/procurement/debarred-firms,world_bank,Agent G,file,International sanctions,,monitored,,,,healthy
+senado_cpis,Senado CPIs,legislative,P0,partial,implemented,partial,yearly,true,https://www12.senado.leg.br/dados-abertos,senado_cpis,Agent E,api,Needs richer sessions and requirements,,monitored,,,,partial
+mides,MiDES municipal procurement,municipal,P0,loaded,implemented,loaded,daily,true,https://basedosdados.org/dataset/world-wb-mides,mides,Agent H,bigquery,Operational after access fix,,monitored,,,,healthy
+querido_diario,Querido Diario gazettes,municipal,P1,partial,implemented,partial,daily,true,https://queridodiario.ok.org.br/api,querido_diario,Agent H,api,Text availability gap,,monitored,,,,partial
+datajud,CNJ DataJud,judiciary,P0,blocked_external,implemented,not_loaded,monthly,true,https://api-publica.datajud.cnj.jus.br/,datajud,Agent D,api,Credentials not fully operational in prod,,monitored,,,,blocked_external
+bolsa_familia_bpc,Bolsa Familia and BPC,social,P3,not_built,not_implemented,not_loaded,monthly,true,https://portaldatransparencia.gov.br/download-de-dados/bolsa-familia-pagamentos,,Agent H,file,High volume masked identities,,discovered_uningested,,,,not_built
+estban,BCB ESTBAN balances,finance,P3,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.bcb.gov.br/dataset/estban,,Agent G,file,Banking aggregates,,discovered_uningested,,,,not_built
+if_data,BCB IF data indicators,finance,P3,not_built,not_implemented,not_loaded,quarterly,true,https://dadosabertos.bcb.gov.br/dataset/if-data,,Agent G,file,Institution KPIs,,discovered_uningested,,,,not_built
+bcb_liquidacao,BCB bank liquidation acts,finance,P2,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.bcb.gov.br/dataset/intervencao-e-liquidacao,,Agent G,file,Regulatory actions,,discovered_uningested,,,,not_built
+stj_dados_abertos,STJ open data,judiciary,P1,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.stj.jus.br/,,Agent D,api,Superior court decisions,,discovered_uningested,,,,not_built
+cnciai_improbidade,CNCIAI improbidade,judiciary,P1,not_built,not_implemented,not_loaded,monthly,true,https://www.cnj.jus.br/sistemas/cnciai/,,Agent D,api,Misconduct convictions,,discovered_uningested,,,,not_built
+carf_tax_appeals,CARF tax appeals,judiciary,P2,not_built,not_implemented,not_loaded,monthly,true,https://carf.economia.gov.br/dados-abertos,,Agent D,file,Tax litigation,,discovered_uningested,,,,not_built
+anp_royalties,ANP royalties and fuel,regulatory,P2,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anp,,Agent F,api,Oil and gas royalties,,discovered_uningested,,,,not_built
+aneel_concessions,ANEEL concessions,regulatory,P2,not_built,not_implemented,not_loaded,monthly,true,https://dadosabertos.aneel.gov.br/,,Agent F,api,Energy concessions,,discovered_uningested,,,,not_built
+anm_mining_rights,ANM mining rights,regulatory,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anm,,Agent F,api,Mining rights and permits,,discovered_uningested,,,,not_built
+antt_transport_concessions,ANTT concessions,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/antt,,Agent F,api,Transport concessions,,discovered_uningested,,,,not_built
+ans_health_plans,ANS operators,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/ans,,Agent H,api,Health insurance operators,,discovered_uningested,,,,not_built
+anvisa_registrations,ANVISA products,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anvisa,,Agent H,api,Regulatory registrations,,discovered_uningested,,,,not_built
+anac_aviation_concessions,ANAC concessions,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anac,,Agent F,api,Aviation contracts,,discovered_uningested,,,,not_built
+antaq_port_contracts,ANTAQ contracts,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/antaq,,Agent F,api,Port concessions,,discovered_uningested,,,,not_built
+ana_water_grants,ANA water grants,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/ana,,Agent F,api,Water use rights,,discovered_uningested,,,,not_built
+anatel_telecom_licenses,ANATEL licenses,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/anatel,,Agent G,api,Telecom operators,,discovered_uningested,,,,not_built
+susep_insurance_market,SUSEP insurance market,regulatory,P3,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/susep,,Agent G,file,Insurance entities,,discovered_uningested,,,,not_built
+cvm_full_ownership_chain,CVM ownership chains,market,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.cvm.gov.br/,,Agent G,file,Shareholder graph expansion,,discovered_uningested,,,,not_built
+receita_dirbi,Receita DIRBI,tax,P1,not_built,not_implemented,not_loaded,monthly,true,https://dados.gov.br/dados/conjuntos-dados/declaracao-dirbi,,Agent G,file,Tax benefit declarations,,discovered_uningested,,,,not_built
+mapbiomas_alertas,MapBiomas Alerta,environment,P1,not_built,not_implemented,not_loaded,monthly,true,https://alerta.mapbiomas.org/api,,Agent F,api,Deforestation alerts,,discovered_uningested,,,,not_built
+sicar_rural_registry,SiCAR rural registry,environment,P1,not_built,not_implemented,not_loaded,quarterly,true,https://www.car.gov.br/publico/municipios/downloads,,Agent F,file,Property boundaries and owners,,discovered_uningested,,,,not_built
+icmbio_cnuc,ICMBio CNUC units,environment,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.icmbio.gov.br/portal/faunabrasileira/cadastro-nacional-de-unidades-de-conservacao,,Agent F,file,Protected areas,,discovered_uningested,,,,not_built
+tesouro_emendas,Tesouro emendas,budget,P0,not_built,not_implemented,not_loaded,monthly,true,https://www.tesourotransparente.gov.br/,,Agent C,file,Budget execution,,discovered_uningested,,,,not_built
+siga_brasil,SIGA Brasil,budget,P0,not_built,not_implemented,not_loaded,monthly,true,https://www12.senado.leg.br/orcamento/sigabrasil,,Agent C,file,Federal budget traces,,discovered_uningested,,,,not_built
+camara_votes_bills,Camara votes and bills,legislative,P1,not_built,not_implemented,not_loaded,daily,true,https://dadosabertos.camara.leg.br/api/v2,,Agent E,api,Legislative behavior,,discovered_uningested,,,,not_built
+senado_votes_bills,Senado votes and bills,legislative,P1,not_built,not_implemented,not_loaded,daily,true,https://legis.senado.leg.br/dadosabertos,,Agent E,api,Legislative behavior,,discovered_uningested,,,,not_built
+interpol_red_notices,Interpol red notices,international,P2,not_built,not_implemented,not_loaded,weekly,true,https://www.interpol.int/How-we-work/Notices/Red-Notices,,Agent G,api,Requires key,,discovered_uningested,,,,not_built
+tce_sp,TCE Sao Paulo,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://transparencia.tce.sp.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built
+tce_pe,TCE Pernambuco,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://sistemas.tce.pe.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built
+tce_rj,TCE Rio de Janeiro,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://dados.tce.rj.gov.br/,,Agent H,api,State audit procurement,,discovered_uningested,,,,not_built
+tce_rs,TCE Rio Grande do Sul,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://portal.tce.rs.gov.br/,,Agent H,file,State audit procurement,,discovered_uningested,,,,not_built
+tce_mg,TCE Minas Gerais,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.mg.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_ba,TCE Bahia,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ba.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_ce,TCE Ceara,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ce.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_go,TCE Goias,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://portal.tce.go.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_pr,TCE Parana,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www1.tce.pr.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_sc,TCE Santa Catarina,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcesc.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_es,TCE Espirito Santo,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcees.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_mt,TCE Mato Grosso,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.mt.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_ms,TCE Mato Grosso do Sul,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ms.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_am,TCE Amazonas,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.am.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_pa,TCE Para,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcepa.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_ro,TCE Rondonia,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ro.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_rr,TCE Roraima,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcerr.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_ap,TCE Amapa,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.ap.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_to,TCE Tocantins,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tceto.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_ma,TCE Maranhao,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tcema.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_pi,TCE Piaui,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.pi.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_rn,TCE Rio Grande do Norte,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.rn.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_pb,TCE Paraiba,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://tce.pb.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_al,TCE Alagoas,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tceal.tc.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+tce_se,TCE Sergipe,state,P3,not_built,not_implemented,not_loaded,monthly,true,https://www.tce.se.gov.br/,,Agent H,web,State audit procurement,,discovered_uningested,,,,not_built
+state_portal_sp,Sao Paulo transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.sp.gov.br/,,Agent H,api,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_mg,Minas Gerais transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.mg.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_ba,Bahia transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.ba.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_ce,Ceara transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.ce.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_go,Goias transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.go.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_pr,Parana transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.pr.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_sc,Santa Catarina transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.sc.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_rs,Rio Grande do Sul transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.rs.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_pe,Pernambuco transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.pe.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
+state_portal_rj,Rio de Janeiro transparency portal,state,P2,not_built,not_implemented,not_loaded,monthly,true,https://www.transparencia.rj.gov.br/,,Agent H,web,State expenses and contracts,,discovered_uningested,,,,not_built
--- a/etl/pyproject.toml
+++ b/etl/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "bracc-etl"
 version = "0.1.0"
-description = "BRACC ETL — Data ingestion pipelines for Brazilian public data"
+description = "BR-ACC ETL — Data ingestion pipelines for Brazilian public data"
 requires-python = ">=3.12"
 license = "AGPL-3.0-or-later"
 dependencies = [
@@ -9,10 +9,11 @@ dependencies = [
    "pandas>=2.2.0",
    "httpx>=0.28.0",
    "click>=8.1.0",
-    "defusedxml>=0.7.1",
    "pydantic>=2.10.0",
    "pydantic-settings>=2.7.0",
    "pypdf>=5.2.0",
+    "defusedxml>=0.7.0",
+    "pandera>=0.21.0",
 ]

 [project.optional-dependencies]
--- a/etl/scripts/_download_utils.py
+++ b/etl/scripts/_download_utils.py
@@ -3,8 +3,6 @@
 from __future__ import annotations

 import logging
-import shutil
-import stat
 import zipfile
 from pathlib import Path

@@ -38,12 +36,21 @@ def download_file(url: str, dest: Path, *, timeout: int = 600) -> bool:

            response.raise_for_status()

+            # If we requested a range but server returned full content (200 vs 206),
+            # start fresh to avoid corruption
+            if start_byte > 0 and response.status_code != 206:
+                logger.warning(
+                    "Server ignored Range header for %s, restarting download",
+                    dest.name,
+                )
+                start_byte = 0
+
            total = response.headers.get("content-length")
            total_mb = f"{int(total) / 1e6:.1f} MB" if total else "unknown size"
            logger.info("Downloading %s (%s)...", dest.name, total_mb)

-            mode = "ab" if start_byte > 0 else "wb"
-            downloaded = start_byte
+            mode = "ab" if start_byte > 0 and response.status_code == 206 else "wb"
+            downloaded = start_byte if mode == "ab" else 0
            with open(partial, mode) as f:
                for chunk in response.iter_bytes(chunk_size=65_536):
                    f.write(chunk)
@@ -58,24 +65,49 @@ def download_file(url: str, dest: Path, *, timeout: int = 600) -> bool:
        return False


-def extract_zip(zip_path: Path, output_dir: Path) -> list[Path]:
-    """Extract ZIP and return list of extracted files.
+def safe_extract_zip(
+    zip_path: Path,
+    output_dir: Path,
+    *,
+    max_total_bytes: int = 50 * 1024**3,  # 50 GiB (~53.7 GB) default (CNPJ zips are huge)
+) -> list[Path]:
+    """Safely extract ZIP with path traversal and bomb guards.

    Deletes corrupted ZIPs for re-download.
    """
    try:
        with zipfile.ZipFile(zip_path, "r") as zf:
-            extracted = safe_extract_zip(zf, output_dir)
-        logger.info("Extracted %d files from %s", len(extracted), zip_path.name)
-        return extracted
+            # Check for path traversal
+            resolved_output = output_dir.resolve()
+            for info in zf.infolist():
+                target = (output_dir / info.filename).resolve()
+                if not target.is_relative_to(resolved_output):
+                    raise ValueError(
+                        f"Path traversal detected in {zip_path.name}: {info.filename}"
+                    )
+
+            # Check total uncompressed size (zip bomb guard)
+            total_size = sum(info.file_size for info in zf.infolist())
+            if total_size > max_total_bytes:
+                raise ValueError(
+                    f"ZIP bomb guard: {zip_path.name} would extract to "
+                    f"{total_size / 1e9:.1f}GB (limit: {max_total_bytes / 1e9:.1f}GB)"
+                )
+
+            names = zf.namelist()
+            zf.extractall(output_dir)
+
+        logger.info("Extracted %d files from %s", len(names), zip_path.name)
+        return [output_dir / n for n in names]
    except zipfile.BadZipFile:
        logger.warning("Bad ZIP file: %s — deleting for re-download", zip_path.name)
        zip_path.unlink()
        return []
-    except ValueError as exc:
-        logger.warning("Unsafe ZIP file %s: %s — deleting", zip_path.name, exc)
-        zip_path.unlink(missing_ok=True)
-        return []
+
+
+def extract_zip(zip_path: Path, output_dir: Path) -> list[Path]:
+    """Extract ZIP and return list of extracted files."""
+    return safe_extract_zip(zip_path, output_dir)


 def validate_csv(
@@ -111,60 +143,3 @@ def validate_csv(
    except Exception as e:
        logger.warning("Validation failed for %s: %s", path.name, e)
        return False
-
-
-def safe_extract_zip(
-    archive: zipfile.ZipFile,
-    output_dir: Path,
-    *,
-    max_members: int = 50_000,
-    max_uncompressed_bytes: int = 5_000_000_000,
-) -> list[Path]:
-    """Safely extract a ZIP archive.
-
-    Blocks path traversal, symlinks, and oversized archives.
-    """
-    output_root = output_dir.resolve()
-    infos = archive.infolist()
-    if len(infos) > max_members:
-        msg = f"ZIP has too many entries ({len(infos)} > {max_members})"
-        raise ValueError(msg)
-
-    extracted: list[Path] = []
-    uncompressed_total = 0
-    for info in infos:
-        member_name = info.filename.replace("\\", "/")
-        if not member_name:
-            continue
-
-        # Reject symlink entries.
-        mode = info.external_attr >> 16
-        if stat.S_ISLNK(mode):
-            msg = f"ZIP contains symlink entry: {member_name}"
-            raise ValueError(msg)
-
-        target = (output_dir / member_name).resolve()
-        try:
-            target.relative_to(output_root)
-        except ValueError as exc:
-            msg = f"Path traversal detected: {member_name}"
-            raise ValueError(msg) from exc
-
-        if info.is_dir():
-            target.mkdir(parents=True, exist_ok=True)
-            continue
-
-        uncompressed_total += info.file_size
-        if uncompressed_total > max_uncompressed_bytes:
-            msg = (
-                f"ZIP exceeds max extracted size "
-                f"({uncompressed_total} > {max_uncompressed_bytes})"
-            )
-            raise ValueError(msg)
-
-        target.parent.mkdir(parents=True, exist_ok=True)
-        with archive.open(info, "r") as source, target.open("wb") as destination:
-            shutil.copyfileobj(source, destination)
-        extracted.append(target)
-
-    return extracted
--- a/etl/scripts/download_caged.py
+++ b/etl/scripts/download_caged.py
@@ -5,9 +5,9 @@ Streams microdados_movimentacao year-by-year to separate CSVs for
 resumability and memory management on large datasets.

 Usage:
-    python etl/scripts/download_caged.py --billing-project bracc-corruptos
-    python etl/scripts/download_caged.py --billing-project bracc-corruptos --start-year 2024
-    python etl/scripts/download_caged.py --billing-project bracc-corruptos --skip-existing
+    python etl/scripts/download_caged.py --billing-project icarus-corruptos
+    python etl/scripts/download_caged.py --billing-project icarus-corruptos --start-year 2024
+    python etl/scripts/download_caged.py --billing-project icarus-corruptos --skip-existing
 """

 from __future__ import annotations
--- a/etl/scripts/download_camara_inquiries.py
+++ b/etl/scripts/download_camara_inquiries.py
@@ -413,7 +413,7 @@ def _write_manifest(
 )
@click.option(
    "--billing-project",
-    default="bracc-corruptos",
+    default="icarus-corruptos",
    help="GCP billing project for BQ mode.",
 )
@click.option(
--- a/etl/scripts/download_cnpj.py
+++ b/etl/scripts/download_cnpj.py
@@ -6,15 +6,21 @@ Usage:
    python etl/scripts/download_cnpj.py --reference-only   # reference tables only (tiny)
    python etl/scripts/download_cnpj.py --files 1          # just first file of each type
    python etl/scripts/download_cnpj.py --types Empresas   # specific type only
+    python etl/scripts/download_cnpj.py --release 2026-03  # pin to specific monthly release
 """

 from __future__ import annotations

+import hashlib
+import json
 import logging
+import os
 import sys
+from datetime import datetime, timezone
 from pathlib import Path

 import click
+import httpx

 sys.path.insert(0, str(Path(__file__).parent))
 from _download_utils import download_file, extract_zip, validate_csv
@@ -22,7 +28,13 @@ from _download_utils import download_file, extract_zip, validate_csv
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 logger = logging.getLogger(__name__)

-BASE_URL = "https://dadosabertos.rfb.gov.br/CNPJ/"
+# Receita Federal Nextcloud (primary since Jan 2026)
+NEXTCLOUD_BASE = "https://arquivos.receitafederal.gov.br/s/{token}/download?path=%2F&files="
+KNOWN_TOKENS = ["gn672Ad4CF8N6TK", "YggdBLfdninEJX9"]
+
+# Legacy URLs (dadosabertos.rfb.gov.br decommissioned Jan 2026)
+LEGACY_NEW_BASE_PATTERN = "https://dadosabertos.rfb.gov.br/CNPJ/dados_abertos_cnpj/{year_month}/"
+LEGACY_BASE_URL = "https://dadosabertos.rfb.gov.br/CNPJ/"

 MAIN_TYPES = ["Empresas", "Socios", "Estabelecimentos"]
 REFERENCE_FILES = [
@@ -48,6 +60,126 @@ EXPECTED_COLS = {
 }


+def _previous_month(year: int, month: int) -> tuple[int, int]:
+    """Return (year, month) for the previous month."""
+    if month == 1:
+        return year - 1, 12
+    return year, month - 1
+
+
+def _check_url_accessible(url: str, timeout: int = 30) -> bool:
+    """Send an HTTP HEAD (following redirects); True if the final status is below 400."""
+    try:
+        resp = httpx.head(url, follow_redirects=True, timeout=timeout)
+        return resp.status_code < 400
+    except httpx.HTTPError:
+        return False
+
+
+def _check_nextcloud_token(token: str, timeout: int = 30) -> bool:
+    """Verify a Nextcloud share token is valid via HEAD request."""
+    share_url = f"https://arquivos.receitafederal.gov.br/s/{token}"
+    try:
+        resp = httpx.head(share_url, follow_redirects=True, timeout=timeout)
+        return resp.status_code < 400
+    except httpx.HTTPError:
+        return False
+
+
+def resolve_rf_release(year_month: str | None = None) -> str:
+    """Resolve the Receita Federal CNPJ release URL.
+
+    Strategy:
+    1. Try Nextcloud share (primary since Jan 2026):
+       a. Check CNPJ_SHARE_TOKEN env var first.
+       b. Then try each known token.
+    2. Fall back to legacy dadosabertos.rfb.gov.br paths (the only step that uses ``year_month``).
+    3. Raise RuntimeError if nothing works (fail-closed).
+
+    Returns the resolved base URL. For Nextcloud, files are fetched via
+    ``{base_url}{filename}``.
+    """
+    now = datetime.now(timezone.utc)
+
+    # --- Nextcloud (primary) ---
+    tokens_to_try: list[str] = []
+
+    env_token = os.environ.get("CNPJ_SHARE_TOKEN")
+    if env_token:
+        tokens_to_try.append(env_token)
+
+    for t in KNOWN_TOKENS:
+        if t not in tokens_to_try:
+            tokens_to_try.append(t)
+
+    for token in tokens_to_try:
+        logger.info("Probing Nextcloud token: %s...", token[:6])
+        if _check_nextcloud_token(token):
+            base_url = NEXTCLOUD_BASE.format(token=token)
+            logger.info("Resolved CNPJ via Nextcloud (token %s...)", token[:6])
+            return base_url
+
+    # --- Legacy dadosabertos.rfb.gov.br ---
+    if year_month is not None:
+        candidates = [year_month]
+    else:
+        current = f"{now.year:04d}-{now.month:02d}"
+        prev_y, prev_m = _previous_month(now.year, now.month)
+        previous = f"{prev_y:04d}-{prev_m:02d}"
+        candidates = [current, previous]
+
+    for ym in candidates:
+        url = LEGACY_NEW_BASE_PATTERN.format(year_month=ym)
+        logger.info("Probing legacy release URL: %s", url)
+        if _check_url_accessible(url):
+            logger.info("Resolved CNPJ release (legacy new path): %s", url)
+            return url
+
+    logger.info("Trying legacy flat URL: %s", LEGACY_BASE_URL)
+    if _check_url_accessible(LEGACY_BASE_URL):
+        logger.info("Resolved CNPJ release (legacy flat): %s", LEGACY_BASE_URL)
+        return LEGACY_BASE_URL
+
+    tried = ", ".join(candidates)
+    raise RuntimeError(
+        f"Could not resolve CNPJ release. Tried Nextcloud tokens, "
+        f"legacy months [{tried}], and legacy flat path. "
+        "Receita Federal portal may be down or the URL structure has changed."
+    )
+
+
+def _write_manifest(
+    output_dir: Path,
+    base_url: str,
+    resolved_release: str,
+    file_results: list[dict],
+    started_at: str,
+) -> Path:
+    """Write download manifest JSON after download completes."""
+    finished_at = datetime.now(timezone.utc).isoformat()
+
+    # Aggregate checksum over every recorded file's name, size, and status (not file contents)
+    hasher = hashlib.sha256()
+    for fr in sorted(file_results, key=lambda x: x["name"]):
+        hasher.update(f"{fr['name']}:{fr['size_bytes']}:{fr['status']}".encode())
+    checksum = f"sha256:{hasher.hexdigest()}"
+
+    manifest = {
+        "source": "receita_federal_cnpj",
+        "resolved_release": resolved_release,
+        "base_url": base_url,
+        "files": file_results,
+        "started_at": started_at,
+        "finished_at": finished_at,
+        "checksum": checksum,
+    }
+
+    manifest_path = output_dir / "download_manifest.json"
+    manifest_path.write_text(json.dumps(manifest, indent=2, ensure_ascii=False), encoding="utf-8")
+    logger.info("Manifest written: %s", manifest_path)
+    return manifest_path
+
+
@click.command()
@click.option("--output-dir", default="./data/cnpj", help="Base output directory")
@click.option("--files", type=int, default=10, help="Number of files per type (0-9)")
@@ -56,6 +188,7 @@ EXPECTED_COLS = {
@click.option("--skip-existing/--no-skip-existing", default=True, help="Skip already downloaded files")
@click.option("--skip-extract", is_flag=True, help="Skip extraction after download")
@click.option("--timeout", type=int, default=600, help="Download timeout in seconds")
+@click.option("--release", default=None, help="Pin to specific monthly release (YYYY-MM format)")
 def main(
    output_dir: str,
    files: int,
@@ -64,8 +197,20 @@ def main(
    skip_existing: bool,
    skip_extract: bool,
    timeout: int,
+    release: str | None,
 ) -> None:
    """Download and extract CNPJ data from Receita Federal."""
+    started_at = datetime.now(timezone.utc).isoformat()
+
+    base_url = resolve_rf_release(release)
+    # Extract the release identifier from the resolved URL
+    resolved_release = release or "legacy"
+    if "arquivos.receitafederal.gov.br" in base_url:
+        resolved_release = "nextcloud"
+    elif "/dados_abertos_cnpj/" in base_url:
+        # Extract YYYY-MM from URL
+        resolved_release = base_url.rstrip("/").rsplit("/", 1)[-1]
+
    base = Path(output_dir)
    raw_dir = base / "raw"
    extract_dir = base / "extracted"
@@ -73,14 +218,26 @@ def main(
    for d in [raw_dir, extract_dir, ref_dir]:
        d.mkdir(parents=True, exist_ok=True)

+    file_results: list[dict] = []
+
    # --- Reference tables (always download, they're tiny) ---
    logger.info("=== Reference tables ===")
    for filename in REFERENCE_FILES:
        dest = raw_dir / filename
        if skip_existing and dest.exists():
            logger.info("Skipping (exists): %s", filename)
+            file_results.append({
+                "name": filename,
+                "status": "skipped",
+                "size_bytes": dest.stat().st_size,
+            })
        else:
-            download_file(f"{BASE_URL}{filename}", dest, timeout=timeout)
+            success = download_file(f"{base_url}{filename}", dest, timeout=timeout)
+            file_results.append({
+                "name": filename,
+                "status": "ok" if success else "failed",
+                "size_bytes": dest.stat().st_size if dest.exists() else 0,
+            })

        if not skip_extract and dest.exists():
            extracted = extract_zip(dest, ref_dir)
@@ -90,7 +247,8 @@ def main(
                validate_csv(f, expected_cols=expected)

    if reference_only:
-        logger.info("Reference-only mode — done.")
+        logger.info("Reference-only mode -- done.")
+        _write_manifest(base, base_url, resolved_release, file_results, started_at)
        return

    # --- Main data files ---
@@ -102,10 +260,25 @@ def main(
            dest = raw_dir / filename
            if skip_existing and dest.exists():
                logger.info("Skipping (exists): %s", filename)
+                file_results.append({
+                    "name": filename,
+                    "status": "skipped",
+                    "size_bytes": dest.stat().st_size,
+                })
            else:
-                success = download_file(f"{BASE_URL}{filename}", dest, timeout=timeout)
+                success = download_file(f"{base_url}{filename}", dest, timeout=timeout)
                if not success:
+                    file_results.append({
+                        "name": filename,
+                        "status": "failed",
+                        "size_bytes": 0,
+                    })
                    continue
+                file_results.append({
+                    "name": filename,
+                    "status": "ok",
+                    "size_bytes": dest.stat().st_size if dest.exists() else 0,
+                })

            if not skip_extract and dest.exists():
                extracted = extract_zip(dest, extract_dir)
@@ -120,6 +293,7 @@ def main(

    logger.info("=== Download complete ===")
    _print_summary(raw_dir, extract_dir, ref_dir)
+    _write_manifest(base, base_url, resolved_release, file_results, started_at)


 def _print_summary(raw_dir: Path, extract_dir: Path, ref_dir: Path) -> None:
--- a/etl/scripts/download_cnpj_bq.py
+++ b/etl/scripts/download_cnpj_bq.py
@@ -10,8 +10,8 @@ And a manifest:
  - download_manifest.json

 Usage:
-  python etl/scripts/download_cnpj_bq.py --billing-project bracc-corruptos
-  python etl/scripts/download_cnpj_bq.py --billing-project bracc-corruptos --tables socios
+  python etl/scripts/download_cnpj_bq.py --billing-project icarus-corruptos
+  python etl/scripts/download_cnpj_bq.py --billing-project icarus-corruptos --tables socios
 """

 from __future__ import annotations
@@ -105,6 +105,44 @@ TABLES: dict[str, list[str]] = {
 PAGE_SIZE = 100_000


+def _run_bigquery_precheck(
+    *,
+    billing_project: str,  # passed as the bigquery.Client project
+    source_project: str,  # with source_dataset, forms the `socios` table id probed below
+    source_dataset: str,
+    snapshot_start: str | None,  # optional lower bound on the `data` column; falsy -> count the whole table
+) -> None:  # nothing is caught here; any client/query exception propagates to the caller
+    """Run explicit auth/ACL prechecks before starting large table downloads."""
+    from google.cloud import bigquery  # imported inside the function rather than at module import time
+
+    client = bigquery.Client(project=billing_project)
+    logger.info("Running BigQuery precheck: SELECT 1")
+    list(client.query("SELECT 1 AS ok").result())  # step 1: trivial query that references no table
+
+    socios_table = f"{source_project}.{source_dataset}.socios"
+    if snapshot_start:
+        precheck_sql = (
+            f"SELECT COUNT(1) AS n FROM `{socios_table}` "
+            "WHERE data >= @snapshot_start"  # value is bound via query_params, not interpolated into the SQL
+        )
+        query_params = [
+            bigquery.ScalarQueryParameter("snapshot_start", "DATE", snapshot_start),
+        ]
+    else:
+        precheck_sql = f"SELECT COUNT(1) AS n FROM `{socios_table}`"
+        query_params = []
+
+    logger.info("Running BigQuery precheck: %s", precheck_sql)
+    rows = list(  # step 2: count query against the source `socios` table itself
+        client.query(
+            precheck_sql,
+            job_config=bigquery.QueryJobConfig(query_parameters=query_params),
+        ).result(),
+    )
+    check_value = rows[0].n if rows else 0  # falls back to 0 if the result set is empty
+    logger.info("BigQuery precheck OK: socios_count=%s", check_value)
+
+
 def _sha256_file(path: Path) -> str:
    digest = hashlib.sha256()
    with path.open("rb") as f:
@@ -292,6 +330,19 @@ def main(
        )
    source_project, source_dataset = dataset.split(".", 1)

+    try:
+        _run_bigquery_precheck(
+            billing_project=billing_project,
+            source_project=source_project,
+            source_dataset=source_dataset,
+            snapshot_start=snapshot_start,
+        )
+    except Exception as exc:
+        raise click.ClickException(
+            "BigQuery precheck failed. Configure a non-interactive service account "
+            "(GOOGLE_APPLICATION_CREDENTIALS) with dataset ACL and billing access.",
+        ) from exc
+
    selected = list(tables) if tables else list(TABLES.keys())
    run_id = f"cnpj-bq-{datetime.now(UTC).strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:8]}"
    logger.info(
--- a/etl/scripts/download_dou.py
+++ b/etl/scripts/download_dou.py
@@ -22,7 +22,6 @@ from pathlib import Path

 import click
 import httpx
-from _download_utils import safe_extract_zip

 logging.basicConfig(
    level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s"
@@ -90,15 +89,24 @@ def _download_zip(
    xml_count = 0

    try:
+        resolved_dir = section_dir.resolve()
        with zipfile.ZipFile(BytesIO(resp.content)) as zf:
-            extracted = safe_extract_zip(zf, section_dir)
-            xml_count = sum(1 for path in extracted if path.suffix.lower() == ".xml")
+            for member in zf.namelist():
+                # Path traversal guard
+                target = (section_dir / member).resolve()
+                if not target.is_relative_to(resolved_dir):
+                    logger.warning(
+                        "Path traversal detected in %s: %s — skipping",
+                        zip_name,
+                        member,
+                    )
+                    continue
+                if member.lower().endswith(".xml"):
+                    zf.extract(member, section_dir)
+                    xml_count += 1
    except zipfile.BadZipFile:
        logger.warning("Bad ZIP file: %s", zip_name)
        return 0
-    except ValueError as exc:
-        logger.warning("Unsafe ZIP file %s: %s", zip_name, exc)
-        return 0

    if xml_count > 0:
        marker.write_text(str(xml_count))
--- a/etl/scripts/download_mides.py
+++ b/etl/scripts/download_mides.py
@@ -71,7 +71,7 @@ def _write_manifest(out_dir: Path, tables: list[dict[str, Any]]) -> Path:


@click.command()
-@click.option("--billing-project", default="bracc-corruptos", help="GCP billing project")
+@click.option("--billing-project", default="icarus-corruptos", help="GCP billing project")
@click.option(
    "--dataset",
    default=WORLD_WB_DATASET,
--- a/etl/scripts/download_pncp.py
+++ b/etl/scripts/download_pncp.py
@@ -439,7 +439,7 @@ def main(
    client = httpx.Client(
        timeout=timeout,
        follow_redirects=True,
-        headers={"User-Agent": "BRACC-ETL/1.0 (public data research)"},
+        headers={"User-Agent": "BR-ACC-ETL/1.0 (public data research)"},
    )

    total_records = 0
--- a/etl/scripts/download_renuncias.py
+++ b/etl/scripts/download_renuncias.py
@@ -8,10 +8,12 @@ from __future__ import annotations

 import argparse
 import logging
-import zipfile
+import sys
 from pathlib import Path

 import httpx
+
+sys.path.insert(0, str(Path(__file__).parent))
 from _download_utils import safe_extract_zip

 logger = logging.getLogger(__name__)
@@ -34,14 +36,13 @@ def download_year(output_dir: Path, year: int) -> None:
            url,
            follow_redirects=True,
            timeout=300,
-            headers={"User-Agent": "BRACC-ETL/1.0"},
+            headers={"User-Agent": "BR-ACC-ETL/1.0"},
        )
        response.raise_for_status()
        dest_zip.write_bytes(response.content)
        logger.info("Downloaded: %s (%d bytes)", dest_zip.name, len(response.content))

-        with zipfile.ZipFile(dest_zip, "r") as zf:
-            extracted = safe_extract_zip(zf, output_dir)
+        extracted = safe_extract_zip(dest_zip, output_dir)
        logger.info("Extracted %d files", len(extracted))
    except httpx.HTTPError:
        logger.warning("Failed to download renuncias for %d", year)
--- a/etl/scripts/download_senado_cpis.py
+++ b/etl/scripts/download_senado_cpis.py
@@ -16,13 +16,13 @@ import hashlib
 import json
 import logging
 import re
+import defusedxml.ElementTree as ET
 from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any

 import click
 import httpx
-from defusedxml import ElementTree as ET
 from download_senado_cpi_archive import fetch_archive_historical

 logger = logging.getLogger(__name__)
--- a/etl/scripts/download_siconfi.py
+++ b/etl/scripts/download_siconfi.py
@@ -44,7 +44,7 @@ def get_all_entities() -> list[dict]:
            url,
            params={"offset": offset, "limit": limit},
            timeout=60,
-            headers={"User-Agent": "BRACC-ETL/1.0"},
+            headers={"User-Agent": "BR-ACC-ETL/1.0"},
        )
        response.raise_for_status()
        data = response.json()
@@ -125,7 +125,7 @@ def download_year(
    header_written = partial.exists() and partial.stat().st_size > 0

    with (
-        httpx.Client(headers={"User-Agent": "BRACC-ETL/1.0"}) as client,
+        httpx.Client(headers={"User-Agent": "BR-ACC-ETL/1.0"}) as client,
        open(partial, "a", newline="", encoding="utf-8") as f,
    ):
        writer: csv.DictWriter | None = None
--- a/etl/scripts/download_stf.py
+++ b/etl/scripts/download_stf.py
@@ -5,9 +5,9 @@ Streams from BigQuery table basedosdados.br_stf_corte_aberta.decisoes to local C
 Requires `google-cloud-bigquery` and an authenticated GCP project.

 Usage:
-    python etl/scripts/download_stf.py --billing-project bracc-corruptos
-    python etl/scripts/download_stf.py --billing-project bracc-corruptos --skip-existing
-    python etl/scripts/download_stf.py --billing-project bracc-corruptos --output-dir ./data/stf
+    python etl/scripts/download_stf.py --billing-project icarus-corruptos
+    python etl/scripts/download_stf.py --billing-project icarus-corruptos --skip-existing
+    python etl/scripts/download_stf.py --billing-project icarus-corruptos --output-dir ./data/stf
 """

 from __future__ import annotations
--- a/etl/scripts/download_tse_bens.py
+++ b/etl/scripts/download_tse_bens.py
@@ -5,9 +5,9 @@ Streams from BigQuery table `basedosdados.br_tse_eleicoes.bens_candidato` to a l
 Requires `google-cloud-bigquery` and an authenticated GCP project.

 Usage:
-    python etl/scripts/download_tse_bens.py --billing-project bracc-corruptos
-    python etl/scripts/download_tse_bens.py --billing-project bracc-corruptos --start-year 2018
-    python etl/scripts/download_tse_bens.py --billing-project bracc-corruptos --skip-existing
+    python etl/scripts/download_tse_bens.py --billing-project icarus-corruptos
+    python etl/scripts/download_tse_bens.py --billing-project icarus-corruptos --start-year 2018
+    python etl/scripts/download_tse_bens.py --billing-project icarus-corruptos --skip-existing
 """

 from __future__ import annotations
--- a/etl/scripts/download_tse_filiados.py
+++ b/etl/scripts/download_tse_filiados.py
@@ -7,9 +7,9 @@ Filters to REGULAR status only (active members) to reduce volume.
 Requires `google-cloud-bigquery` and an authenticated GCP project.

 Usage:
-    python etl/scripts/download_tse_filiados.py --billing-project bracc-corruptos
-    python etl/scripts/download_tse_filiados.py --billing-project bracc-corruptos --skip-existing
-    python etl/scripts/download_tse_filiados.py --billing-project bracc-corruptos --all-statuses
+    python etl/scripts/download_tse_filiados.py --billing-project icarus-corruptos
+    python etl/scripts/download_tse_filiados.py --billing-project icarus-corruptos --skip-existing
+    python etl/scripts/download_tse_filiados.py --billing-project icarus-corruptos --all-statuses
 """

 from __future__ import annotations
--- a/etl/scripts/download_un_sanctions.py
+++ b/etl/scripts/download_un_sanctions.py
@@ -14,10 +14,10 @@ from __future__ import annotations
 import json
 import logging
 import sys
+import defusedxml.ElementTree as ET
 from pathlib import Path

 import click
-from defusedxml import ElementTree as ET

 # Allow imports from scripts/ directory
 sys.path.insert(0, str(Path(__file__).parent))
--- a/etl/src/bracc_etl/base.py
+++ b/etl/src/bracc_etl/base.py
@@ -21,12 +21,16 @@ class Pipeline(ABC):
        limit: int | None = None,
        chunk_size: int = 50_000,
        neo4j_database: str | None = None,
+        history: bool = False,
    ) -> None:
        self.driver = driver
        self.data_dir = data_dir
        self.limit = limit
        self.chunk_size = chunk_size
        self.neo4j_database = neo4j_database or os.getenv("NEO4J_DATABASE", "neo4j")
+        self.history = history
+        self.rows_in: int = 0
+        self.rows_loaded: int = 0
        source_key = getattr(self, "source_id", getattr(self, "name", "unknown_source"))
        self.run_id = f"{source_key}_{datetime.now(tz=UTC).strftime('%Y%m%d%H%M%S')}"

@@ -87,8 +91,8 @@ class Pipeline(ABC):
            "    r.started_at = coalesce($started_at, r.started_at), "
            "    r.finished_at = coalesce($finished_at, r.finished_at), "
            "    r.error = coalesce($error, r.error), "
-            "    r.rows_in = coalesce(r.rows_in, 0), "
-            "    r.rows_loaded = coalesce(r.rows_loaded, 0)"
+            "    r.rows_in = $rows_in, "
+            "    r.rows_loaded = $rows_loaded"
        )
        run_id = getattr(self, "run_id", f"{source_id}_manual")
        params = {
@@ -98,6 +102,8 @@ class Pipeline(ABC):
            "started_at": started_at,
            "finished_at": finished_at,
            "error": error,
+            "rows_in": self.rows_in,
+            "rows_loaded": self.rows_loaded,
        }
        try:
            with self.driver.session(database=self.neo4j_database) as session:
--- a/etl/src/bracc_etl/entity_resolution/config.py
+++ b/etl/src/bracc_etl/entity_resolution/config.py
@@ -13,7 +13,7 @@ def get_person_settings() -> dict[str, Any]:
    """
    try:
        import splink.comparison_library as cl  # type: ignore[import-not-found]
-        from splink import SettingsCreator
+        from splink import SettingsCreator  # type: ignore[import-not-found,unused-ignore]
    except ImportError as exc:
        raise ImportError(
            "splink is required for entity resolution. "
--- a/etl/src/bracc_etl/pipelines/bcb.py
+++ b/etl/src/bracc_etl/pipelines/bcb.py
@@ -51,8 +51,9 @@ class BcbPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.penalties: list[dict[str, Any]] = []
        self.company_rels: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/bndes.py
+++ b/etl/src/bracc_etl/pipelines/bndes.py
@@ -33,8 +33,9 @@ class BndesPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.finances: list[dict[str, Any]] = []
        self.relationships: list[dict[str, Any]] = []
@@ -51,8 +52,15 @@ class BndesPipeline(Pipeline):

    def extract(self) -> None:
        bndes_dir = Path(self.data_dir) / "bndes"
+        if not bndes_dir.exists():
+            logger.warning("[%s] Data directory not found: %s", self.name, bndes_dir)
+            return
+        csv_path = bndes_dir / "operacoes-nao-automaticas.csv"
+        if not csv_path.exists():
+            logger.warning("[%s] CSV file not found: %s", self.name, csv_path)
+            return
        self._raw = pd.read_csv(
-            bndes_dir / "operacoes-nao-automaticas.csv",
+            csv_path,
            dtype=str,
            delimiter=";",
            encoding="latin-1",
--- a/etl/src/bracc_etl/pipelines/caged.py
+++ b/etl/src/bracc_etl/pipelines/caged.py
@@ -88,8 +88,9 @@ class CagedPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._csv_files: list[Path] = []

    def extract(self) -> None:
--- a/etl/src/bracc_etl/pipelines/camara.py
+++ b/etl/src/bracc_etl/pipelines/camara.py
@@ -60,8 +60,9 @@ class CamaraPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.expenses: list[dict[str, Any]] = []
        self.deputies: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/camara_inquiries.py
+++ b/etl/src/bracc_etl/pipelines/camara_inquiries.py
@@ -66,8 +66,9 @@ class CamaraInquiriesPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)

        self._raw_inquiries: pd.DataFrame = pd.DataFrame()
        self._raw_requirements: pd.DataFrame = pd.DataFrame()
--- a/etl/src/bracc_etl/pipelines/ceaf.py
+++ b/etl/src/bracc_etl/pipelines/ceaf.py
@@ -31,8 +31,9 @@ class CeafPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.expulsions: list[dict[str, Any]] = []
        self.person_rels: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/cepim.py
+++ b/etl/src/bracc_etl/pipelines/cepim.py
@@ -37,8 +37,9 @@ class CepimPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.ngos: list[dict[str, Any]] = []
        self.company_rels: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/cnpj.py
+++ b/etl/src/bracc_etl/pipelines/cnpj.py
@@ -216,9 +216,11 @@ class CNPJPipeline(Pipeline):
        limit: int | None = None,
        chunk_size: int = 50_000,
        history: bool = False,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
-        self.history = history
+        super().__init__(
+            driver, data_dir, limit=limit, chunk_size=chunk_size, history=history, **kwargs,
+        )
        self.run_id = f"cnpj-{datetime.now(UTC).strftime('%Y%m%d%H%M%S')}"
        self._raw_empresas: pd.DataFrame = pd.DataFrame()
        self._raw_socios: pd.DataFrame = pd.DataFrame()
--- a/etl/src/bracc_etl/pipelines/comprasnet.py
+++ b/etl/src/bracc_etl/pipelines/comprasnet.py
@@ -63,7 +63,7 @@ class ComprasnetPipeline(Pipeline):
    """ETL pipeline for PNCP federal procurement contracts."""

    name = "comprasnet"
-    source_id = "pncp"
+    source_id = "comprasnet"

    def __init__(
        self,
@@ -71,8 +71,9 @@ class ComprasnetPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self.contracts: list[dict[str, Any]] = []

    def extract(self) -> None:
--- a/etl/src/bracc_etl/pipelines/cpgf.py
+++ b/etl/src/bracc_etl/pipelines/cpgf.py
@@ -84,8 +84,9 @@ class CpgfPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.expenses: list[dict[str, Any]] = []
        self.cardholders: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/cvm.py
+++ b/etl/src/bracc_etl/pipelines/cvm.py
@@ -38,8 +38,9 @@ class CvmPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw_processos: pd.DataFrame = pd.DataFrame()
        self._raw_acusados: pd.DataFrame = pd.DataFrame()
        self.proceedings: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/cvm_funds.py
+++ b/etl/src/bracc_etl/pipelines/cvm_funds.py
@@ -43,8 +43,9 @@ class CvmFundsPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.funds: list[dict[str, Any]] = []
        self.admin_rels: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/datajud.py
+++ b/etl/src/bracc_etl/pipelines/datajud.py
@@ -50,8 +50,9 @@ class DatajudPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)

        self._raw_cases: pd.DataFrame = pd.DataFrame()
        self._raw_parties: pd.DataFrame = pd.DataFrame()
--- a/etl/src/bracc_etl/pipelines/datasus.py
+++ b/etl/src/bracc_etl/pipelines/datasus.py
@@ -29,8 +29,9 @@ class DatasusPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.facilities: list[dict[str, Any]] = []
        self.company_links: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/dou.py
+++ b/etl/src/bracc_etl/pipelines/dou.py
@@ -17,7 +17,10 @@ import re
 from pathlib import Path
 from typing import TYPE_CHECKING, Any

-from defusedxml import ElementTree  # type: ignore[import-untyped]
+from defusedxml.ElementTree import ParseError as _XmlParseError  # type: ignore[import-untyped]
+from defusedxml.ElementTree import (
+    parse as _safe_xml_parse,  # type: ignore[import-untyped,unused-ignore]
+)

 from bracc_etl.base import Pipeline
 from bracc_etl.loader import Neo4jBatchLoader
@@ -141,8 +144,9 @@ class DouPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw_acts: list[dict[str, str]] = []
        self.acts: list[dict[str, Any]] = []
        self.person_rels: list[dict[str, Any]] = []
@@ -227,8 +231,8 @@ class DouPipeline(Pipeline):
        """Extract acts from Imprensa Nacional XML dumps."""
        for f in xml_files:
            try:
-                tree = ElementTree.parse(f)  # noqa: S314
-            except ElementTree.ParseError:
+                tree = _safe_xml_parse(f)
+            except _XmlParseError:
                logger.warning("[dou] Failed to parse XML: %s", f.name)
                continue

--- a/etl/src/bracc_etl/pipelines/eu_sanctions.py
+++ b/etl/src/bracc_etl/pipelines/eu_sanctions.py
@@ -76,8 +76,9 @@ class EuSanctionsPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.sanctions: list[dict[str, Any]] = []
        self.person_rels: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/holdings.py
+++ b/etl/src/bracc_etl/pipelines/holdings.py
@@ -36,8 +36,9 @@ class HoldingsPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.holding_rels: list[dict[str, Any]] = []

--- a/etl/src/bracc_etl/pipelines/ibama.py
+++ b/etl/src/bracc_etl/pipelines/ibama.py
@@ -40,8 +40,9 @@ class IbamaPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.embargoes: list[dict[str, Any]] = []
        self.companies: list[dict[str, Any]] = []
@@ -65,7 +66,13 @@ class IbamaPipeline(Pipeline):

    def extract(self) -> None:
        ibama_dir = Path(self.data_dir) / "ibama"
+        if not ibama_dir.exists():
+            logger.warning("[%s] Data directory not found: %s", self.name, ibama_dir)
+            return
        csv_path = ibama_dir / "areas_embargadas.csv"
+        if not csv_path.exists():
+            logger.warning("[%s] CSV file not found: %s", self.name, csv_path)
+            return
        logger.info("[ibama] Reading %s", csv_path)
        self._raw = pd.read_csv(
            csv_path,
--- a/etl/src/bracc_etl/pipelines/icij.py
+++ b/etl/src/bracc_etl/pipelines/icij.py
@@ -42,8 +42,9 @@ class ICIJPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._entities_raw: pd.DataFrame = pd.DataFrame()
        self._officers_raw: pd.DataFrame = pd.DataFrame()
        self._intermediaries_raw: pd.DataFrame = pd.DataFrame()
--- a/etl/src/bracc_etl/pipelines/inep.py
+++ b/etl/src/bracc_etl/pipelines/inep.py
@@ -42,8 +42,9 @@ class InepPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self.schools: list[dict[str, Any]] = []
        self.school_company_links: list[dict[str, Any]] = []

--- a/etl/src/bracc_etl/pipelines/leniency.py
+++ b/etl/src/bracc_etl/pipelines/leniency.py
@@ -31,8 +31,9 @@ class LeniencyPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.agreements: list[dict[str, Any]] = []
        self.company_rels: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/mides.py
+++ b/etl/src/bracc_etl/pipelines/mides.py
@@ -74,8 +74,9 @@ class MidesPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)

        self._raw_bids: pd.DataFrame = pd.DataFrame()
        self._raw_contracts: pd.DataFrame = pd.DataFrame()
--- a/etl/src/bracc_etl/pipelines/ofac.py
+++ b/etl/src/bracc_etl/pipelines/ofac.py
@@ -63,8 +63,9 @@ class OfacPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.sanctions: list[dict[str, Any]] = []

--- a/etl/src/bracc_etl/pipelines/opensanctions.py
+++ b/etl/src/bracc_etl/pipelines/opensanctions.py
@@ -81,8 +81,9 @@ class OpenSanctionsPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw_entities: list[dict[str, Any]] = []
        self.global_peps: list[dict[str, Any]] = []
        self.pep_match_rels: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/pep_cgu.py
+++ b/etl/src/bracc_etl/pipelines/pep_cgu.py
@@ -84,8 +84,9 @@ class PepCguPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._raw: pd.DataFrame = pd.DataFrame()
        self.pep_records: list[dict[str, Any]] = []
        self.person_links: list[dict[str, Any]] = []
--- a/etl/src/bracc_etl/pipelines/pgfn.py
+++ b/etl/src/bracc_etl/pipelines/pgfn.py
@@ -38,8 +38,9 @@ class PgfnPipeline(Pipeline):
        data_dir: str = "./data",
        limit: int | None = None,
        chunk_size: int = 50_000,
+        **kwargs: Any,
    ) -> None:
-        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size)
+        super().__init__(driver, data_dir, limit=limit, chunk_size=chunk_size, **kwargs)
        self._csv_files: list[Path] = []
        self.finances: list[dict[str, Any]] = []
        self.relationships: list[dict[str, Any]] = []
@@ -56,10 +57,13 @@ class PgfnPipeline(Pipeline):

    def extract(self) -> None:
        pgfn_dir = Path(self.data_dir) / "pgfn"
+        if not pgfn_dir.exists():
+            logger.warning("[%s] Data directory not found: %s", self.name, pgfn_dir)
+            return
        self._csv_files = sorted(pgfn_dir.glob("arquivo_lai_SIDA_*_*.csv"))
        if not self._csv_files:
-            msg = f"No PGFN CSV files found in {pgfn_dir}"
-            raise FileNotFoundError(msg)
+            logger.warning("[%s] No PGFN CSV files found in %s", self.name, pgfn_dir)
+            return
        logger.info("[pgfn] Found %d CSV files to process", len(self._csv_files))

    def transform(self) -> None:
--- a/Show More
+++ b/Show More