diff --git a/.env.example b/.env.example index c19aa28..1274e82 100644 --- a/.env.example +++ b/.env.example @@ -20,7 +20,8 @@ APP_ENV=dev JWT_SECRET_KEY=change-me-generate-with-openssl-rand-hex-32 INVITE_CODE= CORS_ORIGINS=http://localhost:3000 -PRODUCT_TIER=advanced +PRODUCT_TIER=community +PATTERNS_ENABLED=false PUBLIC_MODE=false PUBLIC_ALLOW_PERSON=false PUBLIC_ALLOW_ENTITY_LOOKUP=false @@ -29,6 +30,7 @@ PUBLIC_ALLOW_INVESTIGATIONS=false # Frontend (dev only — production uses Caddy reverse proxy with relative paths) VITE_API_URL=http://localhost:8000 VITE_PUBLIC_MODE=false +VITE_PATTERNS_ENABLED=false # Optional: Google Cloud (for Base dos Dados / TSE BigQuery) # GOOGLE_APPLICATION_CREDENTIALS=path/to/service-account.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d03bd5..e3ea1bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: name: API (Python) runs-on: ubuntu-latest env: - PRODUCT_TIER: ${{ github.repository == 'brunoclz/world-transparency-graph' && 'community' || 'advanced' }} + PRODUCT_TIER: community defaults: run: working-directory: api diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml index 9ab9b74..a3b9a0c 100644 --- a/.github/workflows/security.yml +++ b/.github/workflows/security.yml @@ -109,5 +109,5 @@ jobs: with: python-version: "3.12" - - name: Validate public edition specialized boundary + - name: Validate public edition scope run: python scripts/check_open_core_boundary.py --repo-root . diff --git a/LGPD.md b/LGPD.md index 51c412f..5837fa1 100644 --- a/LGPD.md +++ b/LGPD.md @@ -12,12 +12,12 @@ This baseline is operational guidance and does not replace formal legal advice. ## Data categories and exclusions -Data categories processed in the public edition context: +Data categories processed in this public repository: - Company records and corporate relationships. - Public procurement and public finance records. - Publicly disclosed sanctions and regulatory actions. -- Public legislative and advanced records. +- Public legislative and administrative records. Default exclusions in public mode: diff --git a/README.md b/README.md index e9790ff..137925f 100644 --- a/README.md +++ b/README.md @@ -1,136 +1,81 @@ -# World Transparency Graph (WTG) — Icarus Core - -Global public-data graph analysis platform. +# World Transparency Graph (WTG) [![WTG Header](docs/brand/wtg-header.png)](docs/brand/wtg-header.png) [![CI](https://github.com/brunoclz/world-transparency-graph/actions/workflows/ci.yml/badge.svg)](https://github.com/brunoclz/world-transparency-graph/actions/workflows/ci.yml) [![License: AGPL v3](https://img.shields.io/badge/License-AGPL_v3-blue.svg)](https://www.gnu.org/licenses/agpl-3.0) ---- +WTG is an open-source graph infrastructure for public data intelligence. -## What it is +This repository contains the full code for the WTG public edition. The pattern engine is temporarily disabled pending validation. -WTG (powered by Icarus Core) ingests public records and enables visual exploration of connections between companies, contracts, elections, and sanctions. +## What it does -**Data patterns from public records. Not accusations.** +- Ingests public records with reproducible ETL pipelines. +- Loads normalized data into Neo4j. +- Exposes a public-safe API surface. +- Provides a React frontend for graph exploration. -## Brand model +Data patterns from public records are signals, not legal proof. -- Public product: **World Transparency Graph (WTG)** -- Civic movement: **BRCC** -- Advanced engine: **Icarus Core** +## Stack -Public edition boundary reference: [docs/brand/open_core_boundary.md](docs/brand/open_core_boundary.md) - -## Architecture - -``` -┌─────────────┐ ┌──────────────┐ ┌──────────────┐ -│ Frontend │────▶│ FastAPI │────▶│ Neo4j │ -│ React SPA │ │ REST API │ │ Graph DB │ -│ :3000 │ │ :8000 │ │ :7687 │ -└─────────────┘ └──────────────┘ └──────────────┘ - ▲ - ┌──────┴──────┐ - │ ETL Pipes │ - │ CNPJ, TSE │ - │ Transp, │ - │ Sanctions │ - └─────────────┘ -``` - -| Layer | Technology | -|---|---| -| Graph DB | Neo4j 5 Community | -| Backend | FastAPI (Python 3.12+, async) | -| Frontend | Vite + React 19 + TypeScript | -| ETL | Python (pandas, httpx) | -| Entity Resolution | splink 4 (optional) | -| Infra | Docker Compose | -| i18n | EN (default), PT-BR | +- Graph DB: Neo4j 5 Community +- Backend: FastAPI (Python 3.12+, async) +- Frontend: Vite + React 19 + TypeScript +- ETL: Python (pandas, httpx) +- Infra: Docker Compose ## Quick start ```bash -# Prerequisites: Docker, Node 22+, Python 3.12+, uv cp .env.example .env -# Edit .env with your Neo4j password +# set at least NEO4J_PASSWORD -# Start full stack make dev -# Load development seed data export NEO4J_PASSWORD=your_password make seed - -# API: http://localhost:8000/health -# Frontend: http://localhost:3000 -# Neo4j Browser: http://localhost:7474 ``` -## Development +- API: `http://localhost:8000/health` +- Frontend: `http://localhost:3000` +- Neo4j Browser: `http://localhost:7474` -```bash -# Install dependencies -cd api && uv sync --dev -cd etl && uv sync --dev -cd frontend && npm install +## Public-safe defaults -# Run individual services -make api # FastAPI with hot reload -make frontend # Vite dev server - -# ETL -cd etl && uv run icarus-etl sources # List pipelines -cd etl && uv run icarus-etl run --source cnpj --neo4j-password $NEO4J_PASSWORD - -# Quality checks (run before commit) -make check # lint + types + tests -make neutrality # prohibited-word audit -``` - -## Tests - -```bash -make test # All (API + ETL + Frontend) -make test-api # Python API tests -make test-etl # Python ETL tests -make test-frontend # TypeScript frontend tests -``` - -## Analysis patterns - -| ID | Pattern | -|---|---| -| p01 | Self-dealing amendment | -| p05 | Patrimony incompatibility | -| p06 | Sanctioned still receiving | -| p10 | Donation-contract loop | -| p12 | Contract concentration | - -## Public mode contract - -WTG Open should run with the following safe defaults: +Use these defaults for public deployments: - `PRODUCT_TIER=community` - `PUBLIC_MODE=true` - `PUBLIC_ALLOW_PERSON=false` - `PUBLIC_ALLOW_ENTITY_LOOKUP=false` - `PUBLIC_ALLOW_INVESTIGATIONS=false` +- `PATTERNS_ENABLED=false` +- `VITE_PUBLIC_MODE=true` +- `VITE_PATTERNS_ENABLED=false` -With these defaults, public mode does not return personal-entity nodes (`Person`/`Partner`) or personal document properties. +## Development -## Commercial offerings +```bash +# dependencies +cd api && uv sync --dev +cd ../etl && uv sync --dev +cd ../frontend && npm install -WTG Open is public and auditable. -Icarus Advanced is specialized and not included in this repository. +# quality +make check +make neutrality +``` -Advanced offerings include: +## API surface -- Advanced entity-resolution precision modules -- Advanced scoring and high-sensitivity pattern intelligence -- Managed deployment and operational support +| Method | Route | Description | +|---|---|---| +| GET | `/health` | Health check | +| GET | `/api/v1/public/meta` | Aggregated metrics and source health | +| GET | `/api/v1/public/graph/company/{cnpj_or_id}` | Public company subgraph | +| GET | `/api/v1/public/patterns/company/{cnpj_or_id}` | Returns `503` while pattern engine is disabled | ## Legal & Ethics @@ -143,50 +88,6 @@ Advanced offerings include: - [ABUSE_RESPONSE.md](ABUSE_RESPONSE.md) - [docs/legal/legal-index.md](docs/legal/legal-index.md) -## API endpoints - -| Method | Route | Description | -|---|---|---| -| GET | `/health` | Health check | -| GET | `/api/v1/public/meta` | Aggregated metrics and source health | -| GET | `/api/v1/public/patterns/company/{cnpj_or_id}` | Public risk signals for a company | -| GET | `/api/v1/public/graph/company/{cnpj_or_id}` | Public company subgraph | - -### Advanced-only surface (internal deployment) - -- `/api/v1/entity/*` -- `/api/v1/search` -- `/api/v1/graph/*` -- `/api/v1/patterns/*` -- `/api/v1/investigations/*` - -## Project structure - -``` -CORRUPTOS/ -├── api/ # FastAPI backend -│ ├── src/icarus/ -│ │ ├── routers/ # 7 routers -│ │ ├── services/ # Business logic -│ │ ├── queries/ # 27 .cypher files -│ │ ├── models/ # Pydantic models -│ │ └── middleware/ # CPF masking -│ └── tests/ # 79 unit tests -├── etl/ # ETL pipelines -│ ├── src/icarus_etl/ -│ │ ├── pipelines/ # CNPJ, TSE, transparency, sanctions -│ │ ├── transforms/ # Name norm, doc formatting, dedup -│ │ └── linking hooks # Community/public-safe post-load hooks -│ └── tests/ # 63 unit tests -├── frontend/ # React SPA -│ └── src/ -│ ├── components/ # Graph, Entity, Search, Pattern, Investigation -│ ├── pages/ # Home, Search, GraphExplorer, Patterns, Investigations -│ └── stores/ # Zustand -├── infra/ # Docker Compose + Neo4j schema + seed data -└── .github/workflows/ # CI pipeline -``` - ## License [GNU Affero General Public License v3.0](LICENSE) diff --git a/api/src/icarus/config.py b/api/src/icarus/config.py index 069f8e8..de7b951 100644 --- a/api/src/icarus/config.py +++ b/api/src/icarus/config.py @@ -19,7 +19,8 @@ class Settings(BaseSettings): rate_limit_auth: str = "300/minute" invite_code: str = "" cors_origins: str = "http://localhost:3000" - product_tier: str = "advanced" + product_tier: str = "community" + patterns_enabled: bool = False public_mode: bool = False public_allow_person: bool = False public_allow_entity_lookup: bool = False diff --git a/api/src/icarus/models/entity.py b/api/src/icarus/models/entity.py index f59bc27..e847393 100644 --- a/api/src/icarus/models/entity.py +++ b/api/src/icarus/models/entity.py @@ -51,7 +51,7 @@ class ExposureResponse(BaseModel): peer_group: str peer_count: int sources: list[SourceAttribution] - intelligence_tier: str = "advanced" + intelligence_tier: str = "community" class TimelineEvent(BaseModel): diff --git a/api/src/icarus/models/pattern.py b/api/src/icarus/models/pattern.py index 441ae3c..a65ca8d 100644 --- a/api/src/icarus/models/pattern.py +++ b/api/src/icarus/models/pattern.py @@ -11,7 +11,7 @@ class PatternResult(BaseModel): entity_ids: list[str] sources: list[SourceAttribution] exposure_tier: str = "public_safe" - intelligence_tier: str = "advanced" + intelligence_tier: str = "community" class PatternResponse(BaseModel): diff --git a/api/src/icarus/routers/patterns.py b/api/src/icarus/routers/patterns.py index 5234200..1de5c33 100644 --- a/api/src/icarus/routers/patterns.py +++ b/api/src/icarus/routers/patterns.py @@ -13,6 +13,15 @@ from icarus.services.public_guard import enforce_entity_lookup_enabled router = APIRouter(prefix="/api/v1/patterns", tags=["patterns"]) +_PATTERN_ENGINE_DISABLED_DETAIL = ( + "Pattern engine temporarily unavailable pending validation." +) + + +def _enforce_patterns_enabled() -> None: + if not settings.patterns_enabled: + raise HTTPException(status_code=503, detail=_PATTERN_ENGINE_DISABLED_DETAIL) + async def run_all_patterns( driver: AsyncDriver, @@ -58,6 +67,7 @@ async def get_patterns_for_entity( lang: Annotated[str, Query()] = "pt", include_probable: Annotated[bool, Query()] = False, ) -> PatternResponse: + _enforce_patterns_enabled() if settings.public_mode: enforce_entity_lookup_enabled() results = await run_all_patterns( @@ -85,6 +95,7 @@ async def get_specific_pattern( lang: Annotated[str, Query()] = "pt", include_probable: Annotated[bool, Query()] = False, ) -> PatternResponse: + _enforce_patterns_enabled() if settings.public_mode: enforce_entity_lookup_enabled() available = [row["id"] for row in provider.list_patterns()] @@ -112,4 +123,5 @@ async def get_specific_pattern( async def list_patterns( provider: Annotated[IntelligenceProvider, Depends(get_intelligence_provider)], ) -> dict[str, list[dict[str, str]]]: + _enforce_patterns_enabled() return {"patterns": provider.list_patterns()} diff --git a/api/src/icarus/routers/public.py b/api/src/icarus/routers/public.py index 9d8003a..5645ef9 100644 --- a/api/src/icarus/routers/public.py +++ b/api/src/icarus/routers/public.py @@ -6,6 +6,7 @@ from typing import Annotated, Any from fastapi import APIRouter, Depends, HTTPException, Query from neo4j import AsyncSession # noqa: TC002 +from icarus.config import settings from icarus.dependencies import get_session from icarus.models.entity import SourceAttribution from icarus.models.graph import GraphEdge, GraphNode, GraphResponse @@ -107,6 +108,11 @@ async def public_patterns_for_company( session: Annotated[AsyncSession, Depends(get_session)], lang: Annotated[str, Query()] = "pt", ) -> PatternResponse: + if not settings.patterns_enabled: + raise HTTPException( + status_code=503, + detail="Pattern engine temporarily unavailable pending validation.", + ) company_id, company_cnpj = await _resolve_company(session, cnpj_or_id) records = await execute_query( session, diff --git a/api/src/icarus/services/intelligence_provider.py b/api/src/icarus/services/intelligence_provider.py index a5b8633..3a0a4ee 100644 --- a/api/src/icarus/services/intelligence_provider.py +++ b/api/src/icarus/services/intelligence_provider.py @@ -258,8 +258,8 @@ class CommunityIntelligenceProvider: return {} -class AdvancedIntelligenceProvider: - tier = "advanced" +class FullIntelligenceProvider: + tier = "full" def list_patterns(self) -> list[dict[str, str]]: return _build_pattern_meta(tuple(_load_pattern_queries().keys())) @@ -324,7 +324,7 @@ class AdvancedIntelligenceProvider: _PROVIDER_CACHE: dict[str, IntelligenceProvider] = {} -def _advanced_modules_available() -> bool: +def _full_modules_available() -> bool: return ( find_spec("icarus.services.pattern_service") is not None and find_spec("icarus.services.score_service") is not None @@ -333,9 +333,9 @@ def _advanced_modules_available() -> bool: def get_default_provider() -> IntelligenceProvider: tier = settings.product_tier.strip().lower() - if tier not in {"community", "advanced"}: - tier = "advanced" - if tier == "advanced" and not _advanced_modules_available(): + if tier not in {"community", "full"}: + tier = "full" + if tier == "full" and not _full_modules_available(): tier = "community" cached = _PROVIDER_CACHE.get(tier) if cached is not None: @@ -344,6 +344,6 @@ def get_default_provider() -> IntelligenceProvider: if tier == "community": provider = CommunityIntelligenceProvider() else: - provider = AdvancedIntelligenceProvider() + provider = FullIntelligenceProvider() _PROVIDER_CACHE[tier] = provider return provider diff --git a/api/tests/unit/test_entity.py b/api/tests/unit/test_entity.py index 19f959d..cbdefe8 100644 --- a/api/tests/unit/test_entity.py +++ b/api/tests/unit/test_entity.py @@ -202,7 +202,7 @@ def test_self_dealing_uses_value_committed_or_value_paid() -> None: try: cypher = _load_cypher("pattern_self_dealing") except FileNotFoundError: - pytest.skip("pattern_self_dealing.cypher not shipped in public edition snapshot") + pytest.skip("pattern_self_dealing.cypher not available in this scope") # Must use coalesce with both TransfereGov fields AND Transparencia fallback assert "a.value_committed" in cypher, ( "pattern_self_dealing.cypher missing a.value_committed (TransfereGov)" diff --git a/api/tests/unit/test_intelligence_provider.py b/api/tests/unit/test_intelligence_provider.py index a83030d..5e47dda 100644 --- a/api/tests/unit/test_intelligence_provider.py +++ b/api/tests/unit/test_intelligence_provider.py @@ -4,15 +4,15 @@ from icarus.config import settings from icarus.services import intelligence_provider as provider_module -def test_falls_back_to_community_when_advanced_modules_missing( +def test_falls_back_to_community_when_full_modules_missing( monkeypatch: MonkeyPatch, ) -> None: original_tier = settings.product_tier try: - monkeypatch.setattr(settings, "product_tier", "advanced") + monkeypatch.setattr(settings, "product_tier", "full") monkeypatch.setattr( provider_module, - "_advanced_modules_available", + "_full_modules_available", lambda: False, ) provider_module._PROVIDER_CACHE.clear() @@ -25,22 +25,22 @@ def test_falls_back_to_community_when_advanced_modules_missing( settings.product_tier = original_tier -def test_keeps_advanced_when_modules_are_available( +def test_keeps_full_when_modules_are_available( monkeypatch: MonkeyPatch, ) -> None: original_tier = settings.product_tier try: - monkeypatch.setattr(settings, "product_tier", "advanced") + monkeypatch.setattr(settings, "product_tier", "full") monkeypatch.setattr( provider_module, - "_advanced_modules_available", + "_full_modules_available", lambda: True, ) provider_module._PROVIDER_CACHE.clear() provider = provider_module.get_default_provider() - assert isinstance(provider, provider_module.AdvancedIntelligenceProvider) + assert isinstance(provider, provider_module.FullIntelligenceProvider) finally: provider_module._PROVIDER_CACHE.clear() settings.product_tier = original_tier diff --git a/api/tests/unit/test_patterns.py b/api/tests/unit/test_patterns.py index da5bc72..0091092 100644 --- a/api/tests/unit/test_patterns.py +++ b/api/tests/unit/test_patterns.py @@ -3,10 +3,16 @@ from unittest.mock import AsyncMock, patch import pytest from httpx import AsyncClient +from icarus.config import settings from icarus.models.pattern import PATTERN_METADATA from icarus.services.pattern_service import PATTERN_QUERIES +@pytest.fixture(autouse=True) +def _enable_patterns(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(settings, "patterns_enabled", True) + + def test_all_patterns_have_metadata() -> None: for pattern_id in PATTERN_QUERIES: assert pattern_id in PATTERN_METADATA, f"Missing metadata for {pattern_id}" @@ -38,11 +44,22 @@ async def test_list_patterns_endpoint(client: AsyncClient) -> None: assert response.status_code == 200 data = response.json() assert "patterns" in data - assert len(data["patterns"]) == 18 + assert len(data["patterns"]) == 4 ids = {p["id"] for p in data["patterns"]} - assert "self_dealing_amendment" in ids - assert "donation_contract_loop" in ids + assert "sanctioned_still_receiving" in ids + assert "debtor_contracts" in ids + + +@pytest.mark.anyio +async def test_patterns_endpoint_returns_503_when_disabled( + client: AsyncClient, + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "patterns_enabled", False) + response = await client.get("/api/v1/patterns/") + assert response.status_code == 503 + assert "temporarily unavailable" in response.json()["detail"] @pytest.mark.anyio diff --git a/api/tests/unit/test_public_mode.py b/api/tests/unit/test_public_mode.py index 0111deb..e8f63e7 100644 --- a/api/tests/unit/test_public_mode.py +++ b/api/tests/unit/test_public_mode.py @@ -125,7 +125,16 @@ async def test_public_meta_endpoint(client: AsyncClient) -> None: @pytest.mark.anyio async def test_public_patterns_company_endpoint(client: AsyncClient) -> None: + with patch("icarus.routers.public.settings.patterns_enabled", False): + response = await client.get("/api/v1/public/patterns/company/11111111000111") + assert response.status_code == 503 + assert "temporarily unavailable" in response.json()["detail"] + + +@pytest.mark.anyio +async def test_public_patterns_company_endpoint_when_enabled(client: AsyncClient) -> None: with ( + patch("icarus.routers.public.settings.patterns_enabled", True), patch( "icarus.routers.public.execute_query_single", new_callable=AsyncMock, diff --git a/docs/legal/legal-index.md b/docs/legal/legal-index.md index fe4deb7..59dc6f5 100644 --- a/docs/legal/legal-index.md +++ b/docs/legal/legal-index.md @@ -1,7 +1,7 @@ -# Legal and Ethics Index — WTG / Icarus +# Legal and Ethics Index — WTG -Policy-Version: v1.0.0 -Effective-Date: 2026-02-28 +Policy-Version: v1.1.0 +Effective-Date: 2026-03-01 Owner: WTG Governance Team ## Core policies @@ -17,45 +17,18 @@ Owner: WTG Governance Team ## Applicability by deployment model -### WTG Open (public-safe deployment) - -Applies directly: - -- ETHICS -- LGPD baseline -- PRIVACY -- TERMS -- DISCLAIMER -- SECURITY -- ABUSE_RESPONSE - -Additional operational constraints: +This repository uses a public-safe profile: - `PUBLIC_MODE=true` -- person-level entities blocked by default -- public privacy gate mandatory in CI - -### Icarus Advanced (internal deployment) - -Applies directly: - -- ETHICS -- LGPD baseline -- PRIVACY -- TERMS -- SECURITY -- ABUSE_RESPONSE - -Additional private controls: - -- internal runbooks -- restricted infrastructure details -- advanced investigation workflows +- `PUBLIC_ALLOW_PERSON=false` +- `PUBLIC_ALLOW_ENTITY_LOOKUP=false` +- `PUBLIC_ALLOW_INVESTIGATIONS=false` +- `PATTERNS_ENABLED=false` ## Change log policy Any policy change must: -1. Update Policy-Version and Effective-Date. -2. Include changelog note in PR description. +1. Update `Policy-Version` and `Effective-Date`. +2. Include a changelog note in the PR description. 3. Pass `Compliance Pack Gate`. diff --git a/docs/legal/public-compliance-pack.md b/docs/legal/public-compliance-pack.md index d3f65db..9557f6d 100644 --- a/docs/legal/public-compliance-pack.md +++ b/docs/legal/public-compliance-pack.md @@ -11,7 +11,7 @@ ## 2. LGPD-compatible operating principles - Purpose limitation: investigative transparency and civic oversight. - Data minimization: no person-level lookup in public surface. -- Security by design: role-separated advanced environment. +- Security by design: least-privilege runtime and auditable controls. - Transparency: source attribution and coverage caveats on every report. ## 3. Public terms of use requirements diff --git a/docs/release/public_boundary_matrix.csv b/docs/release/public_boundary_matrix.csv index 78a79d0..e200cad 100644 --- a/docs/release/public_boundary_matrix.csv +++ b/docs/release/public_boundary_matrix.csv @@ -1,33 +1,19 @@ path,classification,reason,action_for_public_repo -api/**,PUBLIC,Core API code is required for public edition,include -etl/**,PUBLIC,Core ingestion framework is required for public edition,include -frontend/**,PUBLIC,Public demo UI is required,include -infra/**,PUBLIC with review,Keep only generic local/dev deployment docs,include reviewed subset -etl/src/icarus_etl/entity_resolution/**,REMOVE_FROM_PUBLIC,Specialized entity resolution and matching logic,exclude -api/src/icarus/services/score_service.py,REMOVE_FROM_PUBLIC,Specialized exposure scoring logic,exclude -api/src/icarus/services/pattern_service.py,REMOVE_FROM_PUBLIC,Specialized high-sensitivity pattern intelligence orchestration,exclude -api/src/icarus/queries/pattern_*.cypher,REMOVE_FROM_PUBLIC,Specialized high-sensitivity pattern query logic,exclude -api/src/icarus/queries/entity_score*.cypher,REMOVE_FROM_PUBLIC,Specialized score/timeline aggregation queries,exclude -scripts/link_persons.cypher,REMOVE_FROM_PUBLIC,Specialized entity linking heuristics,exclude -scripts/link_partners_probable.cypher,REMOVE_FROM_PUBLIC,Specialized probable identity linking heuristics,exclude -docs/brand/**,PUBLIC,Brand governance for release,include -docs/demo/**,PUBLIC,Demo dataset contract and constraints,include -docs/legal/**,PUBLIC,Compliance and responsible-use baseline,include -ETHICS.md,PUBLIC,Public ethics policy baseline,include -LGPD.md,PUBLIC,LGPD baseline for public-safe operation,include -PRIVACY.md,PUBLIC,Public privacy policy baseline,include -TERMS.md,PUBLIC,Public terms of use baseline,include -DISCLAIMER.md,PUBLIC,Non-accusatory and limitations statement,include -SECURITY.md,PUBLIC,Public vulnerability disclosure policy,include -ABUSE_RESPONSE.md,PUBLIC,Public anti-abuse response policy,include -docs/source_registry_br_v1.csv,PUBLIC,Program governance and transparency,include -docs/data-sources.md,PUBLIC,Source catalog visibility,include -CLAUDE.md,REMOVE_FROM_PUBLIC,Contains operational host and infrastructure paths,exclude -.mcp.json,REMOVE_FROM_PUBLIC,Contains local runtime MCP wiring,exclude -scripts/auto_finalize_pncp_backfill.sh,REMOVE_FROM_PUBLIC,Production operational finalizer tied to server paths,exclude -scripts/storage_capacity_report.sh,INTERNAL,Operational script with server assumptions,exclude by default -docs/shadow_rollout_runbook.md,REMOVE_FROM_PUBLIC,Operational production rollout details,exclude -docs/ingestion_priority_runbook.md,REMOVE_FROM_PUBLIC,Contains production data paths and credential procedures,exclude -docs/ops/storage_operations.md,REMOVE_FROM_PUBLIC,Contains production operational process and server paths,exclude -audit-results/**,REMOVE_FROM_PUBLIC,Operational evidence and internal logs,exclude -data/**,INTERNAL by default,Only data/demo synthetic subset allowed,include only data/demo +api/**,PUBLIC,Core API code for the public edition,include +etl/**,PUBLIC,Core ingestion framework and loaders,include +frontend/**,PUBLIC,Public explorer UI,include +infra/**,PUBLIC with review,Keep generic local/dev infrastructure without secrets,include reviewed subset +docs/**,PUBLIC with review,Keep public documentation and legal pack,include reviewed subset +.github/workflows/**,PUBLIC,CI and security transparency,include +scripts/**,PUBLIC with review,Keep public utilities and gates,include reviewed subset +data/demo/**,PUBLIC,Synthetic demo dataset only,include +api/src/icarus/services/pattern_service.py,REMOVE_FROM_PUBLIC,Pattern engine disabled pending validation,exclude +api/src/icarus/queries/pattern_*.cypher,REMOVE_FROM_PUBLIC,Pattern query engine disabled pending validation,exclude +scripts/auto_finalize_pncp_backfill.sh,REMOVE_FROM_PUBLIC,Production operational script with server-specific assumptions,exclude +docs/shadow_rollout_runbook.md,REMOVE_FROM_PUBLIC,Production operational runbook details,exclude +docs/ingestion_priority_runbook.md,REMOVE_FROM_PUBLIC,Production operational runbook details,exclude +docs/ops/storage_operations.md,REMOVE_FROM_PUBLIC,Production operational runbook details,exclude +CLAUDE.md,REMOVE_FROM_PUBLIC,Internal operational context,exclude +.mcp.json,REMOVE_FROM_PUBLIC,Local operator MCP wiring,exclude +audit-results/**,REMOVE_FROM_PUBLIC,Operational evidence/log artifacts,exclude +data/**,INTERNAL by default,Only synthetic demo subset is public,include only data/demo diff --git a/docs/release/public_endpoint_matrix.md b/docs/release/public_endpoint_matrix.md index 3eaa69c..1e52a2d 100644 --- a/docs/release/public_endpoint_matrix.md +++ b/docs/release/public_endpoint_matrix.md @@ -1,27 +1,29 @@ -# Public vs Advanced Endpoint Matrix +# Public Endpoint Matrix + +## Public-safe defaults -## Public mode defaults - `PUBLIC_MODE=true` - `PUBLIC_ALLOW_PERSON=false` - `PUBLIC_ALLOW_ENTITY_LOOKUP=false` - `PUBLIC_ALLOW_INVESTIGATIONS=false` +- `PATTERNS_ENABLED=false` ## Endpoint behavior -| Endpoint | PUBLIC_MODE=false (advanced) | PUBLIC_MODE=true (default) | -|---|---|---| -| `GET /api/v1/entity/{cpf_or_cnpj}` | Allowed | `403` (`Entity lookup endpoint disabled in public mode`) | -| `GET /api/v1/entity/by-element-id/{id}` | Allowed | `403` (`Entity lookup endpoint disabled in public mode`) | -| `GET /api/v1/entity/{id}/connections` | Allowed | Person/Partner targets filtered out | -| `GET /api/v1/search` | Allowed | Person/Partner results filtered out | -| `GET /api/v1/graph/{entity_id}` | Allowed | Person/Partner center blocked, person nodes filtered | -| `GET /api/v1/patterns/{entity_id}` | Allowed | `403` when `PUBLIC_ALLOW_ENTITY_LOOKUP=false` | -| `GET /api/v1/investigations/*` | Allowed | `403` (`Investigation endpoints disabled in public mode`) | -| `GET /api/v1/public/meta` | Allowed | Allowed | -| `GET /api/v1/public/patterns/company/{cnpj_or_id}` | Allowed | Allowed | -| `GET /api/v1/public/graph/company/{cnpj_or_id}` | Allowed | Allowed | +| Endpoint | Behavior with public-safe defaults | +|---|---| +| `GET /api/v1/entity/{cpf_or_cnpj}` | `403` (`Entity lookup endpoint disabled in public mode`) | +| `GET /api/v1/entity/by-element-id/{id}` | `403` (`Entity lookup endpoint disabled in public mode`) | +| `GET /api/v1/entity/{id}/connections` | Person/Partner targets filtered out | +| `GET /api/v1/search` | Person/Partner results filtered out | +| `GET /api/v1/graph/{entity_id}` | Person/Partner center blocked, person nodes filtered | +| `GET /api/v1/patterns/{entity_id}` | `503` (`Pattern engine temporarily unavailable pending validation.`) | +| `GET /api/v1/investigations/*` | `403` (`Investigation endpoints disabled in public mode`) | +| `GET /api/v1/public/meta` | Allowed | +| `GET /api/v1/public/patterns/company/{cnpj_or_id}` | `503` while pattern engine is disabled | +| `GET /api/v1/public/graph/company/{cnpj_or_id}` | Allowed | -## Exposure tier contract -- `public_safe`: company/contract/sanction/aggregate entities allowed on public surface. -- `restricted`: person-adjacent entities (not returned by default in public mode). -- `internal_only`: workspace/admin artifacts (`User`, `Investigation`, `Annotation`, `Tag`). +## Exposure tiers + +- `public_safe`: company/contract/sanction/aggregate entities allowed in public surface. +- `restricted`: person-adjacent entities, filtered by default. diff --git a/docs/release/public_repo_release_checklist.md b/docs/release/public_repo_release_checklist.md index 3cb29ec..c1fac45 100644 --- a/docs/release/public_repo_release_checklist.md +++ b/docs/release/public_repo_release_checklist.md @@ -10,7 +10,7 @@ bash scripts/prepare_public_snapshot.sh /Users/brunoclz/CORRUPTOS /tmp/world-tra cd /tmp/world-transparency-graph-public git init git add . -git commit -m "Initial public public edition release (WTG)" +git commit -m "Initial public release (WTG)" ``` ## 3) Create GitHub repository (manual) @@ -46,7 +46,9 @@ Require all checks: - `PUBLIC_ALLOW_PERSON=false` - `PUBLIC_ALLOW_ENTITY_LOOKUP=false` - `PUBLIC_ALLOW_INVESTIGATIONS=false` + - `PATTERNS_ENABLED=false` - `VITE_PUBLIC_MODE=true` + - `VITE_PATTERNS_ENABLED=false` ## 7) Final checks before launch - `python scripts/check_public_privacy.py --repo-root .` => `PASS` diff --git a/etl/src/icarus_etl/linking_hooks.py b/etl/src/icarus_etl/linking_hooks.py index d4a9a29..3a46d62 100644 --- a/etl/src/icarus_etl/linking_hooks.py +++ b/etl/src/icarus_etl/linking_hooks.py @@ -44,8 +44,8 @@ def run_post_load_hooks( linking_tier: str, ) -> None: tier = linking_tier.strip().lower() - if tier not in {"community", "advanced"}: - tier = "advanced" + if tier not in {"community", "full"}: + tier = "full" if tier == "community": logger.info("Post-load hooks skipped (LINKING_TIER=community)") diff --git a/etl/src/icarus_etl/runner.py b/etl/src/icarus_etl/runner.py index f61826c..ccab1a0 100644 --- a/etl/src/icarus_etl/runner.py +++ b/etl/src/icarus_etl/runner.py @@ -117,8 +117,8 @@ def cli() -> None: @click.option("--chunk-size", type=int, default=50_000, help="Chunk size for batch processing") @click.option( "--linking-tier", - type=click.Choice(["community", "advanced"]), - default=os.getenv("LINKING_TIER", "advanced"), + type=click.Choice(["community", "full"]), + default=os.getenv("LINKING_TIER", "full"), show_default=True, help="Post-load linking strategy tier", ) diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx index b342dc8..07a8859 100644 --- a/frontend/src/App.tsx +++ b/frontend/src/App.tsx @@ -4,7 +4,7 @@ import { Navigate, Route, Routes, useParams } from "react-router"; import { AppShell } from "./components/common/AppShell"; import { PublicShell } from "./components/common/PublicShell"; import { Spinner } from "./components/common/Spinner"; -import { IS_PUBLIC_MODE } from "./config/runtime"; +import { IS_PATTERNS_ENABLED, IS_PUBLIC_MODE } from "./config/runtime"; import { Baseline } from "./pages/Baseline"; import { Dashboard } from "./pages/Dashboard"; import { Investigations } from "./pages/Investigations"; @@ -73,8 +73,8 @@ export function App() { } /> }>} /> } /> - } /> - } /> + {IS_PATTERNS_ENABLED && } />} + {IS_PATTERNS_ENABLED && } />} } /> {!IS_PUBLIC_MODE && } />} {!IS_PUBLIC_MODE && } />} diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index 9e61003..e18f578 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -137,7 +137,7 @@ export interface PatternResult { data: Record; entity_ids: string[]; sources: { database: string }[]; - intelligence_tier?: "community" | "advanced"; + intelligence_tier?: "community" | "full"; } export interface PatternResponse { @@ -407,7 +407,7 @@ export interface ExposureResponse { peer_group: string; peer_count: number; sources: SourceAttribution[]; - intelligence_tier?: "community" | "advanced"; + intelligence_tier?: "community" | "full"; } export interface TimelineEvent { diff --git a/frontend/src/components/common/AppShell.tsx b/frontend/src/components/common/AppShell.tsx index ae0f6d3..85818cc 100644 --- a/frontend/src/components/common/AppShell.tsx +++ b/frontend/src/components/common/AppShell.tsx @@ -19,7 +19,7 @@ import { Kbd } from "@/components/common/Kbd"; import { KeyboardShortcutsHelp } from "@/components/common/KeyboardShortcutsHelp"; import { StatusBar } from "@/components/common/StatusBar"; import { ToastContainer } from "@/components/common/ToastContainer"; -import { IS_PUBLIC_MODE } from "@/config/runtime"; +import { IS_PATTERNS_ENABLED, IS_PUBLIC_MODE } from "@/config/runtime"; import { useKeyboardShortcuts } from "@/hooks/useKeyboardShortcuts"; import { useAuthStore } from "@/stores/auth"; @@ -99,11 +99,15 @@ export function AppShell() { const base: Action[] = [ { id: "go-dashboard", label: t("command.goToDashboard"), shortcut: "cmd+1", group: t("command.navigation"), handler: () => navigate("/app") }, { id: "go-search", label: t("command.goToSearch"), shortcut: "cmd+2", group: t("command.navigation"), handler: () => navigate("/app/search") }, - { id: "go-patterns", label: t("command.goToPatterns"), shortcut: "cmd+3", group: t("command.navigation"), handler: () => navigate("/app/patterns") }, { id: "toggle-sidebar", label: t("command.toggleSidebar"), shortcut: "cmd+b", group: t("command.actions"), handler: () => setSidebarCollapsed((p) => !p) }, { id: "command-palette", label: t("shortcuts.commandPalette"), shortcut: "cmd+k", group: t("command.actions"), handler: () => setCommandOpen(true) }, { id: "show-shortcuts", label: t("command.showShortcuts"), shortcut: "shift+?", group: t("command.actions"), handler: () => setShortcutsOpen(true) }, ]; + if (IS_PATTERNS_ENABLED) { + base.push( + { id: "go-patterns", label: t("command.goToPatterns"), shortcut: "cmd+3", group: t("command.navigation"), handler: () => navigate("/app/patterns") }, + ); + } if (!IS_PUBLIC_MODE) { base.push( { id: "go-investigations", label: t("command.goToInvestigations"), shortcut: "cmd+4", group: t("command.navigation"), handler: () => navigate("/app/investigations") }, diff --git a/frontend/src/config/runtime.ts b/frontend/src/config/runtime.ts index 972e5bf..2656623 100644 --- a/frontend/src/config/runtime.ts +++ b/frontend/src/config/runtime.ts @@ -1 +1,2 @@ export const IS_PUBLIC_MODE = import.meta.env.VITE_PUBLIC_MODE === "true"; +export const IS_PATTERNS_ENABLED = import.meta.env.VITE_PATTERNS_ENABLED === "true"; diff --git a/frontend/src/pages/Landing.module.css b/frontend/src/pages/Landing.module.css index 6786ec0..ce9beb7 100644 --- a/frontend/src/pages/Landing.module.css +++ b/frontend/src/pages/Landing.module.css @@ -1,4 +1,4 @@ -/* ICARUS Landing — Intelligence briefing meets extended data platform */ +/* WTG landing styles */ .hero, .features, diff --git a/frontend/src/pages/Landing.tsx b/frontend/src/pages/Landing.tsx index dfb7366..aadfab4 100644 --- a/frontend/src/pages/Landing.tsx +++ b/frontend/src/pages/Landing.tsx @@ -9,7 +9,7 @@ import { InvestigationIcon, PatternIcon, } from "@/components/landing/FeatureIcons"; -import { IS_PUBLIC_MODE } from "@/config/runtime"; +import { IS_PATTERNS_ENABLED, IS_PUBLIC_MODE } from "@/config/runtime"; import { HeroGraph } from "@/components/landing/HeroGraph"; import { NetworkAnimation } from "@/components/landing/NetworkAnimation"; import { StatsBar } from "@/components/landing/StatsBar"; @@ -109,6 +109,10 @@ export function Landing() { .catch(() => {}); }, []); + const visibleFeatures = IS_PATTERNS_ENABLED + ? FEATURES + : FEATURES.filter((feature) => feature.key !== "patterns"); + return ( <>
@@ -146,7 +150,7 @@ export function Landing() { {t("landing.features.sectionHeading")}
- {FEATURES.map(({ key, icon, iconBg }) => ( + {visibleFeatures.map(({ key, icon, iconBg }) => ( list[str]: errors: list[str] = [] for pattern in FORBIDDEN_PATH_GLOBS: @@ -57,7 +51,7 @@ def check_forbidden_imports(repo_root: Path) -> list[str]: def main() -> int: - parser = argparse.ArgumentParser(description="Validate public edition public boundary") + parser = argparse.ArgumentParser(description="Validate public edition scope") parser.add_argument("--repo-root", default=".", help="Path to repository root") args = parser.parse_args() diff --git a/scripts/prepare_public_snapshot.sh b/scripts/prepare_public_snapshot.sh index 7f78ed6..ebe0257 100755 --- a/scripts/prepare_public_snapshot.sh +++ b/scripts/prepare_public_snapshot.sh @@ -6,7 +6,6 @@ OUT_DIR="${2:-/tmp/world-transparency-graph-public-$(date +%Y%m%d_%H%M%S)}" mkdir -p "$OUT_DIR" -# Include only intended public edition directories/files rsync -a \ --exclude='**/.venv/***' \ --exclude='**/__pycache__/***' \ @@ -15,13 +14,13 @@ rsync -a \ --exclude='**/.ruff_cache/***' \ --exclude='frontend/node_modules/***' \ --exclude='etl/data/***' \ + --exclude='**/dist/***' \ + --exclude='**/build/***' \ + --exclude='**/*.pyc' \ --exclude='.env' \ --exclude='api/.env' \ --exclude='etl/.env' \ --exclude='frontend/.env' \ - --exclude='**/dist/***' \ - --exclude='**/build/***' \ - --exclude='**/*.pyc' \ --include='api/' \ --include='api/***' \ --include='etl/' \ @@ -32,7 +31,7 @@ rsync -a \ --include='infra/***' \ --include='docs/' \ --include='docs/brand/' \ - --include='docs/brand/***' \ + --include='docs/brand/wtg-header.png' \ --include='docs/demo/' \ --include='docs/demo/***' \ --include='docs/legal/' \ @@ -70,32 +69,23 @@ rsync -a \ --exclude='*' \ "$SRC_ROOT/" "$OUT_DIR/" -# Explicit removals for internal-only artifacts +# Explicit removals for sensitive operational artifacts and disabled pattern engine. rm -f "$OUT_DIR/CLAUDE.md" rm -f "$OUT_DIR/.mcp.json" rm -f "$OUT_DIR/docs/shadow_rollout_runbook.md" rm -f "$OUT_DIR/docs/ingestion_priority_runbook.md" rm -f "$OUT_DIR/docs/ops/storage_operations.md" rm -f "$OUT_DIR/scripts/auto_finalize_pncp_backfill.sh" -rm -f "$OUT_DIR/scripts/link_persons.cypher" -rm -f "$OUT_DIR/scripts/link_partners_probable.cypher" -rm -rf "$OUT_DIR/etl/src/icarus_etl/entity_resolution" -rm -f "$OUT_DIR/api/src/icarus/services/score_service.py" rm -f "$OUT_DIR/api/src/icarus/services/pattern_service.py" rm -f "$OUT_DIR/api/src/icarus/queries/pattern_"*.cypher -rm -f "$OUT_DIR/api/src/icarus/queries/entity_score"*.cypher rm -f "$OUT_DIR/api/tests/unit/test_patterns.py" rm -f "$OUT_DIR/api/tests/unit/test_patterns_new.py" -rm -f "$OUT_DIR/api/tests/unit/test_score_service.py" -rm -f "$OUT_DIR/etl/tests/test_entity_resolution.py" -rm -f "$OUT_DIR/etl/tests/integration/test_link_persons.py" + rm -rf "$OUT_DIR/audit-results" -# Ensure demo data exists python3 "$OUT_DIR/scripts/generate_demo_dataset.py" --output "$OUT_DIR/data/demo/synthetic_graph.json" >/dev/null - -# Run public privacy gate on generated snapshot python3 "$OUT_DIR/scripts/check_public_privacy.py" --repo-root "$OUT_DIR" +python3 "$OUT_DIR/scripts/check_compliance_pack.py" --repo-root "$OUT_DIR" python3 "$OUT_DIR/scripts/check_open_core_boundary.py" --repo-root "$OUT_DIR" printf 'Public snapshot prepared at: %s\n' "$OUT_DIR"