Compare commits
15 Commits
9c14820c69
...
1db05e6caa
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1db05e6caa | ||
|
|
8655bd81bc | ||
|
|
5c955691a9 | ||
|
|
0bca368a7d | ||
|
|
d9a1f6a0f8 | ||
|
|
eeffac31bf | ||
|
|
92dd046820 | ||
|
|
7e68b30eb8 | ||
|
|
2f5445284b | ||
|
|
dec7b64b17 | ||
|
|
0500733541 | ||
|
|
8f8213605f | ||
|
|
621ac8d300 | ||
|
|
3373b542e9 | ||
|
|
abdcdb581f |
@@ -4,5 +4,5 @@ import gateway from './[rpc]';
|
||||
import { rewriteToSebuf } from '../../../server/alias-rewrite';
|
||||
|
||||
// Alias for documented v1 URL. See server/alias-rewrite.ts.
|
||||
export default (req: Request) =>
|
||||
rewriteToSebuf(req, '/api/scenario/v1/run-scenario', gateway);
|
||||
export default (req: Request, ctx: { waitUntil: (p: Promise<unknown>) => void }) =>
|
||||
rewriteToSebuf(req, '/api/scenario/v1/run-scenario', gateway, ctx);
|
||||
|
||||
@@ -4,5 +4,5 @@ import gateway from './[rpc]';
|
||||
import { rewriteToSebuf } from '../../../server/alias-rewrite';
|
||||
|
||||
// Alias for documented v1 URL. See server/alias-rewrite.ts.
|
||||
export default (req: Request) =>
|
||||
rewriteToSebuf(req, '/api/scenario/v1/get-scenario-status', gateway);
|
||||
export default (req: Request, ctx: { waitUntil: (p: Promise<unknown>) => void }) =>
|
||||
rewriteToSebuf(req, '/api/scenario/v1/get-scenario-status', gateway, ctx);
|
||||
|
||||
@@ -4,5 +4,5 @@ import gateway from './[rpc]';
|
||||
import { rewriteToSebuf } from '../../../server/alias-rewrite';
|
||||
|
||||
// Alias for documented v1 URL. See server/alias-rewrite.ts.
|
||||
export default (req: Request) =>
|
||||
rewriteToSebuf(req, '/api/scenario/v1/list-scenario-templates', gateway);
|
||||
export default (req: Request, ctx: { waitUntil: (p: Promise<unknown>) => void }) =>
|
||||
rewriteToSebuf(req, '/api/scenario/v1/list-scenario-templates', gateway, ctx);
|
||||
|
||||
@@ -4,5 +4,5 @@ import gateway from './[rpc]';
|
||||
import { rewriteToSebuf } from '../../../server/alias-rewrite';
|
||||
|
||||
// Alias for documented v1 URL. See server/alias-rewrite.ts.
|
||||
export default (req: Request) =>
|
||||
rewriteToSebuf(req, '/api/supply-chain/v1/get-country-products', gateway);
|
||||
export default (req: Request, ctx: { waitUntil: (p: Promise<unknown>) => void }) =>
|
||||
rewriteToSebuf(req, '/api/supply-chain/v1/get-country-products', gateway, ctx);
|
||||
|
||||
@@ -4,5 +4,5 @@ import gateway from './[rpc]';
|
||||
import { rewriteToSebuf } from '../../../server/alias-rewrite';
|
||||
|
||||
// Alias for documented v1 URL. See server/alias-rewrite.ts.
|
||||
export default (req: Request) =>
|
||||
rewriteToSebuf(req, '/api/supply-chain/v1/get-multi-sector-cost-shock', gateway);
|
||||
export default (req: Request, ctx: { waitUntil: (p: Promise<unknown>) => void }) =>
|
||||
rewriteToSebuf(req, '/api/supply-chain/v1/get-multi-sector-cost-shock', gateway, ctx);
|
||||
|
||||
@@ -113,6 +113,7 @@ async function _createCheckoutSession(
|
||||
"https://finance.worldmonitor.app",
|
||||
"https://commodity.worldmonitor.app",
|
||||
"https://happy.worldmonitor.app",
|
||||
"https://energy.worldmonitor.app",
|
||||
new URL(siteUrl).origin,
|
||||
]);
|
||||
if (!allowedOrigins.has(parsedReturnUrl.origin)) {
|
||||
|
||||
@@ -48,6 +48,17 @@ paths:
|
||||
required: false
|
||||
schema:
|
||||
type: boolean
|
||||
- name: include_tankers
|
||||
in: query
|
||||
description: |-
|
||||
When true, populate VesselSnapshot.tanker_reports with per-vessel
|
||||
position reports for AIS ship-type 80-89 (tanker class). Used by the
|
||||
Energy Atlas live-tanker map layer. Stored separately from
|
||||
candidate_reports (which is military-only) so consumers self-select
|
||||
via this flag rather than the response field changing meaning.
|
||||
required: false
|
||||
schema:
|
||||
type: boolean
|
||||
responses:
|
||||
"200":
|
||||
description: Successful response
|
||||
@@ -171,6 +182,14 @@ components:
|
||||
When true, populate VesselSnapshot.candidate_reports with per-vessel
|
||||
position reports. Clients with no position callbacks should leave this
|
||||
false to keep responses small.
|
||||
includeTankers:
|
||||
type: boolean
|
||||
description: |-
|
||||
When true, populate VesselSnapshot.tanker_reports with per-vessel
|
||||
position reports for AIS ship-type 80-89 (tanker class). Used by the
|
||||
Energy Atlas live-tanker map layer. Stored separately from
|
||||
candidate_reports (which is military-only) so consumers self-select
|
||||
via this flag rather than the response field changing meaning.
|
||||
description: GetVesselSnapshotRequest specifies filters for the vessel snapshot.
|
||||
GetVesselSnapshotResponse:
|
||||
type: object
|
||||
@@ -205,6 +224,10 @@ components:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/SnapshotCandidateReport'
|
||||
tankerReports:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/SnapshotCandidateReport'
|
||||
description: VesselSnapshot represents a point-in-time view of civilian AIS vessel data.
|
||||
AisDensityZone:
|
||||
type: object
|
||||
|
||||
@@ -3977,6 +3977,17 @@ paths:
|
||||
required: false
|
||||
schema:
|
||||
type: boolean
|
||||
- name: include_tankers
|
||||
in: query
|
||||
description: |-
|
||||
When true, populate VesselSnapshot.tanker_reports with per-vessel
|
||||
position reports for AIS ship-type 80-89 (tanker class). Used by the
|
||||
Energy Atlas live-tanker map layer. Stored separately from
|
||||
candidate_reports (which is military-only) so consumers self-select
|
||||
via this flag rather than the response field changing meaning.
|
||||
required: false
|
||||
schema:
|
||||
type: boolean
|
||||
responses:
|
||||
"200":
|
||||
description: Successful response
|
||||
@@ -14743,6 +14754,14 @@ components:
|
||||
When true, populate VesselSnapshot.candidate_reports with per-vessel
|
||||
position reports. Clients with no position callbacks should leave this
|
||||
false to keep responses small.
|
||||
includeTankers:
|
||||
type: boolean
|
||||
description: |-
|
||||
When true, populate VesselSnapshot.tanker_reports with per-vessel
|
||||
position reports for AIS ship-type 80-89 (tanker class). Used by the
|
||||
Energy Atlas live-tanker map layer. Stored separately from
|
||||
candidate_reports (which is military-only) so consumers self-select
|
||||
via this flag rather than the response field changing meaning.
|
||||
description: GetVesselSnapshotRequest specifies filters for the vessel snapshot.
|
||||
worldmonitor_maritime_v1_GetVesselSnapshotResponse:
|
||||
type: object
|
||||
@@ -14777,6 +14796,10 @@ components:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/worldmonitor_maritime_v1_SnapshotCandidateReport'
|
||||
tankerReports:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/worldmonitor_maritime_v1_SnapshotCandidateReport'
|
||||
description: VesselSnapshot represents a point-in-time view of civilian AIS vessel data.
|
||||
worldmonitor_maritime_v1_AisDensityZone:
|
||||
type: object
|
||||
|
||||
351
docs/architecture/usage-telemetry.md
Normal file
@@ -0,0 +1,351 @@
|
||||
# Usage telemetry (Axiom)
|
||||
|
||||
Operator + developer guide to the gateway's per-request usage telemetry pipeline.
|
||||
Implements the requirements in `docs/brainstorms/2026-04-24-axiom-api-observability-requirements.md`.
|
||||
|
||||
---
|
||||
|
||||
## What it is
|
||||
|
||||
Every inbound API request that hits `createDomainGateway()` emits one structured
|
||||
event to Axiom describing **who** called **what**, **how it was authenticated**,
|
||||
**what it cost**, and **how it was served**. Deep fetch helpers
|
||||
(`fetchJson`, `cachedFetchJsonWithMeta`) emit a second event type per upstream
|
||||
call so customer × provider attribution is reconstructible.
|
||||
|
||||
It is **observability only** — never on the request-critical path. The whole
|
||||
sink runs inside `ctx.waitUntil(...)` with a 1.5s timeout, no retries, and a
|
||||
circuit breaker that trips on 5% failure ratio over a 5-minute window.
|
||||
|
||||
## What you get out of it
|
||||
|
||||
Two event types in dataset `wm_api_usage`:
|
||||
|
||||
### `request` (one per inbound request)
|
||||
|
||||
| Field | Example | Notes |
|
||||
|--------------------|-------------------------------------------|----------------------------------------------|
|
||||
| `event_type` | `"request"` | |
|
||||
| `request_id` | `"req_xxx"` | from `x-request-id` or generated |
|
||||
| `route` | `/api/market/v1/analyze-stock` | |
|
||||
| `domain` | `"market"` | strips leading `vN` for `/api/v2/<svc>/…` |
|
||||
| `method`, `status` | `"GET"`, `200` | |
|
||||
| `duration_ms` | `412` | wall-clock at the gateway |
|
||||
| `req_bytes`, `res_bytes` | | response counted only on 200/304 GET |
|
||||
| `customer_id` | Clerk user ID, org ID, enterprise slug, or widget key | `null` only for anon |
|
||||
| `principal_id` | user ID or **hash** of API/widget key | never the raw secret |
|
||||
| `auth_kind` | `clerk_jwt` \| `user_api_key` \| `enterprise_api_key` \| `widget_key` \| `anon` | |
|
||||
| `tier` | `0` free / `1` pro / `2` api / `3` enterprise | `0` if unknown |
|
||||
| `cache_tier` | `fast` \| `medium` \| `slow` \| `slow-browser` \| `static` \| `daily` \| `no-store` | only on 200/304 |
|
||||
| `country`, `execution_region` | `"US"`, `"iad1"` | Vercel-provided |
|
||||
| `execution_plane` | `"vercel-edge"` | |
|
||||
| `origin_kind` | `api-key` \| `oauth` \| `browser-same-origin` \| `browser-cross-origin` \| `null` | derived from headers by `deriveOriginKind()` — `mcp` and `internal-cron` exist in the `OriginKind` type for upstream/future use but are not currently emitted on the request path |
|
||||
| `ua_hash` | SHA-256 of the UA | hashed so PII doesn't land in Axiom |
|
||||
| `sentry_trace_id` | `"abc123…"` | join key into Sentry |
|
||||
| `reason` | `ok` \| `origin_403` \| `rate_limit_429` \| `preflight` \| `auth_401` \| `auth_403` \| `tier_403` | `auth_*` distinguishes auth-rejection paths from genuine successes when filtering on `status` alone is ambiguous |
|
||||
|
||||
### `upstream` (one per outbound fetch from a request handler)
|
||||
|
||||
| Field | Example |
|
||||
|----------------------|--------------------------|
|
||||
| `request_id` | links back to the parent |
|
||||
| `provider`, `host` | `"yahoo-finance"`, `"query1.finance.yahoo.com"` |
|
||||
| `operation` | logical op name set by the helper |
|
||||
| `status`, `duration_ms`, `request_bytes`, `response_bytes` | |
|
||||
| `cache_status` | `miss` \| `fresh` \| `stale-while-revalidate` \| `neg-sentinel` |
|
||||
| `customer_id`, `route`, `tier` | inherited from the inbound request via AsyncLocalStorage |
|
||||
|
||||
## What it answers
|
||||
|
||||
A non-exhaustive list — copy-paste APL queries are in the **Analysis** section below.
|
||||
|
||||
- Per-customer request volume, p50/p95 latency, error rate
|
||||
- Per-route premium-vs-free traffic mix
|
||||
- CDN cache-tier distribution per route (calibrate `RPC_CACHE_TIER`)
|
||||
- Top-of-funnel for noisy abusers (`auth_kind=anon` × `country` × `route`)
|
||||
- Upstream provider cost per customer (`upstream` join `request` on `request_id`)
|
||||
- Bearer-vs-API-key vs anon ratio per premium route
|
||||
- Region heatmaps (`execution_region` × `route`)
|
||||
|
||||
---
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────┐
|
||||
│ Vercel Edge handler │
|
||||
│ │
|
||||
request ──► │ createDomainGateway() │
|
||||
│ auth resolution → usage:UsageIdentityInput │
|
||||
│ runWithUsageScope({ ctx, customerId, route, … }) │
|
||||
│ └─ user handler ── fetchJson / cachedFetch... ─┼─► upstream
|
||||
│ (reads scope, emits │ API
|
||||
│ upstream event) │
|
||||
│ emitRequest(...) at every return point ──────────┼────► Axiom
|
||||
│ └─ ctx.waitUntil(emitUsageEvents(...)) │ wm_api_usage
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
Code map:
|
||||
|
||||
| Concern | File |
|
||||
|----------------------------------------|--------------------------------------------|
|
||||
| Gateway emit points + identity accumulator | `server/gateway.ts` |
|
||||
| Identity resolver (pure) | `server/_shared/usage-identity.ts` |
|
||||
| Event shapes, builders, Axiom sink, breaker, ALS scope | `server/_shared/usage.ts` |
|
||||
| Upstream-event emission from fetch helpers | `server/_shared/cached-fetch.ts`, `server/_shared/fetch-json.ts` |
|
||||
|
||||
Key invariants:
|
||||
|
||||
1. **Builders accept allowlisted primitives only** — they never accept
|
||||
`Request`, `Response`, or untyped objects, so future field additions can't
|
||||
leak by structural impossibility.
|
||||
2. **`emitRequest()` fires at every gateway return path** — origin block,
|
||||
OPTIONS, 401/403/404/405, rate-limit 429, ETag 304, success 200, error 500.
|
||||
Adding a new return path requires adding the emit, or telemetry coverage
|
||||
silently regresses.
|
||||
3. **`principal_id` is a hash for secret-bearing auth** (API key, widget key)
|
||||
so raw secrets never land in Axiom.
|
||||
4. **Telemetry failure must not affect API availability or latency** — sink is
|
||||
fire-and-forget with timeout + breaker; any error path drops the event with
|
||||
a 1%-sampled `console.warn`.
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
Two env vars control the pipeline. Both are independent of every other system.
|
||||
|
||||
| Var | Required for | Behavior when missing |
|
||||
|--------------------|--------------|-------------------------------------------|
|
||||
| `USAGE_TELEMETRY` | Emission | Set to `1` to enable. Anything else → emission is a no-op (zero network calls, zero allocations of the event payload). |
|
||||
| `AXIOM_API_TOKEN` | Delivery | Events build but `sendToAxiom` short-circuits to a 1%-sampled `[usage-telemetry] drop { reason: 'no-token' }` warning. |
|
||||
|
||||
Vercel project setup:
|
||||
|
||||
1. Axiom → create dataset **`wm_api_usage`** (the constant in
|
||||
`server/_shared/usage.ts:18`; rename if you want a different name).
|
||||
2. Axiom → Settings → API Tokens → create an **Ingest** token scoped to that
|
||||
dataset. Copy the `xaat-…` value.
|
||||
3. Vercel → Project → Settings → Environment Variables, add for the desired
|
||||
environments (Production / Preview):
|
||||
```
|
||||
USAGE_TELEMETRY=1
|
||||
AXIOM_API_TOKEN=xaat-...
|
||||
```
|
||||
4. Redeploy. Axiom infers schema from the first events — no upfront schema
|
||||
work needed.
|
||||
|
||||
### Failure modes (deploy-with-Axiom-down is safe)
|
||||
|
||||
| Scenario | Behavior |
|
||||
|---------------------------------------|------------------------------------------------------|
|
||||
| `USAGE_TELEMETRY` unset | emit is a no-op, identity object is still built but discarded |
|
||||
| `USAGE_TELEMETRY=1`, no token | event built, `fetch` skipped, sampled warn |
|
||||
| Axiom returns non-2xx | `recordSample(false)`, sampled warn |
|
||||
| Axiom timeout (>1.5s) | `AbortController` aborts, sampled warn |
|
||||
| ≥5% failure ratio over 5min (≥20 samples) | breaker trips → all sends short-circuit until ratio recovers |
|
||||
| Direct gateway caller passes no `ctx` | emit is a no-op (the `ctx?.waitUntil` guard) |
|
||||
|
||||
### Kill switch
|
||||
|
||||
There is no in-code feature flag separate from the env vars. To disable in
|
||||
production: set `USAGE_TELEMETRY=0` (or unset it) and redeploy. Existing
|
||||
in-flight requests drain on the next isolate cycle.
|
||||
|
||||
---
|
||||
|
||||
## Local development & testing
|
||||
|
||||
### Smoke test without Axiom
|
||||
|
||||
Just run the dev server with neither env var set. Hit any route. The path is
|
||||
fully exercised — only the Axiom POST is skipped.
|
||||
|
||||
```sh
|
||||
vercel dev
|
||||
curl http://localhost:3000/api/seismology/v1/list-earthquakes
|
||||
```
|
||||
|
||||
In any non-`production` build, the response carries an `x-usage-telemetry`
|
||||
header. Use it as a wiring check:
|
||||
|
||||
```sh
|
||||
curl -sI http://localhost:3000/api/seismology/v1/list-earthquakes | grep -i x-usage
|
||||
# x-usage-telemetry: off # USAGE_TELEMETRY unset
|
||||
# x-usage-telemetry: ok # enabled, breaker closed
|
||||
# x-usage-telemetry: degraded # breaker tripped — Axiom is failing
|
||||
```
|
||||
|
||||
### End-to-end with a real Axiom dataset
|
||||
|
||||
```sh
|
||||
USAGE_TELEMETRY=1 AXIOM_API_TOKEN=xaat-... vercel dev
|
||||
curl http://localhost:3000/api/market/v1/list-market-quotes?symbols=AAPL
|
||||
```
|
||||
|
||||
Then in Axiom:
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where _time > ago(2m)
|
||||
| project _time, route, status, customer_id, auth_kind, tier, duration_ms
|
||||
```
|
||||
|
||||
### Automated tests
|
||||
|
||||
Three suites cover the pipeline:
|
||||
|
||||
1. **Identity unit tests** — `server/__tests__/usage-identity.test.ts` cover the
|
||||
pure `buildUsageIdentity()` resolver across every `auth_kind` branch.
|
||||
2. **Gateway emit assertions** — `tests/usage-telemetry-emission.test.mts`
|
||||
stubs `globalThis.fetch` to capture the Axiom POST body and asserts the
|
||||
`domain`, `customer_id`, `auth_kind`, and `tier` fields end-to-end through
|
||||
the gateway.
|
||||
3. **Auth-path regression tests** — `tests/premium-stock-gateway.test.mts` and
|
||||
`tests/gateway-cdn-origin-policy.test.mts` exercise the gateway without a
|
||||
`ctx` argument, locking in the "telemetry must not break direct callers"
|
||||
invariant.
|
||||
|
||||
Run them:
|
||||
|
||||
```sh
|
||||
npx tsx --test tests/usage-telemetry-emission.test.mts \
|
||||
tests/premium-stock-gateway.test.mts \
|
||||
tests/gateway-cdn-origin-policy.test.mts
|
||||
npx vitest run server/__tests__/usage-identity.test.ts
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Analysis recipes (Axiom APL)
|
||||
|
||||
All queries assume dataset `wm_api_usage`. Adjust time windows as needed.
|
||||
|
||||
### Per-customer request volume + error rate
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where event_type == "request" and _time > ago(24h)
|
||||
| summarize requests = count(),
|
||||
errors_5xx = countif(status >= 500),
|
||||
errors_4xx = countif(status >= 400 and status < 500),
|
||||
p95_ms = percentile(duration_ms, 95)
|
||||
by customer_id
|
||||
| order by requests desc
|
||||
```
|
||||
|
||||
### p50 / p95 latency per route
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where event_type == "request" and _time > ago(1h)
|
||||
| summarize p50 = percentile(duration_ms, 50),
|
||||
p95 = percentile(duration_ms, 95),
|
||||
n = count()
|
||||
by route
|
||||
| where n > 50
|
||||
| order by p95 desc
|
||||
```
|
||||
|
||||
### Premium vs free traffic mix per route
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where event_type == "request" and _time > ago(24h)
|
||||
| extend tier_bucket = case(tier >= 2, "api+ent", tier == 1, "pro", "free/anon")
|
||||
| summarize n = count() by route, tier_bucket
|
||||
| evaluate pivot(tier_bucket, sum(n))
|
||||
| order by route asc
|
||||
```
|
||||
|
||||
### CDN cache-tier mix per route — calibrates `RPC_CACHE_TIER`
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where event_type == "request" and status == 200 and method == "GET" and _time > ago(24h)
|
||||
| summarize n = count() by route, cache_tier
|
||||
| evaluate pivot(cache_tier, sum(n))
|
||||
| order by route asc
|
||||
```
|
||||
|
||||
A route dominated by `slow-browser` that *should* be CDN-cached is a hint to
|
||||
add an entry to `RPC_CACHE_TIER` in `server/gateway.ts`.
|
||||
|
||||
### Anonymous abuse hotspots
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where event_type == "request" and auth_kind == "anon" and _time > ago(1h)
|
||||
| summarize n = count() by route, country
|
||||
| where n > 100
|
||||
| order by n desc
|
||||
```
|
||||
|
||||
### Upstream cost per customer (provider attribution)
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where event_type == "upstream" and _time > ago(24h)
|
||||
| summarize calls = count(),
|
||||
response_bytes_mb = sum(response_bytes) / 1024.0 / 1024.0,
|
||||
p95_ms = percentile(duration_ms, 95)
|
||||
by customer_id, provider
|
||||
| order by calls desc
|
||||
```
|
||||
|
||||
### Cache hit ratio per provider (correctness signal)
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where event_type == "upstream" and _time > ago(24h)
|
||||
| summarize n = count() by provider, cache_status
|
||||
| evaluate pivot(cache_status, sum(n))
|
||||
| extend hit_ratio = (fresh + coalesce(['stale-while-revalidate'], 0)) * 1.0 / (fresh + miss + coalesce(['stale-while-revalidate'], 0))
|
||||
| order by hit_ratio asc
|
||||
```
|
||||
|
||||
### Sentry × Axiom join
|
||||
|
||||
When Sentry surfaces an exception, copy its trace ID and:
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where sentry_trace_id == "<paste from Sentry>"
|
||||
```
|
||||
|
||||
…to see the exact request envelope (route, customer, latency, cache outcome).
|
||||
|
||||
### Telemetry health watch
|
||||
|
||||
```kusto
|
||||
['wm_api_usage']
|
||||
| where _time > ago(1h)
|
||||
| summarize events_per_min = count() by bin(_time, 1m)
|
||||
| order by _time asc
|
||||
```
|
||||
|
||||
A drop to zero with no corresponding traffic drop = breaker tripped or
|
||||
Vercel/Axiom integration broken — pair it with the `[usage-telemetry] drop`
|
||||
warns in Vercel logs to find the cause.
|
||||
|
||||
---
|
||||
|
||||
## Adding new telemetry fields
|
||||
|
||||
1. Add the field to `RequestEvent` (or `UpstreamEvent`) in
|
||||
`server/_shared/usage.ts`.
|
||||
2. Extend the corresponding builder (`buildRequestEvent` /
|
||||
`buildUpstreamEvent`) — only allowlisted primitives, no untyped objects.
|
||||
3. If the value comes from gateway state, set it on the `usage` accumulator
|
||||
in `gateway.ts`. Otherwise plumb it through the builder call sites.
|
||||
4. Axiom auto-discovers the new column on the next ingest. No schema migration.
|
||||
5. Update this doc's field table.
|
||||
|
||||
## Adding a new gateway return path
|
||||
|
||||
If you add a new `return new Response(...)` inside `createDomainGateway()`,
|
||||
**you must call `emitRequest(status, reason, cacheTier, resBytes?)` immediately
|
||||
before it.** Telemetry coverage is enforced by code review, not lint. The
|
||||
`reason` field uses the existing `RequestReason` union — extend it if the
|
||||
return represents a new failure class.
|
||||
@@ -65,6 +65,102 @@ No human review queue gates the transition — quality comes from the tiered evi
|
||||
|
||||
Pipeline-registry data derived from [Global Energy Monitor](https://globalenergymonitor.org) (CC-BY 4.0), with additional operator and regulator material incorporated under fair-use for news reporting.
|
||||
|
||||
The hand-curated subset (operator/regulator/sanctions-bearing rows with classifier confidence ≥ 0.7) ships with full evidence bundles: operator statements, sanction references, last-evidence-update timestamps, and named source authorities. The GEM-imported subset (long-tail coverage rows) ships with minimum-viable evidence — `physicalStateSource: gem`, `classifierConfidence ≤ 0.5`, no operator statement, no sanction references. Both subsets pass the same registry validator and feed the same public-badge derivation.
|
||||
|
||||
## Operator runbook — GEM import refresh
|
||||
|
||||
### Cadence
|
||||
|
||||
**Refresh quarterly** (or whenever a new GEM release lands — check the GGIT/GOIT landing pages below). The refresh is operator-mediated rather than cron-driven because:
|
||||
|
||||
- GEM downloads are gated behind a per-request form; the resulting URL is release-specific and rotates each quarter, so a hardcoded URL would silently fetch a different version than the one we attribute.
|
||||
- Each release adjusts column names occasionally; the schema-drift sentinel in `scripts/import-gem-pipelines.mjs` catches this loudly, but it requires a human review of the diff before committing.
|
||||
|
||||
If a quarter passes without a refresh, set a calendar reminder. Suggested cadence: review every 90 days; refresh whenever a peer reference site (e.g. global-energy-flow.com) advertises a newer release than ours.
|
||||
|
||||
### Source datasets
|
||||
|
||||
The two files we use are GEM's pipeline-only trackers (NOT the combined "Oil & Gas Extraction Tracker" — that's upstream wells/fields and has a different schema):
|
||||
|
||||
| Tracker | Acronym | What it contains | Landing page |
|
||||
|---|---|---|---|
|
||||
| Global Gas Infrastructure Tracker | **GGIT** | Gas pipelines + LNG terminals | [globalenergymonitor.org/projects/global-gas-infrastructure-tracker](https://globalenergymonitor.org/projects/global-gas-infrastructure-tracker/) |
|
||||
| Global Oil Infrastructure Tracker | **GOIT** | Oil + NGL pipelines | [globalenergymonitor.org/projects/global-oil-infrastructure-tracker](https://globalenergymonitor.org/projects/global-oil-infrastructure-tracker/) |
|
||||
|
||||
The **GIS .zip download** (containing GeoJSON, GeoPackage, and shapefile) is what we want — NOT the .xlsx. The XLSX has properties but no lat/lon columns; only the GeoJSON has both column properties AND `LineString.coordinates` for endpoint extraction.
|
||||
|
||||
#### Last-known-good URLs (rotate per release)
|
||||
|
||||
These are the URLs we used for the 2026-04-25 import. GEM rotates them per release, so always re-request via the landing page above for the current release before re-running:
|
||||
|
||||
```
|
||||
GGIT Gas (2025-11): https://globalenergymonitor.org/wp-content/uploads/2025/11/GEM-GGIT-Gas-Pipelines-2025-11.zip
|
||||
GOIT Oil (2025-03): https://globalenergymonitor.org/wp-content/uploads/2025/03/GEM-GOIT-Oil-NGL-Pipelines-2025-03.zip
|
||||
```
|
||||
|
||||
URL pattern is stable: `globalenergymonitor.org/wp-content/uploads/YYYY/MM/GEM-{GGIT,GOIT}-{tracker-name}-YYYY-MM.zip`. If the landing-page download flow changes, this pattern is the fallback for figuring out the new URL given the release date GEM publishes.
|
||||
|
||||
### Refresh steps
|
||||
|
||||
1. **Request the data** via either landing page above. GEM emails you per-release URLs (one for the .xlsx, one for the GIS .zip). Registration is required even though the data itself is CC-BY 4.0.
|
||||
|
||||
2. **Download both GIS .zips** and unzip:
|
||||
```bash
|
||||
unzip -o ~/Downloads/GEM-GGIT-Gas-Pipelines-YYYY-MM.zip -d /tmp/gem-gis/gas/
|
||||
unzip -o ~/Downloads/GEM-GOIT-Oil-NGL-Pipelines-YYYY-MM.zip -d /tmp/gem-gis/oil/
|
||||
```
|
||||
|
||||
3. **Convert GeoJSON → canonical JSON** via the in-repo converter. It reads both GeoJSON files, applies the filter knobs documented in the script header, normalizes country names to ISO 3166-1 alpha-2 via `pycountry`, and emits the operator-shape envelope:
|
||||
```bash
|
||||
pip3 install pycountry # one-time
|
||||
GEM_GAS_GEOJSON=/tmp/gem-gis/gas/GEM-GGIT-Gas-Pipelines-YYYY-MM.geojson \
|
||||
GEM_OIL_GEOJSON=/tmp/gem-gis/oil/GEM-GOIT-Oil-NGL-Pipelines-YYYY-MM.geojson \
|
||||
GEM_DOWNLOADED_AT=YYYY-MM-DD \
|
||||
GEM_SOURCE_VERSION="GEM-GGIT-YYYY-MM+GOIT-YYYY-MM" \
|
||||
python3 scripts/_gem-geojson-to-canonical.py > /tmp/gem-pipelines.json 2> /tmp/gem-drops.log
|
||||
cat /tmp/gem-drops.log # inspect drop counts before merging
|
||||
```
|
||||
|
||||
Filter knob defaults (in `scripts/_gem-geojson-to-canonical.py`):
|
||||
- `MIN_LENGTH_KM_GAS = 750` (trunk-class only)
|
||||
- `MIN_LENGTH_KM_OIL = 400` (trunk-class only)
|
||||
- `ACCEPTED_STATUS = {operating, construction}`
|
||||
- Capacity unit conversions: bcm/y native; MMcf/d, MMSCMD, mtpa, m3/day, bpd, Mb/d, kbd → bcm/y (gas) or bbl/d (oil)
|
||||
|
||||
These thresholds were tuned empirically against the 2025-11/2025-03 release to land at ~250-300 entries per registry. Adjust if a future release shifts the volume distribution.
|
||||
|
||||
4. **Dry-run** to inspect candidate counts before touching the registry:
|
||||
```bash
|
||||
GEM_PIPELINES_FILE=/tmp/gem-pipelines.json node scripts/import-gem-pipelines.mjs --print-candidates \
|
||||
| jq '{ gas: (.gas | length), oil: (.oil | length) }'
|
||||
```
|
||||
|
||||
5. **Merge** into `scripts/data/pipelines-{gas,oil}.json` (writes both atomically — validates both before either is touched on disk):
|
||||
```bash
|
||||
GEM_PIPELINES_FILE=/tmp/gem-pipelines.json node scripts/import-gem-pipelines.mjs --merge
|
||||
```
|
||||
Spot-check 5-10 random GEM-sourced rows in the diff before committing — known major trunks (Druzhba, Nord Stream, Keystone, TAPI, Centro Oeste) are good sanity-check anchors.
|
||||
|
||||
6. **Commit** the data + record provenance. Per-release SHA256s go in the commit message so future audits can verify reproducibility:
|
||||
```bash
|
||||
shasum -a 256 ~/Downloads/GEM-GGIT-Gas-Pipelines-YYYY-MM.xlsx \
|
||||
~/Downloads/GEM-GOIT-Oil-NGL-Pipelines-YYYY-MM.xlsx
|
||||
```
|
||||
If the row count crosses a threshold, also bump `MIN_PIPELINES_PER_REGISTRY` in `scripts/_pipeline-registry.mjs` so future partial re-imports fail loud rather than silently halving the registry.
|
||||
|
||||
7. **Verify** `npm run test:data` is green before pushing.
|
||||
|
||||
### Failure modes and what to do
|
||||
|
||||
| Symptom | Cause | Fix |
|
||||
|---|---|---|
|
||||
| Converter exits with `GEM_GAS_GEOJSON env vars are required` | Env vars not set | Re-run with both `GEM_GAS_GEOJSON` and `GEM_OIL_GEOJSON` pointed at the unzipped `.geojson` files |
|
||||
| Many rows dropped on `country:Foo\|Bar` | New country name GEM uses isn't in `pycountry` or the alias table | Add the alias to `COUNTRY_ALIASES` in `scripts/_gem-geojson-to-canonical.py` |
|
||||
| Many rows dropped on `no_capacity` with a unit we haven't seen | GEM added a capacity unit | Add the conversion factor to `gas_capacity()` or `oil_capacity()` in the converter |
|
||||
| Parser throws `schema drift — pipelines[i] missing column "X"` | GEM renamed a column between releases | The parser will name the missing column; map it back in the converter and re-run |
|
||||
| `validateRegistry` rejects the merged registry | Almost always: count below `MIN_PIPELINES_PER_REGISTRY`, or an evidence-source not in the whitelist | Inspect the merged JSON; if the row drop is real, lower the floor; if a row's evidence is malformed, fix the converter |
|
||||
| Net adds drop precipitously between releases | GEM removed a tracker subset, OR the dedup is over-matching | Run `--print-candidates` and diff against the prior quarter's output; adjust the haversine/Jaccard knobs in `scripts/_pipeline-dedup.mjs` if needed |
|
||||
|
||||
## Corrections
|
||||
|
||||
See [`/corrections`](/corrections) for the planned revision-log shape.
|
||||
|
||||
@@ -53,6 +53,7 @@ Deployment SPEED (weeks vs months vs years) is the core signal.
|
||||
| Tier | Value | Meaning | Concrete precedents |
|
||||
|---|---|---|---|
|
||||
| Nil access | **0.1** | Sanctions, asset freeze, or political paralysis makes deployment effectively impossible within a crisis window | Russia NWF (post-2022 asset freeze), Libya LIA (sanctions + frozen assets), Iran NDFI (sanctions + access concerns). Currently deferred from v1 for this reason. |
|
||||
| Statutorily-gated long-horizon | **0.20** | Withdrawals require statutory supermajority / bicameral-equivalent action; gate has been crossed in extreme cases (single, capped draw under emergency law) but NOT for ordinary stabilization. Distinct from "Intergenerational savings" (0.3) because the gate is *statutory* rather than ruler-discretionary — Council-of-Ministers + parliamentary or constitutional thresholds replace head-of-state direction. | KIA Future Generations Fund (Decree 106 of 1976; Council-of-Ministers + Emir decree required; gate crossed once during COVID for a small capped draw). Phase 1B addition (Plan 2026-04-25-001). |
|
||||
| Intergenerational savings | **0.3** | Pure long-horizon wealth-preservation mandate; no explicit stabilization rule; withdrawal requires ruler / head-of-state / parliamentary discretion with no codified trigger | ADIA (Abu Dhabi, intergenerational mandate, ruler-discretionary); Brunei BIA (deferred candidate) |
|
||||
| Hybrid / constrained | **0.5** | Mandate mixes strategic + savings + partial stabilization; deployment is mechanically possible but constrained by strategic allocation locked to policy objectives (Vision 2030, industrial policy, geopolitical holdings) | PIF (Saudi Arabia, Vision 2030-locked), QIA (Qatar, long-horizon wealth-management with amiri-decree deployment), Mubadala (UAE, strategic + financial hybrid), Ireland ISIF (strategic-development mandate) |
|
||||
| Explicit stabilization with rule | **0.7** | Legislated or rule-based mechanism for fiscal support during specific shock classes, with historical precedent of actual deployment | KIA General Reserve Fund (legislated finance of budget shortfalls from oil-revenue swings). NO GPFG is BORDERLINE — has a fiscal rule capping withdrawal at ~3% expected real return, which is an access MECHANISM but also an access CONSTRAINT (see below). NOTE: GIC is discussed in the alignment table below as a candidate for this tier based on its NIRC framework, but the current manifest rates it 0.6 — so it's a 0.7 *candidate*, not a 0.7 *precedent*. |
|
||||
|
||||
@@ -45,6 +45,7 @@ const VARIANT_HOST_MAP: Record<string, string> = {
|
||||
'finance.worldmonitor.app': 'finance',
|
||||
'commodity.worldmonitor.app': 'commodity',
|
||||
'happy.worldmonitor.app': 'happy',
|
||||
'energy.worldmonitor.app': 'energy',
|
||||
};
|
||||
|
||||
// Source of truth: src/config/variant-meta.ts — keep in sync when variant metadata changes.
|
||||
@@ -73,6 +74,12 @@ const VARIANT_OG: Record<string, { title: string; description: string; image: st
|
||||
image: 'https://happy.worldmonitor.app/favico/happy/og-image.png',
|
||||
url: 'https://happy.worldmonitor.app/',
|
||||
},
|
||||
energy: {
|
||||
title: 'Energy Atlas - Real-Time Global Energy Intelligence Dashboard',
|
||||
description: 'Real-time global energy atlas tracking oil and gas pipelines, storage facilities, chokepoints, fuel shortages, tanker flows, and disruption events worldwide.',
|
||||
image: 'https://energy.worldmonitor.app/favico/energy/og-image.png',
|
||||
url: 'https://energy.worldmonitor.app/',
|
||||
},
|
||||
};
|
||||
|
||||
const ALLOWED_HOSTS = new Set([
|
||||
|
||||
@@ -19,6 +19,12 @@ message GetVesselSnapshotRequest {
|
||||
// position reports. Clients with no position callbacks should leave this
|
||||
// false to keep responses small.
|
||||
bool include_candidates = 5 [(sebuf.http.query) = { name: "include_candidates" }];
|
||||
// When true, populate VesselSnapshot.tanker_reports with per-vessel
|
||||
// position reports for AIS ship-type 80-89 (tanker class). Used by the
|
||||
// Energy Atlas live-tanker map layer. Stored separately from
|
||||
// candidate_reports (which is military-only) so consumers self-select
|
||||
// via this flag rather than the response field changing meaning.
|
||||
bool include_tankers = 6 [(sebuf.http.query) = { name: "include_tankers" }];
|
||||
}
|
||||
|
||||
// GetVesselSnapshotResponse contains the vessel traffic snapshot.
|
||||
|
||||
@@ -22,6 +22,12 @@ message VesselSnapshot {
|
||||
// Recent position reports for individual vessels. Only populated when the
|
||||
// request sets include_candidates=true — empty otherwise.
|
||||
repeated SnapshotCandidateReport candidate_reports = 6;
|
||||
// Recent position reports for tanker vessels (AIS ship type 80-89). Only
|
||||
// populated when the request sets include_tankers=true — empty otherwise.
|
||||
// Reuses the SnapshotCandidateReport message shape; the field is parallel
|
||||
// to candidate_reports (military-detection) so adding tanker-rendering
|
||||
// doesn't change the meaning of the existing surface.
|
||||
repeated SnapshotCandidateReport tanker_reports = 7;
|
||||
}
|
||||
|
||||
// AisSnapshotStatus reports relay health at the time of the snapshot.
|
||||
|
||||
BIN
public/favico/energy/android-chrome-192x192.png
Normal file
|
After Width: | Height: | Size: 35 KiB |
BIN
public/favico/energy/android-chrome-512x512.png
Normal file
|
After Width: | Height: | Size: 96 KiB |
BIN
public/favico/energy/apple-touch-icon.png
Normal file
|
After Width: | Height: | Size: 32 KiB |
BIN
public/favico/energy/favicon-16x16.png
Normal file
|
After Width: | Height: | Size: 751 B |
BIN
public/favico/energy/favicon-32x32.png
Normal file
|
After Width: | Height: | Size: 2.3 KiB |
BIN
public/favico/energy/favicon.ico
Normal file
|
After Width: | Height: | Size: 778 B |
BIN
public/favico/energy/og-image.png
Normal file
|
After Width: | Height: | Size: 76 KiB |
382
scripts/_gem-geojson-to-canonical.py
Normal file
@@ -0,0 +1,382 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Pre-convert GEM GeoJSON (GGIT gas + GOIT oil pipelines) → canonical JSON shape
|
||||
that scripts/import-gem-pipelines.mjs::REQUIRED_COLUMNS expects.
|
||||
|
||||
Why GeoJSON, not XLSX:
|
||||
GEM publishes both XLSX and GIS .zip downloads (with GeoJSON, GeoPackage,
|
||||
shapefile inside). The XLSX has properties but NO lat/lon columns — endpoint
|
||||
geometry only lives in the GIS feed. The GeoJSON `properties` block carries
|
||||
the same column set as the XLSX, AND `geometry.coordinates` gives us the
|
||||
LineString endpoints we need for haversine dedup. So we use GeoJSON only.
|
||||
|
||||
Usage:
|
||||
GEM_GAS_GEOJSON=/path/to/GEM-GGIT-Gas-Pipelines-YYYY-MM.geojson \\
|
||||
GEM_OIL_GEOJSON=/path/to/GEM-GOIT-Oil-NGL-Pipelines-YYYY-MM.geojson \\
|
||||
python3 scripts/_gem-geojson-to-canonical.py \\
|
||||
> /tmp/gem-pipelines.json
|
||||
|
||||
# Then feed to the merge step:
|
||||
GEM_PIPELINES_FILE=/tmp/gem-pipelines.json node \\
|
||||
scripts/import-gem-pipelines.mjs --print-candidates # dry run
|
||||
GEM_PIPELINES_FILE=/tmp/gem-pipelines.json node \\
|
||||
scripts/import-gem-pipelines.mjs --merge
|
||||
|
||||
Dependencies:
|
||||
pip3 install pycountry # ISO 3166-1 alpha-2 mapping for country names
|
||||
|
||||
Drop-summary log goes to stderr; canonical JSON goes to stdout.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import pycountry
|
||||
|
||||
GAS_PATH = os.environ.get("GEM_GAS_GEOJSON")
|
||||
OIL_PATH = os.environ.get("GEM_OIL_GEOJSON")
|
||||
if not GAS_PATH or not OIL_PATH:
|
||||
sys.exit(
|
||||
"GEM_GAS_GEOJSON and GEM_OIL_GEOJSON env vars are required. "
|
||||
"Point each at the GEM-{GGIT,GOIT}-{Gas,Oil-NGL}-Pipelines-YYYY-MM.geojson "
|
||||
"file unzipped from the GIS download. See script header for details."
|
||||
)
|
||||
|
||||
# Filter knobs (per plan: trunk-class only, target 250-300 entries per registry).
|
||||
# Asymmetric thresholds: gas has more long-distance trunks worldwide (LNG-feeder
|
||||
# corridors, Russia→Europe, Russia→China), oil pipelines tend to be shorter
|
||||
# regional collectors. Tuned empirically against the 2025-11 GEM release to
|
||||
# yield ~265 gas + ~300 oil after dedup against the 75 hand-curated rows.
|
||||
MIN_LENGTH_KM_GAS = 750.0
|
||||
MIN_LENGTH_KM_OIL = 400.0
|
||||
ACCEPTED_STATUS = {"operating", "construction"}
|
||||
|
||||
# GEM (lowercase) → parser STATUS_MAP key (PascalCase)
|
||||
STATUS_PASCAL = {
|
||||
"operating": "Operating",
|
||||
"construction": "Construction",
|
||||
"proposed": "Proposed",
|
||||
"cancelled": "Cancelled",
|
||||
"shelved": "Cancelled", # treat shelved as cancelled per plan U2
|
||||
"mothballed": "Mothballed",
|
||||
"idle": "Idle",
|
||||
"shut-in": "Shut-in",
|
||||
"retired": "Mothballed",
|
||||
"mixed status": "Operating", # rare; treat as operating
|
||||
}
|
||||
|
||||
# Country aliases for cases pycountry's fuzzy match fails on
|
||||
COUNTRY_ALIASES = {
|
||||
"United States": "US",
|
||||
"United Kingdom": "GB",
|
||||
"Russia": "RU",
|
||||
"South Korea": "KR",
|
||||
"North Korea": "KP",
|
||||
"Iran": "IR",
|
||||
"Syria": "SY",
|
||||
"Venezuela": "VE",
|
||||
"Bolivia": "BO",
|
||||
"Tanzania": "TZ",
|
||||
"Vietnam": "VN",
|
||||
"Laos": "LA",
|
||||
"Czech Republic": "CZ",
|
||||
"Czechia": "CZ",
|
||||
"Slovakia": "SK",
|
||||
"Macedonia": "MK",
|
||||
"North Macedonia": "MK",
|
||||
"Moldova": "MD",
|
||||
"Brunei": "BN",
|
||||
"Cape Verde": "CV",
|
||||
"Ivory Coast": "CI",
|
||||
"Cote d'Ivoire": "CI",
|
||||
"Republic of the Congo": "CG",
|
||||
"Democratic Republic of the Congo": "CD",
|
||||
"DR Congo": "CD",
|
||||
"DRC": "CD",
|
||||
"Congo": "CG",
|
||||
"Burma": "MM",
|
||||
"Myanmar": "MM",
|
||||
"Taiwan": "TW",
|
||||
"Palestine": "PS",
|
||||
"Kosovo": "XK", # not ISO-2 official; use XK (commonly accepted)
|
||||
}
|
||||
|
||||
|
||||
def country_to_iso2(name):
    """Map a GEM country/area label to an ISO 3166-1 alpha-2 code.

    Resolution order: the manual COUNTRY_ALIASES table, exact pycountry
    name, pycountry common_name (e.g. "Russia" vs "Russian Federation"),
    then pycountry fuzzy search. Returns None for empty or unresolvable
    labels.
    """
    if not name:
        return None
    label = name.strip()
    alias = COUNTRY_ALIASES.get(label)
    if alias:
        return alias
    try:
        exact = pycountry.countries.get(name=label)
        if exact:
            return exact.alpha_2
        common = pycountry.countries.get(common_name=label)
        if common:
            return common.alpha_2
        fuzzy_hits = pycountry.countries.search_fuzzy(label)
        if fuzzy_hits:
            return fuzzy_hits[0].alpha_2
    except (LookupError, KeyError):
        # search_fuzzy raises LookupError on no match; treat as unresolved.
        pass
    return None
|
||||
|
||||
|
||||
def split_countries(s):
    """Split a comma-separated country list into trimmed, non-empty names.

    e.g. 'Russia, Belarus, Ukraine' -> ['Russia', 'Belarus', 'Ukraine'].
    Empty/None input yields an empty list.
    """
    if not s:
        return []
    trimmed = (piece.strip() for piece in s.split(","))
    return [name for name in trimmed if name]
|
||||
|
||||
|
||||
def get_endpoints(geom):
    """Return ((startLon, startLat), (endLon, endLat)) for a GeoJSON geometry.

    Handles LineString, MultiLineString (parts flattened in order), and
    GeometryCollection (line-type members flattened in order). Returns None
    when the geometry is missing or has fewer than two coordinates.
    """
    if not geom:
        return None
    gtype = geom.get("type")
    coords = geom.get("coordinates")
    if gtype == "LineString":
        if coords and len(coords) >= 2:
            return coords[0], coords[-1]
        return None
    if gtype == "MultiLineString":
        if coords:
            flattened = [pt for part in coords if part for pt in part]
            if len(flattened) >= 2:
                return flattened[0], flattened[-1]
        return None
    if gtype == "GeometryCollection":
        collected = []
        for member in geom.get("geometries") or []:
            if not member:
                continue
            mcoords = member.get("coordinates")
            if not mcoords:
                continue
            mtype = member.get("type")
            if mtype == "LineString":
                collected.extend(mcoords)
            elif mtype == "MultiLineString":
                for part in mcoords:
                    collected.extend(part)
        if len(collected) >= 2:
            return collected[0], collected[-1]
    return None
|
||||
|
||||
|
||||
def first_year(props):
    """Earliest parseable start year from StartYear1..3; 0 when none parse.

    Values may be strings like "1998.0", so parse via float before int.
    Falsy values (None, "", 0) are skipped entirely.
    """
    for key in ("StartYear1", "StartYear2", "StartYear3"):
        raw = props.get(key)
        if not raw:
            continue
        try:
            return int(float(raw))
        except (TypeError, ValueError):
            continue
    return 0
|
||||
|
||||
|
||||
def best_length_km(props):
    """Best available pipeline length in km.

    Preference order: merged > known > estimate. Blank/'NA'/non-numeric or
    non-positive values are skipped; 0.0 is returned when no column yields
    a positive number.
    """
    for key in ("LengthMergedKm", "LengthKnownKm", "LengthEstimateKm"):
        raw = props.get(key)
        if raw in (None, "", "NA"):
            continue
        try:
            value = float(raw)
        except (TypeError, ValueError):
            continue
        if value > 0:
            return value
    return 0.0
|
||||
|
||||
|
||||
def _f(v):
|
||||
if v in (None, "", "NA"):
|
||||
return None
|
||||
try:
|
||||
f = float(v)
|
||||
return f if f > 0 else None
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
|
||||
|
||||
def gas_capacity(props):
    """Return (capacity, 'bcm/y') for a gas row, or (None, None).

    Prefers GEM's derived CapacityBcm/y column (present for many GGIT
    rows); otherwise converts the raw Capacity + CapacityUnits pair using a
    per-unit conversion. Unknown units yield (None, None) so the caller
    drops the row as no_capacity.
    """
    derived = _f(props.get("CapacityBcm/y"))
    if derived is not None:
        return derived, "bcm/y"
    raw = _f(props.get("Capacity"))
    if raw is None:
        return None, None
    unit = (props.get("CapacityUnits") or "").strip().lower()
    converters = {
        "bcm/y": lambda c: c,
        "mmcf/d": lambda c: c * 0.01034,           # million std cubic feet/day
        "mmscmd": lambda c: c * 365.25 / 1000.0,   # million std cubic metres/day
        "mill.sm3/day": lambda c: c * 365.25 / 1000.0,  # same unit, alt spelling
        "scm/y": lambda c: c / 1e9,                # std cubic metres/year
        "mtpa": lambda c: c * 1.36,                # LNG mtpa ≈ 1.36 bcm/y
    }
    convert = converters.get(unit)
    if convert is None:
        return None, None
    return convert(raw), "bcm/y"
|
||||
|
||||
|
||||
def oil_capacity(props):
    """Return (capacity, 'bbl/d') for an oil row, or (None, None).

    Converts raw Capacity + CapacityUnits to barrels/day (the downstream
    parser divides by 1e6 to get Mbd). Falls back to GEM's CapacityBOEd
    column — already bpd-equivalent — when the raw pair is missing or the
    unit is unknown.
    """
    def boed_fallback():
        # CapacityBOEd is barrels-of-oil-equivalent per day.
        boed = _f(props.get("CapacityBOEd"))
        return (boed, "bbl/d") if boed is not None else (None, None)

    raw = _f(props.get("Capacity"))
    unit = (props.get("CapacityUnits") or "").strip().lower()
    if raw is None or not unit:
        return boed_fallback()
    # Per-unit conversions to bbl/d. 7.33 bbl/tonne is the average crude
    # factor; 6.2898 bbl per cubic metre.
    converters = {
        "bpd": lambda c: c,
        "mb/d": lambda c: c * 1000.0,  # GEM "Mb/d" = thousand bbl/day
        "mbd": lambda c: c * 1000.0,
        "kbd": lambda c: c * 1000.0,
        "kb/d": lambda c: c * 1000.0,
        "mtpa": lambda c: c * 1e6 * 7.33 / 365.25,  # million tonnes/annum crude
        "m3/day": lambda c: c * 6.2898,
        "m3/month": lambda c: c * 6.2898 / 30.4,
        "m3/year": lambda c: c * 6.2898 / 365.25,
        "thousand m3/year": lambda c: c * 1000 * 6.2898 / 365.25,
        "tn/d": lambda c: c * 7.33,  # tonnes/day
    }
    convert = converters.get(unit)
    if convert is None:
        # Unknown unit → last-resort BOEd fallback.
        return boed_fallback()
    return convert(raw), "bbl/d"
|
||||
|
||||
|
||||
def convert_one(props, geom, fuel_token):
    """Convert one GEM GeoJSON feature into a canonical pipeline row.

    Returns (row, None) on success, or (None, drop_reason) when the feature
    is filtered out. `fuel_token` is "Gas" or "Oil" and selects both the
    capacity conversion and the fuel-specific country column names (GGIT
    and GOIT use different column naming).
    """
    name = (props.get("PipelineName") or "").strip()
    seg = (props.get("SegmentName") or "").strip()
    # Suffix the segment name unless it is just the generic "main line".
    if seg and seg.lower() not in ("main line", "mainline", "main"):
        name = f"{name} - {seg}" if name else seg
    if not name:
        return None, "no_name"

    status = (props.get("Status") or "").strip().lower()
    if status not in ACCEPTED_STATUS:
        return None, f"status:{status or 'empty'}"

    pts = get_endpoints(geom)
    if not pts:
        return None, "no_geom"
    s_lon, s_lat = pts[0][0], pts[0][1]
    e_lon, e_lat = pts[1][0], pts[1][1]
    # Drop degenerate geometry (start == end). GEM occasionally publishes
    # rows with a Point geometry or a single-coord LineString, which we'd
    # otherwise emit as zero-length routes. PR #3406 review found 9 such
    # rows (Trans-Alaska, Enbridge Line 3 Replacement, Ichthys, etc.).
    if s_lat == e_lat and s_lon == e_lon:
        return None, "zero_length"

    length = best_length_km(props)
    threshold = MIN_LENGTH_KM_GAS if fuel_token == "Gas" else MIN_LENGTH_KM_OIL
    if length < threshold:
        return None, "too_short"

    if fuel_token == "Gas":
        cap, unit = gas_capacity(props)
        from_country_name = props.get("StartCountryOrArea")
        to_country_name = props.get("EndCountryOrArea")
        all_countries = split_countries(props.get("CountriesOrAreas"))
    else:
        cap, unit = oil_capacity(props)
        from_country_name = props.get("StartCountry")
        to_country_name = props.get("EndCountry")
        all_countries = split_countries(props.get("Countries"))
    if cap is None or unit is None:
        return None, "no_capacity"

    from_iso = country_to_iso2(from_country_name)
    to_iso = country_to_iso2(to_country_name)
    if not from_iso or not to_iso:
        return None, f"country:{from_country_name}|{to_country_name}"

    # Transit countries: everything in the full country list that is not an
    # endpoint. FIX: deduplicate while preserving order — a country repeated
    # in the source list (or two names aliasing to the same ISO code, e.g.
    # "Czechia"/"Czech Republic") would otherwise emit duplicate ISO codes
    # in transitCountries.
    transit = []
    seen_iso = set()
    for c in all_countries:
        iso = country_to_iso2(c)
        if iso and iso != from_iso and iso != to_iso and iso not in seen_iso:
            seen_iso.add(iso)
            transit.append(iso)

    operator = (props.get("Owner") or props.get("Parent") or "").strip()
    if not operator:
        operator = "Unknown"

    row = {
        "name": name,
        "operator": operator,
        "fuel": fuel_token,
        "fromCountry": from_iso,
        "toCountry": to_iso,
        "transitCountries": transit,
        "capacity": cap,
        "capacityUnit": unit,
        "lengthKm": length,
        "status": STATUS_PASCAL.get(status, "Operating"),
        "startLat": s_lat,
        "startLon": s_lon,
        "endLat": e_lat,
        "endLon": e_lon,
        "startYear": first_year(props),
    }
    return row, None
|
||||
|
||||
|
||||
def process(path, fuel_token, drops):
    """Convert every feature in the GeoJSON file at `path`.

    Returns the list of kept canonical rows. `drops` is mutated in place
    (drop_reason -> count) so the caller can print one summary per fuel.
    """
    with open(path) as fh:
        collection = json.load(fh)
    kept = []
    for feature in collection["features"]:
        row, reason = convert_one(
            feature.get("properties") or {},
            feature.get("geometry"),
            fuel_token,
        )
        if row is None:
            drops[reason] = drops.get(reason, 0) + 1
        else:
            kept.append(row)
    return kept
|
||||
|
||||
|
||||
def main():
    """Convert both GEM feeds, write the canonical envelope to stdout, and
    print a per-fuel drop summary to stderr."""
    drops_gas = {}
    drops_oil = {}
    gas_rows = process(GAS_PATH, "Gas", drops_gas)
    oil_rows = process(OIL_PATH, "Oil", drops_oil)

    # The operator stamps `downloadedAt` and `sourceVersion` per release so
    # the parser's deterministic-timestamp logic (resolveEvidenceTimestamp in
    # scripts/import-gem-pipelines.mjs) produces a stable lastEvidenceUpdate
    # tied to the actual download date — not "now". Override via env so the
    # script doesn't drift across re-runs.
    envelope = {
        "downloadedAt": os.environ.get("GEM_DOWNLOADED_AT", "1970-01-01"),
        "sourceVersion": os.environ.get("GEM_SOURCE_VERSION", "GEM-unspecified-release"),
        "pipelines": gas_rows + oil_rows,
    }
    json.dump(envelope, sys.stdout, indent=2, ensure_ascii=False)

    # Summaries go to stderr so stdout stays pure JSON. The leading newline
    # on the first header separates it from the JSON when both hit a tty.
    summaries = (
        ("gas", "\n", drops_gas, gas_rows),
        ("oil", "", drops_oil, oil_rows),
    )
    for label, prefix, drops, rows in summaries:
        print(f"{prefix}--- DROP SUMMARY ({label}) ---", file=sys.stderr)
        for reason, count in sorted(drops.items(), key=lambda kv: -kv[1]):
            print(f" {reason}: {count}", file=sys.stderr)
        print(f" KEPT: {len(rows)}", file=sys.stderr)


if __name__ == "__main__":
    main()
|
||||
223
scripts/_pipeline-dedup.mjs
Normal file
@@ -0,0 +1,223 @@
|
||||
// @ts-check
|
||||
//
|
||||
// Pure deterministic deduplication for the GEM pipeline import. NOT an entry
|
||||
// point — see scripts/import-gem-pipelines.mjs for the orchestrator.
|
||||
//
|
||||
// Match rule (BOTH must hold):
|
||||
// 1. Endpoint distance ≤ 5 km (haversine, route-direction-flipped pair-aware
|
||||
// so Mozyr→Adamowo and Adamowo→Mozyr count as the same).
|
||||
// 2. Name token Jaccard ≥ 0.6 (lowercased word tokens, stopwords removed).
|
||||
//
|
||||
// Conflict resolution: existing row WINS. Hand-curated rows have richer
|
||||
// evidence (operator statements, sanction refs, classifier confidence ≥ 0.7)
|
||||
// that GEM's minimum-viable evidence shouldn't overwrite. The dedup function
|
||||
// returns { toAdd, skippedDuplicates } so the caller can audit which GEM
|
||||
// candidates were absorbed by existing rows.
|
||||
//
|
||||
// Determinism: zero Date.now() / Math.random() / Set ordering reliance. Two
|
||||
// invocations on identical inputs produce identical outputs.
|
||||
|
||||
// Tokens too generic to distinguish one pipeline name from another; removed
// before computing Jaccard similarity.
const STOPWORDS = new Set([
  'pipeline', 'pipelines', 'system', 'systems', 'line', 'lines', 'network',
  'route', 'project', 'the', 'and', 'of', 'a', 'an',
]);

// Conjunctive match gates: endpoints within 5 km AND name Jaccard >= 0.6.
const MATCH_DISTANCE_KM = 5;
const MATCH_JACCARD_MIN = 0.6;
// Relaxed distance gate used ONLY when the two tokenized names are identical
// (Jaccard == 1.0 after stopword removal): accept if ANY endpoint pairing is
// within this radius. Catches PR #3406 review's Dampier-Bunbury case — GEM
// digitized only the southern 60% of the line, so the average-endpoint
// distance was 287km while the shared Bunbury terminus matched within
// 13.7km. A pure name-only rule would false-positive on coincidental name
// collisions in different oceans, so SOME geographic anchor is still
// required.
const MATCH_NAME_IDENTICAL_DISTANCE_KM = 25;
// Mean Earth radius in km, used by the haversine formula.
const EARTH_RADIUS_KM = 6371;

/**
 * Haversine great-circle distance in km between two {lat, lon} points.
 */
function haversineKm(p, q) {
  const rad = (deg) => (deg * Math.PI) / 180;
  const deltaLat = rad(q.lat - p.lat);
  const deltaLon = rad(q.lon - p.lon);
  const a =
    Math.sin(deltaLat / 2) ** 2 +
    Math.sin(deltaLon / 2) ** 2 * Math.cos(rad(p.lat)) * Math.cos(rad(q.lat));
  const c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
  return EARTH_RADIUS_KM * c;
}
|
||||
|
||||
/**
 * Mean of the two endpoint-pair distances between pipelines a and b,
 * evaluated both forward (start-start, end-end) and reversed (start-end,
 * end-start). The smaller mean is returned so that a flipped digitization
 * direction does not make the same route look like a different pipeline.
 */
function averageEndpointDistanceKm(a, b) {
  const mean2 = (d1, d2) => (d1 + d2) / 2;
  const forward = mean2(
    haversineKm(a.startPoint, b.startPoint),
    haversineKm(a.endPoint, b.endPoint),
  );
  const flipped = mean2(
    haversineKm(a.startPoint, b.endPoint),
    haversineKm(a.endPoint, b.startPoint),
  );
  return Math.min(forward, flipped);
}
|
||||
|
||||
/**
 * Minimum over all four cross-pairings between candidate and existing
 * endpoints. Used by the name-identical short-circuit: when the candidate
 * digitizes a different segment of the same physical pipeline, only ONE
 * endpoint pair may match closely (e.g. Dampier-Bunbury: shared Bunbury
 * terminus 13.7 km, other end 560 km away because GEM stopped at Onslow
 * vs the full Dampier route). A tight average would miss this; the min of
 * the four pairings doesn't.
 */
function minPairwiseEndpointDistanceKm(a, b) {
  const pairings = [
    [a.startPoint, b.startPoint],
    [a.startPoint, b.endPoint],
    [a.endPoint, b.startPoint],
    [a.endPoint, b.endPoint],
  ];
  let best = Infinity;
  for (const [p, q] of pairings) {
    best = Math.min(best, haversineKm(p, q));
  }
  return best;
}
|
||||
|
||||
/**
 * Tokenize a pipeline name into lowercase ASCII word tokens: diacritics
 * stripped, non-alphanumerics treated as separators, stopwords removed.
 * Deterministic across invocations.
 */
function tokenize(name) {
  const ascii = name
    .toLowerCase()
    .normalize('NFKD')
    // Drop combining diacritical marks (U+0300-U+036F) left behind by NFKD
    // so "Limón" becomes "limon", not "limo'n".
    .replace(/[\u0300-\u036f]/g, '')
    .replace(/[^a-z0-9 ]+/g, ' ');
  const tokens = [];
  for (const word of ascii.split(/\s+/)) {
    if (word.length > 0 && !STOPWORDS.has(word)) tokens.push(word);
  }
  return tokens;
}
|
||||
|
||||
/**
 * Jaccard similarity |A ∩ B| / |A ∪ B| over the two names' token sets.
 * Returns 0 when both token sets are empty (nothing to compare).
 */
function jaccard(nameA, nameB) {
  const tokensA = new Set(tokenize(nameA));
  const tokensB = new Set(tokenize(nameB));
  if (tokensA.size === 0 && tokensB.size === 0) return 0;
  let shared = 0;
  for (const token of tokensB) {
    if (tokensA.has(token)) shared += 1;
  }
  const unionSize = tokensA.size + tokensB.size - shared;
  return unionSize === 0 ? 0 : shared / unionSize;
}
|
||||
|
||||
/**
 * Decide whether a GEM candidate duplicates an existing registry row.
 *
 * Two acceptance paths:
 *  (a) Identical token sets (Jaccard == 1.0 after stopword removal) AND
 *      any single endpoint pairing within MATCH_NAME_IDENTICAL_DISTANCE_KM.
 *      Handles a candidate that digitizes a different segment of the same
 *      physical line (Dampier-Bunbury, PR #3406 review: terminus 13.7 km
 *      apart — over the 5 km gate — but identical token sets).
 *  (b) Average endpoint distance <= MATCH_DISTANCE_KM AND Jaccard >=
 *      MATCH_JACCARD_MIN — the conjunctive rule for slight name variation
 *      ("Druzhba Pipeline" vs "Druzhba Oil Pipeline").
 */
function isDuplicate(candidate, existing) {
  const similarity = jaccard(candidate.name, existing.name);
  // Path (a): identical token-set plus a geographic anchor, which keeps a
  // theoretical same-name pipeline in a different ocean from matching.
  if (
    similarity >= 1.0 &&
    minPairwiseEndpointDistanceKm(candidate, existing) <= MATCH_NAME_IDENTICAL_DISTANCE_KM
  ) {
    return true;
  }
  // Path (b): identical names with no nearby endpoint also land here and
  // fail the distance gate (min pairing > 25 km implies average > 5 km),
  // so distinct pipelines that merely share a name stay distinct.
  if (averageEndpointDistanceKm(candidate, existing) > MATCH_DISTANCE_KM) {
    return false;
  }
  return similarity >= MATCH_JACCARD_MIN;
}
|
||||
|
||||
/**
 * Make `baseId` unique against `takenIds` by appending "-2", "-3", ... until
 * no collision remains. Stable: the same inputs always yield the same id.
 */
function uniqueId(baseId, takenIds) {
  if (!takenIds.has(baseId)) return baseId;
  let suffix = 2;
  let candidate = `${baseId}-${suffix}`;
  while (takenIds.has(candidate)) {
    suffix += 1;
    candidate = `${baseId}-${suffix}`;
  }
  return candidate;
}
|
||||
|
||||
/**
 * Pure, deterministic dedup of GEM candidates against an existing registry.
 *
 * Existing rows always win on a match — hand-curated rows carry richer
 * evidence than GEM's minimum-viable rows. Each candidate is also checked
 * against earlier-accepted candidates so two GEM rows that duplicate each
 * other (but match nothing in `existing`) cannot both be imported; within
 * toAdd the FIRST accepted candidate is retained, deterministic by
 * candidate-list order. Non-duplicates get a collision-free id.
 *
 * @param {Array<{ id: string, name: string, startPoint: {lat:number,lon:number}, endPoint: {lat:number,lon:number} }>} existing
 * @param {Array<{ id: string, name: string, startPoint: {lat:number,lon:number}, endPoint: {lat:number,lon:number} }>} candidates
 * @returns {{ toAdd: any[], skippedDuplicates: Array<{ candidate: any, matchedExistingId: string, distanceKm: number, jaccard: number }> }}
 */
export function dedupePipelines(existing, candidates) {
  const usedIds = new Set(existing.map((row) => row.id));
  const toAdd = [];
  const skippedDuplicates = [];

  // Existing rows take precedence over earlier-accepted candidates; within
  // each pool the first match wins.
  const findMatch = (cand) => {
    for (const pool of [existing, toAdd]) {
      const hit = pool.find((row) => isDuplicate(cand, row));
      if (hit) return hit;
    }
    return null;
  };

  for (const cand of candidates) {
    const match = findMatch(cand);
    if (match) {
      // Record enough detail that the caller can audit which GEM
      // candidates were absorbed, and why.
      skippedDuplicates.push({
        candidate: cand,
        matchedExistingId: match.id,
        distanceKm: averageEndpointDistanceKm(cand, match),
        jaccard: jaccard(cand.name, match.name),
      });
    } else {
      const finalId = uniqueId(cand.id, usedIds);
      usedIds.add(finalId);
      toAdd.push({ ...cand, id: finalId });
    }
  }

  return { toAdd, skippedDuplicates };
}

// Internal exports for test coverage; not part of the public surface.
export const _internal = {
  haversineKm,
  averageEndpointDistanceKm,
  minPairwiseEndpointDistanceKm,
  tokenize,
  jaccard,
  isDuplicate,
  uniqueId,
  STOPWORDS,
  MATCH_DISTANCE_KM,
  MATCH_JACCARD_MIN,
  MATCH_NAME_IDENTICAL_DISTANCE_KM,
};
|
||||
@@ -29,7 +29,13 @@ export const PIPELINES_TTL_SECONDS = 21 * 24 * 3600;
|
||||
|
||||
const VALID_PHYSICAL_STATES = new Set(['flowing', 'reduced', 'offline', 'unknown']);
|
||||
const VALID_COMMERCIAL_STATES = new Set(['under_contract', 'expired', 'suspended', 'unknown']);
|
||||
const VALID_SOURCES = new Set(['operator', 'regulator', 'press', 'satellite', 'ais-relay']);
|
||||
// `gem` covers rows imported from Global Energy Monitor's Oil & Gas
|
||||
// Infrastructure Trackers (CC-BY 4.0). Treated as an evidence-bearing source
|
||||
// for non-flowing badges in the same way as `press` / `satellite` / `ais-relay`,
|
||||
// since GEM is an academic/curated dataset with traceable provenance — not a
|
||||
// silent default. Exported alongside VALID_OIL_PRODUCT_CLASSES so test suites
|
||||
// can assert against the same source of truth the validator uses.
|
||||
export const VALID_SOURCES = new Set(['operator', 'regulator', 'press', 'satellite', 'ais-relay', 'gem']);
|
||||
// Required on every oil pipeline. `crude` = crude-oil lines (default),
|
||||
// `products` = refined-product lines (gasoline/diesel/jet), `mixed` =
|
||||
// dual-use bridges moving both. Gas pipelines don't carry this field
|
||||
@@ -38,9 +44,11 @@ const VALID_SOURCES = new Set(['operator', 'regulator', 'press', 'satellite', 'a
|
||||
// inline copy in tests could silently drift when the enum is extended.
|
||||
export const VALID_OIL_PRODUCT_CLASSES = new Set(['crude', 'products', 'mixed']);
|
||||
|
||||
// Minimum viable registry size. Expansion to ~75 each happens in the follow-up
|
||||
// GEM import PR; this seeder doesn't care about exact counts beyond the floor.
|
||||
const MIN_PIPELINES_PER_REGISTRY = 8;
|
||||
// Minimum viable registry size. Post-GEM-import floor: 200. Live counts after
|
||||
// the 2025-11 GGIT + 2025-03 GOIT merge are 297 gas / 334 oil; 200 leaves ~100
|
||||
// rows of jitter headroom so a partial GEM re-import or a coverage-narrowing
|
||||
// release fails loud rather than silently halving the registry.
|
||||
const MIN_PIPELINES_PER_REGISTRY = 200;
|
||||
|
||||
function loadRegistry(filename) {
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
@@ -90,6 +98,13 @@ export function validateRegistry(data) {
|
||||
if (!p.endPoint || typeof p.endPoint.lat !== 'number' || typeof p.endPoint.lon !== 'number') return false;
|
||||
if (!isValidLatLon(p.startPoint.lat, p.startPoint.lon)) return false;
|
||||
if (!isValidLatLon(p.endPoint.lat, p.endPoint.lon)) return false;
|
||||
// Reject degenerate routes where startPoint == endPoint. PR #3406 review
|
||||
// surfaced 9 GEM rows (incl. Trans-Alaska, Enbridge Line 3, Ichthys)
|
||||
// whose source GeoJSON had a Point geometry or a single-coord LineString,
|
||||
// producing zero-length pipelines that render as map-point artifacts and
|
||||
// skew aggregate-length statistics. Defense in depth — converter also
|
||||
// drops these — but the validator gate makes the contract explicit.
|
||||
if (p.startPoint.lat === p.endPoint.lat && p.startPoint.lon === p.endPoint.lon) return false;
|
||||
|
||||
if (!p.evidence || typeof p.evidence !== 'object') return false;
|
||||
const ev = p.evidence;
|
||||
@@ -104,13 +119,16 @@ export function validateRegistry(data) {
|
||||
|
||||
// Every non-`flowing` badge requires at least one evidence field with signal.
|
||||
// This prevents shipping an `offline` label with zero supporting evidence.
|
||||
// `gem` joins the evidence-bearing sources because GEM is a curated
|
||||
// academic dataset with traceable provenance, not a silent default.
|
||||
if (ev.physicalState !== 'flowing') {
|
||||
const hasEvidence =
|
||||
ev.operatorStatement != null ||
|
||||
ev.sanctionRefs.length > 0 ||
|
||||
ev.physicalStateSource === 'ais-relay' ||
|
||||
ev.physicalStateSource === 'satellite' ||
|
||||
ev.physicalStateSource === 'press';
|
||||
ev.physicalStateSource === 'press' ||
|
||||
ev.physicalStateSource === 'gem';
|
||||
if (!hasEvidence) return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6365,6 +6365,12 @@ const vessels = new Map();
|
||||
const vesselHistory = new Map();
|
||||
const densityGrid = new Map();
|
||||
const candidateReports = new Map();
|
||||
// Parallel store for tanker (AIS ship type 80-89) position reports — populated
|
||||
// alongside candidateReports but with a different inclusion predicate.
|
||||
// Required by the Energy Atlas live-tanker map layer (parity-push PR 3).
|
||||
// Kept SEPARATE from candidateReports so the existing military-detection
|
||||
// consumer's contract is unchanged.
|
||||
const tankerReports = new Map();
|
||||
|
||||
let snapshotSequence = 0;
|
||||
let lastSnapshot = null;
|
||||
@@ -6643,6 +6649,26 @@ function processPositionReportForSnapshot(data) {
|
||||
timestamp: now,
|
||||
});
|
||||
}
|
||||
|
||||
// Tanker capture for the Energy Atlas live-tanker layer. AIS ship type
|
||||
// 80-89 covers all tanker subtypes per ITU-R M.1371 (oil/chemical tanker,
|
||||
// hazardous cargo classes A-D, and other tanker variants). Stored in a
|
||||
// SEPARATE Map from candidateReports so the existing military-detection
|
||||
// consumer never sees tankers (their contract is unchanged).
|
||||
const shipType = Number(meta.ShipType);
|
||||
if (Number.isFinite(shipType) && shipType >= 80 && shipType <= 89) {
|
||||
tankerReports.set(mmsi, {
|
||||
mmsi,
|
||||
name: meta.ShipName || '',
|
||||
lat,
|
||||
lon,
|
||||
shipType,
|
||||
heading: pos.TrueHeading,
|
||||
speed: pos.Sog,
|
||||
course: pos.Cog,
|
||||
timestamp: now,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
function cleanupAggregates() {
|
||||
@@ -6701,6 +6727,18 @@ function cleanupAggregates() {
|
||||
// Hard cap: keep freshest candidate reports.
|
||||
evictMapByTimestamp(candidateReports, MAX_CANDIDATE_REPORTS, (report) => report.timestamp || 0);
|
||||
|
||||
// Tanker reports: same retention window as candidate reports — a vessel
|
||||
// that hasn't broadcast a position in CANDIDATE_RETENTION_MS is no longer
|
||||
// useful for a live-tanker map layer. Cap at 2× the per-response cap so
|
||||
// we have headroom for bbox filtering to find recent fixes anywhere on
|
||||
// the globe (not just one chokepoint).
|
||||
for (const [mmsi, report] of tankerReports) {
|
||||
if (report.timestamp < now - CANDIDATE_RETENTION_MS) {
|
||||
tankerReports.delete(mmsi);
|
||||
}
|
||||
}
|
||||
evictMapByTimestamp(tankerReports, MAX_TANKER_REPORTS_PER_RESPONSE * 10, (report) => report.timestamp || 0);
|
||||
|
||||
// Clean chokepoint buckets: remove stale vessels
|
||||
for (const [cpName, bucket] of chokepointBuckets) {
|
||||
for (const mmsi of bucket) {
|
||||
@@ -6844,6 +6882,55 @@ function getCandidateReportsSnapshot() {
|
||||
.slice(0, MAX_CANDIDATE_REPORTS);
|
||||
}
|
||||
|
||||
// Server-side cap for tanker_reports per request — protects the response
|
||||
// payload from a misbehaving filter that returns thousands of vessels.
|
||||
// 200/zone × 6 chokepoints in worst case is well under any practical
|
||||
// CDN/edge payload budget. Energy Atlas live-tanker layer also caps
|
||||
// client-side on top of this.
|
||||
const MAX_TANKER_REPORTS_PER_RESPONSE = 200;
|
||||
|
||||
/**
|
||||
* Parse a "bbox" query param of the form "swLat,swLon,neLat,neLon" into a
|
||||
* {sw: {lat, lon}, ne: {lat, lon}} or null if absent / malformed.
|
||||
*
|
||||
* Validates:
|
||||
* - 4 comma-separated finite numbers
|
||||
* - sw <= ne (after normalization)
|
||||
* - bbox size ≤ 10° on both lat and lon (10° max per parity-push plan U7;
|
||||
* prevents pulling every vessel through one query)
|
||||
*
|
||||
* @param {string | null | undefined} raw
|
||||
* @returns {{ sw: {lat:number, lon:number}, ne: {lat:number, lon:number} } | null}
|
||||
*/
|
||||
function parseBbox(raw) {
|
||||
if (!raw) return null;
|
||||
const parts = String(raw).split(',').map(Number);
|
||||
if (parts.length !== 4 || parts.some((v) => !Number.isFinite(v))) return null;
|
||||
const [swLat, swLon, neLat, neLon] = parts;
|
||||
if (swLat > neLat || swLon > neLon) return null;
|
||||
if (swLat < -90 || neLat > 90 || swLon < -180 || neLon > 180) return null;
|
||||
if (neLat - swLat > 10 || neLon - swLon > 10) return null; // 10° guard
|
||||
return { sw: { lat: swLat, lon: swLon }, ne: { lat: neLat, lon: neLon } };
|
||||
}
|
||||
|
||||
/**
|
||||
* Filtered + capped tanker reports. Sorted by recency of last fix so the
|
||||
* 200-cap keeps the most-recently-seen vessels rather than a random subset.
|
||||
*
|
||||
* @param {{ sw: {lat:number,lon:number}, ne: {lat:number,lon:number} } | null} bbox
|
||||
*/
|
||||
function getTankerReportsSnapshot(bbox) {
|
||||
let arr = Array.from(tankerReports.values());
|
||||
if (bbox) {
|
||||
arr = arr.filter(
|
||||
(r) => r.lat >= bbox.sw.lat && r.lat <= bbox.ne.lat &&
|
||||
r.lon >= bbox.sw.lon && r.lon <= bbox.ne.lon,
|
||||
);
|
||||
}
|
||||
arr.sort((a, b) => b.timestamp - a.timestamp);
|
||||
return arr.slice(0, MAX_TANKER_REPORTS_PER_RESPONSE);
|
||||
}
|
||||
|
||||
function buildSnapshot() {
|
||||
const now = Date.now();
|
||||
if (lastSnapshot && now - lastSnapshotAt < Math.floor(SNAPSHOT_INTERVAL_MS / 2)) {
|
||||
@@ -8791,19 +8878,40 @@ const server = http.createServer(async (req, res) => {
|
||||
buildSnapshot(); // ensures cache is warm
|
||||
const url = new URL(req.url, `http://localhost:${PORT}`);
|
||||
const includeCandidates = url.searchParams.get('candidates') === 'true';
|
||||
const json = includeCandidates ? lastSnapshotWithCandJson : lastSnapshotJson;
|
||||
const gz = includeCandidates ? lastSnapshotWithCandGzip : lastSnapshotGzip;
|
||||
const br = includeCandidates ? lastSnapshotWithCandBrotli : lastSnapshotBrotli;
|
||||
const includeTankers = url.searchParams.get('tankers') === 'true';
|
||||
const bbox = parseBbox(url.searchParams.get('bbox'));
|
||||
|
||||
if (json) {
|
||||
sendPreGzipped(req, res, 200, {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'public, max-age=2',
|
||||
'CDN-Cache-Control': 'public, max-age=10',
|
||||
}, json, gz, br);
|
||||
// Fast path: pre-gzipped cache covers the {with|without}-candidates
|
||||
// case only (no tankers, no bbox). Used by the existing AIS density +
|
||||
// military-detection consumers, which are the vast majority of traffic.
|
||||
if (!includeTankers && !bbox) {
|
||||
const json = includeCandidates ? lastSnapshotWithCandJson : lastSnapshotJson;
|
||||
const gz = includeCandidates ? lastSnapshotWithCandGzip : lastSnapshotGzip;
|
||||
const br = includeCandidates ? lastSnapshotWithCandBrotli : lastSnapshotBrotli;
|
||||
if (json) {
|
||||
sendPreGzipped(req, res, 200, {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'public, max-age=2',
|
||||
'CDN-Cache-Control': 'public, max-age=10',
|
||||
}, json, gz, br);
|
||||
} else {
|
||||
const payload = { ...lastSnapshot, candidateReports: includeCandidates ? getCandidateReportsSnapshot() : [], tankerReports: [] };
|
||||
sendCompressed(req, res, 200, {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'public, max-age=2',
|
||||
'CDN-Cache-Control': 'public, max-age=10',
|
||||
}, JSON.stringify(payload));
|
||||
}
|
||||
} else {
|
||||
// Cold start fallback
|
||||
const payload = { ...lastSnapshot, candidateReports: includeCandidates ? getCandidateReportsSnapshot() : [] };
|
||||
// Live-tanker path: bbox-filtered + tanker-included responses skip the
|
||||
// pre-gzipped cache (bbox space would explode the cache key set).
|
||||
// Handler-side 60s cache (server/worldmonitor/maritime/v1/get-vessel-snapshot.ts)
|
||||
// and the gateway 'live' tier absorb identical-bbox requests.
|
||||
const payload = {
|
||||
...lastSnapshot,
|
||||
candidateReports: includeCandidates ? getCandidateReportsSnapshot() : [],
|
||||
tankerReports: includeTankers ? getTankerReportsSnapshot(bbox) : [],
|
||||
};
|
||||
sendCompressed(req, res, 200, {
|
||||
'Content-Type': 'application/json',
|
||||
'Cache-Control': 'public, max-age=2',
|
||||
|
||||
372
scripts/brief-quality-report.mjs
Normal file
@@ -0,0 +1,372 @@
|
||||
#!/usr/bin/env node
|
||||
// Daily brief-quality dashboard.
|
||||
//
|
||||
// Pulls the most recent N replay-log ticks for a (variant, lang,
|
||||
// sensitivity, date) tuple and computes a single quality_score plus
|
||||
// the component metrics that produced it. Run daily; watch the trend.
|
||||
//
|
||||
// "Are we getting better" loop:
|
||||
// 1. Run this script, record the quality_score.
|
||||
// 2. Make a config change (env flip, code merge, threshold tune).
|
||||
// 3. Wait one cron tick, re-run, compare.
|
||||
// 4. If quality_score went down, revert.
|
||||
//
|
||||
// Metrics computed:
|
||||
// - pair_recall_cluster — % of "should-cluster" labeled pairs that
|
||||
// end up in the same topic at the active threshold
|
||||
// - false_adjacency — % of "should-separate" labeled pairs that end
|
||||
// up adjacent (false positive)
|
||||
// - cap_truncation_rate — % of qualified stories truncated by the
|
||||
// MAX_STORIES_PER_USER cap. ONLY reported when production drop logs
|
||||
// are piped in via --drop-lines-stdin. Without that input, this
|
||||
// metric is omitted entirely (no fallback estimate — replay records
|
||||
// don't capture the post-cap output count, so any estimate would be
|
||||
// misleading).
|
||||
// - multi_member_topic_share — % of topics with size > 1
|
||||
// - quality_score — composite (recall × 0.6 + (1-false-adj) × 0.3 +
|
||||
// multi-member × 0.1)
|
||||
//
|
||||
// Usage:
|
||||
// node --import tsx/esm scripts/brief-quality-report.mjs # today, full:en:all
|
||||
// node --import tsx/esm scripts/brief-quality-report.mjs --rule full:en:critical # specific rule
|
||||
// node --import tsx/esm scripts/brief-quality-report.mjs --date 2026-04-24 # specific date
|
||||
// node --import tsx/esm scripts/brief-quality-report.mjs --json # machine-readable
|
||||
//
|
||||
// Pipe production drop logs for accurate cap-truncation:
|
||||
// railway logs --service scripts-cron-digest-notifications | grep 'brief filter drops' | \
|
||||
// node --import tsx/esm scripts/brief-quality-report.mjs --drop-lines-stdin
|
||||
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { loadEnvFile, getRedisCredentials } from './_seed-utils.mjs';
|
||||
import { singleLinkCluster } from './lib/brief-dedup-embed.mjs';
|
||||
import { normalizeForEmbedding } from './lib/brief-embedding.mjs';
|
||||
|
||||
loadEnvFile(import.meta.url);
|
||||
|
||||
const REPLAY_KEY_PREFIX = 'digest:replay-log:v1';
|
||||
|
||||
function parseArgs(argv) {
|
||||
const out = {
|
||||
date: new Date().toISOString().slice(0, 10),
|
||||
rule: 'full:en:all',
|
||||
json: false,
|
||||
dropLinesStdin: false,
|
||||
};
|
||||
for (let i = 2; i < argv.length; i++) {
|
||||
const a = argv[i];
|
||||
if (a === '--date') out.date = argv[++i];
|
||||
else if (a === '--rule') out.rule = argv[++i];
|
||||
else if (a === '--json') out.json = true;
|
||||
else if (a === '--drop-lines-stdin') out.dropLinesStdin = true;
|
||||
else if (a === '--help' || a === '-h') {
|
||||
console.log(readFileSync(fileURLToPath(import.meta.url), 'utf8').split('\n').slice(0, 38).join('\n'));
|
||||
process.exit(0);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
async function redisLrangeAll(url, token, key) {
|
||||
const out = [];
|
||||
const PAGE = 1000;
|
||||
let start = 0;
|
||||
while (true) {
|
||||
const stop = start + PAGE - 1;
|
||||
const res = await fetch(`${url}/lrange/${encodeURIComponent(key)}/${start}/${stop}`, {
|
||||
headers: { Authorization: `Bearer ${token}` },
|
||||
});
|
||||
if (!res.ok) throw new Error(`LRANGE failed: HTTP ${res.status}`);
|
||||
const body = await res.json();
|
||||
const items = Array.isArray(body?.result) ? body.result : [];
|
||||
out.push(...items);
|
||||
if (items.length < PAGE) break;
|
||||
start += PAGE;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
async function redisMget(url, token, keys) {
|
||||
if (keys.length === 0) return [];
|
||||
const path = keys.map((k) => encodeURIComponent(k)).join('/');
|
||||
const res = await fetch(`${url}/mget/${path}`, { headers: { Authorization: `Bearer ${token}` } });
|
||||
if (!res.ok) throw new Error(`MGET failed: HTTP ${res.status}`);
|
||||
const body = await res.json();
|
||||
return Array.isArray(body?.result) ? body.result : new Array(keys.length).fill(null);
|
||||
}
|
||||
|
||||
function loadLabels() {
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const raw = JSON.parse(readFileSync(resolve(__dirname, 'data', 'brief-adjacency-pairs.json'), 'utf8'));
|
||||
return (raw.pairs ?? []).map((p) => ({
|
||||
a: normalizeForEmbedding(p.title_a),
|
||||
b: normalizeForEmbedding(p.title_b),
|
||||
expected: p.expected,
|
||||
}));
|
||||
}
|
||||
|
||||
async function readStdinDropLines() {
|
||||
if (process.stdin.isTTY) return [];
|
||||
const chunks = [];
|
||||
for await (const chunk of process.stdin) chunks.push(chunk);
|
||||
return Buffer.concat(chunks).toString('utf8').split('\n').filter((l) => l.includes('brief filter drops'));
|
||||
}
|
||||
|
||||
function parseDropLine(line) {
|
||||
// [digest] brief filter drops user=X sensitivity=Y variant=Z outcome=W in=N dropped_*=N out=N
|
||||
const fields = {};
|
||||
for (const m of line.matchAll(/(\w+)=([^\s]+)/g)) fields[m[1]] = m[2];
|
||||
return fields;
|
||||
}
|
||||
|
||||
function summariseDropLines(lines) {
|
||||
let in_total = 0, out_total = 0, cap_total = 0, samples = 0;
|
||||
let shipped = 0, rejected = 0;
|
||||
for (const line of lines) {
|
||||
const f = parseDropLine(line);
|
||||
if (!f.in || !f.out) continue;
|
||||
in_total += Number(f.in);
|
||||
out_total += Number(f.out);
|
||||
cap_total += Number(f.dropped_cap ?? 0);
|
||||
samples += 1;
|
||||
if (f.outcome === 'shipped') shipped += 1;
|
||||
else if (f.outcome === 'rejected') rejected += 1;
|
||||
}
|
||||
return {
|
||||
samples,
|
||||
shipped,
|
||||
rejected,
|
||||
cap_truncation_rate: in_total > 0 ? cap_total / in_total : 0,
|
||||
avg_in: samples > 0 ? in_total / samples : 0,
|
||||
avg_out: samples > 0 ? out_total / samples : 0,
|
||||
};
|
||||
}
|
||||
|
||||
// Mirror production: groupTopicsPostDedup operates on top-N reps after
|
||||
// the score floor, not the raw 800-rep deduped pool. Read from env so
|
||||
// a Railway DIGEST_SCORE_MIN / DIGEST_MAX_ITEMS flip stays in sync;
|
||||
// fall back to documented defaults if env is empty/invalid.
|
||||
const SCORE_FLOOR_DEFAULT = 63;
|
||||
const TOP_N_DEFAULT = 30;
|
||||
const MIN_SURVIVING_REPS = 5;
|
||||
|
||||
function envInt(name, fallback) {
|
||||
const raw = process.env[name];
|
||||
if (raw == null || raw === '') return fallback;
|
||||
const n = Number.parseInt(raw, 10);
|
||||
return Number.isFinite(n) && n > 0 ? n : fallback;
|
||||
}
|
||||
const SCORE_FLOOR = envInt('DIGEST_SCORE_MIN', SCORE_FLOOR_DEFAULT);
|
||||
const TOP_N = envInt('DIGEST_MAX_ITEMS', TOP_N_DEFAULT);
|
||||
|
||||
function scoreReplay({ records, embeddingByHash, labels, threshold }) {
|
||||
// Reuse the latest tick's reps as the canonical "today's brief" sample.
|
||||
const ticks = new Map();
|
||||
for (const r of records) {
|
||||
if (!ticks.has(r.briefTickId)) ticks.set(r.briefTickId, []);
|
||||
ticks.get(r.briefTickId).push(r);
|
||||
}
|
||||
const tickIds = [...ticks.keys()].sort();
|
||||
const latestTickId = tickIds[tickIds.length - 1];
|
||||
if (!latestTickId) return null;
|
||||
const allReps = ticks.get(latestTickId).filter((r) => r.isRep);
|
||||
if (allReps.length === 0) return null;
|
||||
|
||||
// Apply floor + slice to mirror production.
|
||||
const slicedReplay = allReps
|
||||
.filter((r) => Number(r.currentScore ?? 0) >= SCORE_FLOOR)
|
||||
.sort((a, b) => Number(b.currentScore ?? 0) - Number(a.currentScore ?? 0))
|
||||
.slice(0, TOP_N);
|
||||
if (slicedReplay.length <= 1) return null;
|
||||
|
||||
// Remap shape: replay uses storyHash/normalizedTitle; brief-dedup
|
||||
// expects hash/title. Title carries the normalized form so labels
|
||||
// match directly. Filter out reps whose embedding is missing from
|
||||
// the cache (transient eviction); skip the tick only if too few
|
||||
// reps survive.
|
||||
const remapped = slicedReplay.map((r) => ({
|
||||
hash: r.storyHash,
|
||||
title: r.normalizedTitle,
|
||||
currentScore: r.currentScore,
|
||||
}));
|
||||
const sliced = remapped.filter((r) => Array.isArray(embeddingByHash.get(r.hash)));
|
||||
const missingEmbedDrops = remapped.length - sliced.length;
|
||||
if (sliced.length < MIN_SURVIVING_REPS) {
|
||||
return { error: `only ${sliced.length} reps had cached embeddings (need ≥${MIN_SURVIVING_REPS}); ${missingEmbedDrops} dropped — re-run after cache warm-up` };
|
||||
}
|
||||
const items = sliced.map((r) => ({ title: r.title, embedding: embeddingByHash.get(r.hash) }));
|
||||
|
||||
// Direct single-link partition matches what production groupTopicsPostDedup does internally.
|
||||
const { clusters } = singleLinkCluster(items, { cosineThreshold: threshold, vetoFn: null });
|
||||
|
||||
const topicOfIdx = new Array(sliced.length).fill(-1);
|
||||
clusters.forEach((members, tIdx) => { for (const i of members) topicOfIdx[i] = tIdx; });
|
||||
|
||||
const titleToTopic = new Map();
|
||||
for (let i = 0; i < sliced.length; i++) titleToTopic.set(sliced[i].title, topicOfIdx[i]);
|
||||
|
||||
const topicCount = clusters.length;
|
||||
const sizes = clusters.map((c) => c.length);
|
||||
|
||||
let cluster_total = 0, cluster_hit = 0, separate_total = 0, separate_violation = 0;
|
||||
const violations = [];
|
||||
for (const lab of labels) {
|
||||
const tA = titleToTopic.get(lab.a);
|
||||
const tB = titleToTopic.get(lab.b);
|
||||
if (tA == null || tB == null) continue;
|
||||
const clustered = tA === tB;
|
||||
if (lab.expected === 'cluster') {
|
||||
cluster_total += 1;
|
||||
if (clustered) cluster_hit += 1;
|
||||
else violations.push({ kind: 'missed_cluster', a: lab.a, b: lab.b });
|
||||
} else {
|
||||
separate_total += 1;
|
||||
if (clustered) {
|
||||
separate_violation += 1;
|
||||
violations.push({ kind: 'false_adjacency', a: lab.a, b: lab.b });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const pair_recall_cluster = cluster_total > 0 ? cluster_hit / cluster_total : 0;
|
||||
const false_adjacency = separate_total > 0 ? separate_violation / separate_total : 0;
|
||||
const multi_member = sizes.filter((x) => x > 1).length;
|
||||
const multi_member_topic_share = topicCount > 0 ? multi_member / topicCount : 0;
|
||||
|
||||
return {
|
||||
tick_id: latestTickId,
|
||||
rep_count: allReps.length,
|
||||
sliced_rep_count: sliced.length,
|
||||
missing_embed_drops: missingEmbedDrops,
|
||||
score_floor: SCORE_FLOOR,
|
||||
top_n: TOP_N,
|
||||
topic_count: topicCount,
|
||||
multi_member_topics: multi_member,
|
||||
multi_member_topic_share,
|
||||
pair_recall_cluster,
|
||||
false_adjacency,
|
||||
cluster_pairs_evaluated: cluster_total,
|
||||
separate_pairs_evaluated: separate_total,
|
||||
violations,
|
||||
quality_score: pair_recall_cluster * 0.6 + (1 - false_adjacency) * 0.3 + multi_member_topic_share * 0.1,
|
||||
};
|
||||
}
|
||||
|
||||
function renderReport(out) {
|
||||
const L = [];
|
||||
L.push(`# Brief Quality Report — ${out.ctx.rule} on ${out.ctx.date}`);
|
||||
L.push('');
|
||||
L.push(`Active topic threshold: ${out.ctx.threshold} (env DIGEST_DEDUP_TOPIC_THRESHOLD or default 0.45)`);
|
||||
L.push(`Replay records: ${out.ctx.recordCount} across ${out.ctx.tickCount} ticks`);
|
||||
L.push('');
|
||||
if (out.replay?.error) {
|
||||
L.push('## Topic-grouping quality (latest tick)');
|
||||
L.push('');
|
||||
L.push(`⚠️ Could not score: ${out.replay.error}`);
|
||||
L.push('');
|
||||
} else if (out.replay) {
|
||||
L.push('## Topic-grouping quality (latest tick)');
|
||||
L.push('');
|
||||
L.push(`- **quality_score: ${out.replay.quality_score.toFixed(3)}** (target: ↑ over time)`);
|
||||
L.push(`- pair_recall_cluster: ${(out.replay.pair_recall_cluster * 100).toFixed(1)}% (${out.replay.cluster_pairs_evaluated} labeled pairs evaluated)`);
|
||||
L.push(`- false_adjacency: ${(out.replay.false_adjacency * 100).toFixed(1)}% (${out.replay.separate_pairs_evaluated} labeled pairs evaluated)`);
|
||||
L.push(`- multi_member_topic_share: ${(out.replay.multi_member_topic_share * 100).toFixed(1)}% (${out.replay.multi_member_topics}/${out.replay.topic_count} topics)`);
|
||||
L.push(`- topic_count: ${out.replay.topic_count} (from ${out.replay.sliced_rep_count} sliced reps; ${out.replay.rep_count} total in tick; floor=${out.replay.score_floor}, topN=${out.replay.top_n}${out.replay.missing_embed_drops > 0 ? `, ${out.replay.missing_embed_drops} reps dropped on missing embedding` : ''})`);
|
||||
if (out.replay.violations?.length > 0) {
|
||||
L.push('');
|
||||
L.push(' Violations vs labeled pairs:');
|
||||
for (const v of out.replay.violations) {
|
||||
const arrow = v.kind === 'missed_cluster' ? '✗ should-cluster but separate' : '✗ should-separate but clustered';
|
||||
L.push(` ${arrow}: "${v.a.slice(0, 60)}…" ↔ "${v.b.slice(0, 60)}…"`);
|
||||
}
|
||||
}
|
||||
L.push('');
|
||||
}
|
||||
if (out.drops) {
|
||||
L.push('## Production filter-drop telemetry (from stdin)');
|
||||
L.push('');
|
||||
L.push(`- samples: ${out.drops.samples} (shipped=${out.drops.shipped}, rejected=${out.drops.rejected})`);
|
||||
L.push(`- avg in: ${out.drops.avg_in.toFixed(1)} stories/tick`);
|
||||
L.push(`- avg out: ${out.drops.avg_out.toFixed(1)} stories/tick`);
|
||||
L.push(`- **cap_truncation_rate: ${(out.drops.cap_truncation_rate * 100).toFixed(1)}%** (target: ↓ after cap bump)`);
|
||||
L.push('');
|
||||
}
|
||||
L.push('## Interpretation');
|
||||
L.push('');
|
||||
L.push('- Higher `quality_score` and `pair_recall_cluster`, lower `false_adjacency` and `cap_truncation_rate` = better.');
|
||||
L.push('- Run before each config change; compare deltas. If a change moves quality_score down, revert.');
|
||||
L.push('- Add labeled pairs to `scripts/data/brief-adjacency-pairs.json` whenever a brief surfaces an adjacency outcome that\'s clearly right or clearly wrong.');
|
||||
return L.join('\n');
|
||||
}
|
||||
|
||||
async function main() {
|
||||
const args = parseArgs(process.argv);
|
||||
const { url, token } = getRedisCredentials();
|
||||
const replayKey = `${REPLAY_KEY_PREFIX}:${args.rule}:${args.date}`;
|
||||
|
||||
const rawList = await redisLrangeAll(url, token, replayKey);
|
||||
const records = rawList.map((s) => { try { return JSON.parse(s); } catch { return null; } }).filter(Boolean);
|
||||
if (records.length === 0) {
|
||||
console.error(`No replay records at ${replayKey}.`);
|
||||
process.exit(2);
|
||||
}
|
||||
const tickIds = new Set(records.map((r) => r.briefTickId));
|
||||
|
||||
// Load embeddings for the latest tick only (the dashboard only scores
|
||||
// the latest snapshot — earlier ticks are the sweep harness's job).
|
||||
const sortedTickIds = [...tickIds].sort();
|
||||
const latestTickId = sortedTickIds[sortedTickIds.length - 1];
|
||||
const latestRecords = records.filter((r) => r.briefTickId === latestTickId);
|
||||
const reps = latestRecords.filter((r) => r.isRep);
|
||||
const cacheKeys = [...new Set(reps.map((r) => r.embeddingCacheKey).filter(Boolean))];
|
||||
const CHUNK = 50;
|
||||
const embByCacheKey = new Map();
|
||||
for (let i = 0; i < cacheKeys.length; i += CHUNK) {
|
||||
const chunk = cacheKeys.slice(i, i + CHUNK);
|
||||
const vals = await redisMget(url, token, chunk);
|
||||
for (let j = 0; j < chunk.length; j++) {
|
||||
if (typeof vals[j] !== 'string') continue;
|
||||
try { const v = JSON.parse(vals[j]); if (Array.isArray(v)) embByCacheKey.set(chunk[j], v); } catch { /* skip */ }
|
||||
}
|
||||
}
|
||||
const embeddingByHash = new Map();
|
||||
for (const r of reps) {
|
||||
const v = embByCacheKey.get(r.embeddingCacheKey);
|
||||
if (Array.isArray(v)) embeddingByHash.set(r.storyHash, v);
|
||||
}
|
||||
|
||||
// Active threshold: read from latest tickConfig, else default 0.45.
|
||||
const threshold = latestRecords[0]?.tickConfig?.topicThreshold ?? 0.45;
|
||||
const labels = loadLabels();
|
||||
|
||||
// Always call scoreReplay when there are reps. The function itself
|
||||
// filters missing embeddings and returns { error: '…' } if too few
|
||||
// survive (MIN_SURVIVING_REPS guard); renderReport surfaces that
|
||||
// error path with a ⚠️ warning. Gating here on
|
||||
// `embeddingByHash.size === reps.length` was defeating the
|
||||
// intended graceful-degradation behaviour — Greptile P2 on PR #3390.
|
||||
const replay = reps.length > 0
|
||||
? scoreReplay({ records: latestRecords, embeddingByHash, labels, threshold })
|
||||
: null;
|
||||
|
||||
const dropLines = args.dropLinesStdin ? await readStdinDropLines() : [];
|
||||
const drops = dropLines.length > 0 ? summariseDropLines(dropLines) : null;
|
||||
|
||||
const out = {
|
||||
ctx: { rule: args.rule, date: args.date, threshold, recordCount: records.length, tickCount: tickIds.size },
|
||||
replay,
|
||||
drops,
|
||||
};
|
||||
|
||||
if (args.json) {
|
||||
console.log(JSON.stringify(out, null, 2));
|
||||
} else {
|
||||
console.log(renderReport(out));
|
||||
}
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error(`brief-quality-report: ${err?.stack ?? err?.message ?? String(err)}`);
|
||||
process.exit(1);
|
||||
});
|
||||
102
scripts/data/brief-adjacency-pairs.json
Normal file
@@ -0,0 +1,102 @@
|
||||
{
|
||||
"meta": {
|
||||
"doc": "Labeled adjacency pairs for brief topic-grouping evaluation. Read by scripts/sweep-topic-thresholds.mjs and scripts/brief-quality-report.mjs. Each pair carries two normalized headlines and an expected verdict ('cluster' or 'separate'). The harness matches by normalizeForEmbedding(title) against replay-log records, so titles must match what was actually embedded (apply the same wire-suffix-stripping as scripts/lib/brief-embedding.mjs:normalizeForEmbedding).",
|
||||
"schema_version": 1,
|
||||
"observed_briefs": [
|
||||
"2026-04-24-2001 — first brief that surfaced the adjacency complaint (Iran-Iran-Iran split)",
|
||||
"2026-04-25-0802 — first brief after PR #3387 merged; Iran cluster of size 6 plus 3 split-off Iran subtopics"
|
||||
],
|
||||
"labeling_guidance": [
|
||||
"cluster — these two stories cover the SAME news event or share so much narrative context that a reader would expect them to be adjacent in a daily brief.",
|
||||
"separate — these two stories cover unrelated news events; clustering them would surface as a false adjacency to the reader.",
|
||||
"Aim for a balanced ratio. 1:1 cluster:separate is healthy. Heavy-skew toward 'cluster' inflates recall numbers without measuring false-adjacency.",
|
||||
"Add new pairs whenever a brief surfaces an adjacency outcome that was clearly right or clearly wrong. Treat this file as an append-only labeled corpus."
|
||||
]
|
||||
},
|
||||
"pairs": [
|
||||
{
|
||||
"title_a": "Pentagon chief Hegseth says US blockade on Iran 'going global'",
|
||||
"title_b": "US Navy secretary fired amid Iran blockade",
|
||||
"expected": "cluster",
|
||||
"rationale": "Same event narrative — Hegseth + US Navy + Iran blockade. The 2026-04-24 brief had these adjacent; that adjacency should be preserved.",
|
||||
"source_brief": "2026-04-24-2001"
|
||||
},
|
||||
{
|
||||
"title_a": "Pentagon chief Hegseth says US blockade on Iran 'going global'",
|
||||
"title_b": "US-Iran War Live Updates: Another US Aircraft Carrier Reaches Gulf, Joins 2 Already Operating There",
|
||||
"expected": "cluster",
|
||||
"rationale": "Same news cycle — US military escalation against Iran. Both name US naval/military assets and Iran. The 2026-04-24 brief split them (positions 1 vs 7), driving the original user complaint.",
|
||||
"source_brief": "2026-04-24-2001"
|
||||
},
|
||||
{
|
||||
"title_a": "36 serving military officers to be arraigned for coup plot against President Tinubu",
|
||||
"title_b": "Behind Nigeria's murky coup plot — the money, the prayers and a Nollywood arrest",
|
||||
"expected": "cluster",
|
||||
"rationale": "Same event — Nigeria's coup plot. President Tinubu IS Nigeria. The 2026-04-24 brief split them across positions 4 and 6 with movie junk in between.",
|
||||
"source_brief": "2026-04-24-2001"
|
||||
},
|
||||
{
|
||||
"title_a": "Pentagon chief Hegseth says US blockade on Iran 'going global'",
|
||||
"title_b": "Navy Secretary John Phelan fired amid tensions with Pete Hegseth as Iran war rages",
|
||||
"expected": "cluster",
|
||||
"rationale": "Same actor (Hegseth) + same news cycle (Iran war / US firings). The 2026-04-25 morning brief still split these (positions 1 vs 4) — proves the threshold-too-tight defect persists.",
|
||||
"source_brief": "2026-04-25-0802"
|
||||
},
|
||||
{
|
||||
"title_a": "Israel-Iran war LIVE: Iran's supreme leader Mojtaba Khamenei 'gravely wounded': NYT",
|
||||
"title_b": "Iran war leaves seafarers stranded in the Gulf",
|
||||
"expected": "cluster",
|
||||
"rationale": "Both are Iran-war effect stories. Reasonable expectation that they appear adjacent in a daily brief about the same conflict.",
|
||||
"source_brief": "2026-04-25-0802"
|
||||
},
|
||||
{
|
||||
"title_a": "Iran war leaves seafarers stranded in the Gulf",
|
||||
"title_b": "UAE pupils seek university places after exams scrapped due to Iran war",
|
||||
"expected": "cluster",
|
||||
"rationale": "Both are Iran-war regional-effect stories from Gulf countries. The 2026-04-25 brief split them across positions 8 and 11.",
|
||||
"source_brief": "2026-04-25-0802"
|
||||
},
|
||||
{
|
||||
"title_a": "ECB interest rate dilemma: Eurozone growth stalls as Iran war fuels inflation",
|
||||
"title_b": "Iran war leaves seafarers stranded in the Gulf",
|
||||
"expected": "cluster",
|
||||
"rationale": "Both are Iran-war economic-effect stories. Different geographic angle (Europe vs Gulf) but same root narrative.",
|
||||
"source_brief": "2026-04-25-0802"
|
||||
},
|
||||
{
|
||||
"title_a": "Pentagon chief Hegseth says US blockade on Iran 'going global'",
|
||||
"title_b": "Average of 47 women and girls killed daily between 2023-2025 during Israel's war in Gaza, UN says",
|
||||
"expected": "separate",
|
||||
"rationale": "Different conflicts (US-Iran military vs Israel-Gaza humanitarian). Clustering would create a false adjacency that conflates two distinct news threads.",
|
||||
"source_brief": "2026-04-24-2001"
|
||||
},
|
||||
{
|
||||
"title_a": "Pentagon chief Hegseth says US blockade on Iran 'going global'",
|
||||
"title_b": "Armenia marks 111th anniversary of Armenian genocide in Yerevan",
|
||||
"expected": "separate",
|
||||
"rationale": "Different topics, different geographies, different news cycles. Should NEVER cluster.",
|
||||
"source_brief": "2026-04-24-2001"
|
||||
},
|
||||
{
|
||||
"title_a": "Pentagon chief Hegseth says US blockade on Iran 'going global'",
|
||||
"title_b": "Could the US military handle a monster invasion? Monarch: Legacy of Monsters begs the question",
|
||||
"expected": "separate",
|
||||
"rationale": "Anti-pair. The Apple TV+ show review should never cluster with real geopolitical news. (Separate classifier-hardening problem; included here as a hard-negative to keep the false-adjacency metric honest.)",
|
||||
"source_brief": "2026-04-24-2001"
|
||||
},
|
||||
{
|
||||
"title_a": "Israel-Iran war LIVE: Iran's supreme leader Mojtaba Khamenei 'gravely wounded': NYT",
|
||||
"title_b": "Syrian authorities arrest main suspect in 2013 Tadamon massacre",
|
||||
"expected": "separate",
|
||||
"rationale": "Both Middle East but different countries, different events, different news cycles (Iran-war today vs 12-year-old Syrian war crime).",
|
||||
"source_brief": "2026-04-25-0802"
|
||||
},
|
||||
{
|
||||
"title_a": "Pentagon chief Hegseth says US blockade on Iran 'going global'",
|
||||
"title_b": "How Israel targeted and killed Lebanese journalist Amal Khalil",
|
||||
"expected": "separate",
|
||||
"rationale": "Different conflicts, different perpetrators, different victims. Should NOT cluster even though both are Middle East / military.",
|
||||
"source_brief": "2026-04-25-0802"
|
||||
}
|
||||
]
|
||||
}
|
||||
429
scripts/import-gem-pipelines.mjs
Normal file
@@ -0,0 +1,429 @@
|
||||
// @ts-check
|
||||
//
|
||||
// One-shot import: GEM Oil & Gas Infrastructure Trackers (CC-BY 4.0) →
|
||||
// scripts/data/pipelines-{gas,oil}.json shape.
|
||||
//
|
||||
// PROVENANCE / OPERATOR-MEDIATED:
|
||||
// This script is INTENTIONALLY local-file-only — it does NOT fetch GEM at
|
||||
// runtime. The GEM download URL changes per release; a hardcoded URL would
|
||||
// silently fetch a different version than the one we attribute. The
|
||||
// operator runs:
|
||||
//
|
||||
// 1. Visit https://globalenergymonitor.org/projects/global-oil-gas-infrastructure-tracker/
|
||||
// (registration required for direct download even though the data
|
||||
// itself is CC-BY 4.0 licensed).
|
||||
// 2. Download the latest gas + oil tracker Excel workbooks.
|
||||
// 3. Pre-convert each workbook's primary sheet to JSON (Numbers /
|
||||
// pandas / csvkit / equivalent) using the canonical column names
|
||||
// documented in REQUIRED_COLUMNS below. Country names should be
|
||||
// pre-mapped to ISO 3166-1 alpha-2 codes during conversion.
|
||||
// 4. Save the JSON to a local path and run this script with:
|
||||
// GEM_PIPELINES_FILE=/path/to/gem.json node scripts/import-gem-pipelines.mjs --merge
|
||||
// 5. Record the GEM release date + download URL + file SHA256 in the
|
||||
// commit message and docs/methodology/pipelines.mdx, per the
|
||||
// seed-imf-external.mjs provenance pattern.
|
||||
//
|
||||
// EXECUTION MODES:
|
||||
// --print-candidates : parse + print candidates as JSON to stdout (dry run)
|
||||
// --merge : parse, dedupe against existing pipelines-{gas,oil}.json,
|
||||
// write merged JSON to disk, abort on validate failure
|
||||
//
|
||||
// NO xlsx DEPENDENCY: the operator pre-converts externally; this keeps the
|
||||
// runtime dependency surface tight and avoids the known CVE history of the
|
||||
// xlsx package for a quarterly one-shot operation.
|
||||
|
||||
import { readFileSync, writeFileSync } from 'node:fs';
|
||||
import { dirname, resolve as resolvePath } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { dedupePipelines } from './_pipeline-dedup.mjs';
|
||||
import { validateRegistry } from './_pipeline-registry.mjs';
|
||||
|
||||
/**
 * Canonical input columns. The operator's Excel-to-JSON conversion must
 * preserve these EXACT key names for each row in `pipelines[]`. Schema-drift
 * sentinel below throws on missing keys before any data is emitted.
 *
 * Presence-only contract: the sentinel asserts each key EXISTS on every
 * row — value typing/coercion (coords, capacity, fuel) is checked
 * per-row inside parseGemPipelines, where bad values drop the row
 * rather than aborting the run.
 *
 * Two columns are read by the parser but not required here:
 * `product` (oil rows only; parser falls back to 'crude') and
 * `startYear` (parser falls back to 0).
 */
export const REQUIRED_COLUMNS = [
  'name',
  'operator',
  'fuel', // 'Natural Gas' | 'Oil'
  'fromCountry', // ISO 3166-1 alpha-2
  'toCountry', // ISO 3166-1 alpha-2
  'transitCountries', // string[] (may be empty)
  'capacity',
  'capacityUnit', // 'bcm/y' | 'bbl/d' | 'Mbd'
  'lengthKm',
  'status', // GEM Status string (mapped below)
  'startLat',
  'startLon',
  'endLat',
  'endLon',
];
|
||||
|
||||
/**
 * Maps GEM status strings to our `physicalState` enum.
 * Default: 'unknown' — falls into the "treat as not commissioned" bucket.
 *
 * Lookup happens in mapStatus() below; any GEM status not listed here
 * (including future GEM renames) resolves to 'unknown'.
 */
const STATUS_MAP = {
  // In service
  Operating: 'flowing',
  Operational: 'flowing',
  // Not yet commissioned → 'unknown' (same bucket as unrecognised statuses)
  Construction: 'unknown',
  Proposed: 'unknown',
  // Out of service
  Cancelled: 'offline',
  Mothballed: 'offline',
  Idle: 'offline',
  'Shut-in': 'offline',
};
|
||||
|
||||
/**
 * Maps GEM `product` field to our `productClass` enum (oil only).
 *
 * Exact-match table; mapProductClass() below adds a best-effort
 * substring fallback for spreadsheet variations and defaults to
 * 'crude'.
 */
const PRODUCT_CLASS_MAP = {
  'Crude Oil': 'crude',
  Crude: 'crude',
  'Refined Products': 'products',
  'Petroleum Products': 'products',
  Products: 'products',
  Mixed: 'mixed',
  'Crude/Products': 'mixed',
};
|
||||
|
||||
// Range guards for WGS84 coordinates. Number.isFinite does NOT coerce,
// so strings, NaN, and Infinity all fail here — upstream Number(...)
// coercions of blank cells are rejected instead of producing bogus
// geometry.
const VALID_LAT = (lat) => Number.isFinite(lat) && Math.abs(lat) <= 90;
const VALID_LON = (lon) => Number.isFinite(lon) && Math.abs(lon) <= 180;
|
||||
|
||||
/**
 * Build a stable registry id from a pipeline name + origin country.
 * Lowercases, collapses every non-alphanumeric run to a single '-',
 * strips leading/trailing dashes, caps the name part at 60 chars,
 * then suffixes the lowercased country code.
 *
 * @param {string} name
 * @param {string} country - ISO 3166-1 alpha-2
 * @returns {string}
 */
function slugify(name, country) {
  let slug = name.toLowerCase().replace(/[^a-z0-9]+/g, '-');
  slug = slug.replace(/^-+|-+$/g, '').slice(0, 60);
  return slug + '-' + country.toLowerCase();
}
|
||||
|
||||
/**
 * Classify a GEM row as 'gas' or 'oil' from its free-text fuel field.
 * The gas check runs first, so mixed labels like "Oil & Gas" classify
 * as gas — preserve that ordering. Returns null when neither keyword
 * family matches; the caller drops and counts the row.
 *
 * @param {Record<string, any>} row
 * @returns {'gas' | 'oil' | null}
 */
function inferFuel(row) {
  const fuelText = String(row.fuel ?? '').toLowerCase();
  if (fuelText.includes('gas')) return 'gas';
  const oilKeywords = ['oil', 'crude', 'petroleum'];
  return oilKeywords.some((kw) => fuelText.includes(kw)) ? 'oil' : null;
}
|
||||
|
||||
/**
 * Map a GEM status string to our `physicalState` enum via STATUS_MAP.
 * Unrecognised statuses fall back to 'unknown' ("treat as not
 * commissioned").
 *
 * Uses Object.hasOwn instead of a bare `STATUS_MAP[k] ?? 'unknown'`
 * lookup: the map is a plain object literal, so a status string that
 * happens to name an inherited Object.prototype member (e.g.
 * 'constructor', 'toString') would otherwise return a function value
 * instead of 'unknown'.
 *
 * @param {string} gemStatus
 * @returns {string} one of STATUS_MAP's values, or 'unknown'
 */
function mapStatus(gemStatus) {
  return Object.hasOwn(STATUS_MAP, gemStatus) ? STATUS_MAP[gemStatus] : 'unknown';
}
|
||||
|
||||
/**
 * Map the GEM `product` field to our `productClass` enum (oil only).
 *
 * Resolution order:
 *   1. falsy or non-string input → 'crude' (conservative default per
 *      plan U2; the non-string guard also prevents a crash at
 *      `.toLowerCase()` on e.g. a numeric cell)
 *   2. exact own-key match in PRODUCT_CLASS_MAP — guarded with
 *      Object.hasOwn so inherited Object.prototype members (e.g. a
 *      product string of 'toString') can never leak a function value
 *      out of the lookup
 *   3. best-effort substring match for Excel column variations
 *   4. fallback 'crude'
 *
 * @param {unknown} rawProduct
 * @returns {'crude' | 'products' | 'mixed'}
 */
function mapProductClass(rawProduct) {
  if (!rawProduct || typeof rawProduct !== 'string') return 'crude'; // conservative default per plan U2
  if (Object.hasOwn(PRODUCT_CLASS_MAP, rawProduct)) return PRODUCT_CLASS_MAP[rawProduct];
  // Best-effort substring match for Excel column variations
  const lower = rawProduct.toLowerCase();
  if (lower.includes('crude') && lower.includes('product')) return 'mixed';
  if (lower.includes('crude')) return 'crude';
  if (lower.includes('product') || lower.includes('refined')) return 'products';
  return 'crude';
}
|
||||
|
||||
/**
 * Gas capacity → bcm/y. Only pass-through unit spellings are supported
 * today; anything else throws loudly so the operator notices instead of
 * the import silently writing zeros.
 *
 * @param {unknown} value
 * @param {string} unit
 * @returns {number}
 * @throws {Error} on any unit other than 'bcm/y' / 'bcm/yr'
 */
function convertCapacityToBcmYr(value, unit) {
  switch (unit) {
    case 'bcm/y':
    case 'bcm/yr':
      return Number(value);
    default:
      // Future: add bcf/d → bcm/y conversion if needed.
      throw new Error(`Unsupported gas capacity unit: ${unit}. Expected 'bcm/y'.`);
  }
}
|
||||
|
||||
/**
 * Oil capacity → Mbd.
 *
 * Schema convention: capacityMbd is in MILLION barrels per day (e.g.
 * the CPC pipeline = 1.4 Mbd = 1.4M bbl/day), hence:
 *   'Mbd'   → preserved
 *   'bbl/d' → ÷ 1,000,000
 *   'kbd'   → ÷ 1,000 (rare)
 *
 * @param {unknown} value
 * @param {string} unit
 * @returns {number}
 * @throws {Error} on any unrecognised unit, so the operator sees the
 *   problem immediately instead of getting silent zeros.
 */
function convertCapacityToMbd(value, unit) {
  switch (unit) {
    case 'Mbd':
      return Number(value);
    case 'bbl/d':
      return Number(value) / 1_000_000;
    case 'kbd':
      return Number(value) / 1_000;
    default:
      throw new Error(`Unsupported oil capacity unit: ${unit}. Expected 'Mbd' / 'bbl/d' / 'kbd'.`);
  }
}
|
||||
|
||||
/**
 * Resolve `lastEvidenceUpdate` for emitted candidates.
 *
 * Prefers the operator-recorded `downloadedAt`, then `sourceVersion`,
 * accepting either a full ISO string or a bare YYYY-MM-DD prefix and
 * coercing to midnight UTC — so two parser runs over the same input
 * produce byte-identical output.
 *
 * Falls back to the unix-epoch sentinel rather than `new Date()`:
 * the fallback is deliberately ugly so anyone reviewing the data file
 * sees that the operator forgot to set the date and re-runs.
 *
 * @param {Record<string, unknown>} envelope
 * @returns {string} ISO-8601 timestamp at midnight UTC
 */
function resolveEvidenceTimestamp(envelope) {
  const datePrefix = /^\d{4}-\d{2}-\d{2}/;
  for (const field of [envelope.downloadedAt, envelope.sourceVersion]) {
    if (typeof field !== 'string') continue;
    const match = datePrefix.exec(field);
    if (match) return `${match[0]}T00:00:00Z`;
  }
  // Sentinel: per the operator runbook GEM data SHOULD always carry
  // downloadedAt; the epoch date surfaces the gap loudly in the diff.
  return '1970-01-01T00:00:00Z';
}
|
||||
|
||||
/**
 * Parse a GEM-shape JSON object into our two-registry candidate arrays.
 *
 * Per-row policy: malformed envelope or row SHAPE throws (schema drift
 * must fail loud), while per-row VALUE problems (unclassifiable fuel,
 * out-of-range coordinates, unsupported or non-positive capacity) drop
 * just that row and bump a counter in `droppedReasons`.
 *
 * NOTE(review): `droppedReasons` is tallied but never returned or
 * logged, so the "let the operator notice via the count delta" intent
 * in the catch comment below currently has no output path — consider
 * returning it alongside { gas, oil }.
 *
 * @param {unknown} data
 * @returns {{ gas: any[], oil: any[] }}
 * @throws {Error} on schema drift or malformed input. (Unknown capacity
 *   units do NOT throw — those rows are dropped and counted.)
 */
export function parseGemPipelines(data) {
  if (!data || typeof data !== 'object' || Array.isArray(data)) {
    throw new Error('parseGemPipelines: input must be an object');
  }
  const obj = /** @type {Record<string, unknown>} */ (data);
  if (!Array.isArray(obj.pipelines)) {
    throw new Error('parseGemPipelines: input must contain pipelines[] array');
  }
  // Compute once per parse run so every emitted candidate gets the SAME
  // timestamp — and so two runs on identical input produce byte-identical
  // JSON (Greptile P2 on PR #3397: previous use of `new Date().toISOString()`
  // made re-running the parser produce a noisy diff every time).
  const evidenceTimestamp = resolveEvidenceTimestamp(obj);

  // Schema sentinel: assert every required column is present on every row.
  // GEM occasionally renames columns between releases; the operator's
  // conversion step is supposed to normalize, but we double-check here so
  // a missed rename fails loud instead of producing silent zero-data.
  // (Presence-only: value coercion/validation happens in the emit loop.)
  for (const [i, row] of obj.pipelines.entries()) {
    if (!row || typeof row !== 'object') {
      throw new Error(`parseGemPipelines: pipelines[${i}] is not an object`);
    }
    const r = /** @type {Record<string, unknown>} */ (row);
    for (const col of REQUIRED_COLUMNS) {
      if (!(col in r)) {
        throw new Error(
          `parseGemPipelines: schema drift — pipelines[${i}] missing column "${col}". ` +
          `Re-run the operator's Excel→JSON conversion using the canonical ` +
          `column names documented in scripts/import-gem-pipelines.mjs::REQUIRED_COLUMNS.`,
        );
      }
    }
  }

  const gas = [];
  const oil = [];
  // Per-reason drop tallies for rows rejected in the loop below.
  const droppedReasons = { fuel: 0, coords: 0, capacity: 0 };

  for (const row of obj.pipelines) {
    const r = /** @type {Record<string, any>} */ (row);
    const fuel = inferFuel(r);
    if (!fuel) {
      droppedReasons.fuel++;
      continue;
    }

    // Coordinates: coerce then range-check; any invalid endpoint drops
    // the whole row (a pipeline with one bogus endpoint is unusable).
    const startLat = Number(r.startLat);
    const startLon = Number(r.startLon);
    const endLat = Number(r.endLat);
    const endLon = Number(r.endLon);
    if (!VALID_LAT(startLat) || !VALID_LON(startLon) || !VALID_LAT(endLat) || !VALID_LON(endLon)) {
      droppedReasons.coords++;
      continue;
    }

    // Capacity: per-fuel target field and unit conversion.
    let capacityField, capacityValue;
    try {
      if (fuel === 'gas') {
        capacityField = 'capacityBcmYr';
        capacityValue = convertCapacityToBcmYr(r.capacity, r.capacityUnit);
      } else {
        capacityField = 'capacityMbd';
        capacityValue = convertCapacityToMbd(r.capacity, r.capacityUnit);
      }
    } catch (err) {
      // Unsupported unit → drop the row; let the operator notice via the count
      // delta in dry-run output. Throwing would abort the entire run on a
      // single bad row, which is too brittle.
      droppedReasons.capacity++;
      continue;
    }
    if (!Number.isFinite(capacityValue) || capacityValue <= 0) {
      droppedReasons.capacity++;
      continue;
    }

    const id = slugify(r.name, r.fromCountry);
    // Keep only string entries; anything else (nulls, numbers) is
    // silently discarded rather than polluting the registry.
    const transitCountries = Array.isArray(r.transitCountries)
      ? r.transitCountries.filter((c) => typeof c === 'string')
      : [];

    // Registry candidate. `[capacityField]` is a computed key:
    // 'capacityBcmYr' for gas rows, 'capacityMbd' for oil rows.
    const candidate = {
      id,
      name: r.name,
      operator: r.operator,
      commodityType: fuel,
      fromCountry: r.fromCountry,
      toCountry: r.toCountry,
      transitCountries,
      [capacityField]: capacityValue,
      lengthKm: Number(r.lengthKm) || 0,
      // `startYear` is not in REQUIRED_COLUMNS; 0 when absent/invalid.
      inService: Number(r.startYear) || 0,
      startPoint: { lat: startLat, lon: startLon },
      endPoint: { lat: endLat, lon: endLon },
      evidence: {
        physicalState: mapStatus(r.status),
        physicalStateSource: 'gem',
        operatorStatement: null,
        commercialState: 'unknown',
        sanctionRefs: [],
        lastEvidenceUpdate: evidenceTimestamp,
        classifierVersion: 'gem-import-v1',
        classifierConfidence: 0.4,
      },
    };

    // `product` is likewise optional; oil-only field.
    if (fuel === 'oil') {
      candidate.productClass = mapProductClass(r.product);
    }

    (fuel === 'gas' ? gas : oil).push(candidate);
  }

  return { gas, oil };
}
|
||||
|
||||
/**
 * Read a GEM-shape JSON file and return parsed candidates. Same return
 * shape as parseGemPipelines but accepts a file path instead of an
 * in-memory object — useful for CLI and dedup pipelines.
 *
 * @param {string} filePath
 * @returns {{ gas: any[], oil: any[] }}
 * @throws {Error} when the file is unreadable, not valid JSON, or fails
 *   parseGemPipelines' schema checks.
 */
export function loadGemPipelinesFromFile(filePath) {
  const raw = readFileSync(filePath, 'utf-8');
  let data;
  try {
    data = JSON.parse(raw);
  } catch (err) {
    // Fixes: message previously carried the wrong function prefix
    // ("parseGemPipelines:"), and the original SyntaxError was
    // discarded — attach it as `cause` so the operator can see the
    // position of the JSON defect, not just our wrapper text.
    throw new Error(
      `loadGemPipelinesFromFile: file at ${filePath} is not valid JSON. ` +
        `Did the operator pre-convert the GEM Excel correctly?`,
      { cause: err },
    );
  }
  return parseGemPipelines(data);
}
|
||||
|
||||
/**
 * Load one of the on-disk registry files (scripts/data/<filename>) and
 * return both its resolved absolute path and its parsed JSON envelope.
 *
 * @param {string} filename - e.g. 'pipelines-gas.json'
 * @returns {{ path: string, envelope: any }}
 */
function loadExistingRegistry(filename) {
  const scriptDir = dirname(fileURLToPath(import.meta.url));
  const registryPath = resolvePath(scriptDir, 'data', filename);
  const envelope = JSON.parse(readFileSync(registryPath, 'utf-8'));
  return { path: registryPath, envelope };
}
|
||||
|
||||
/**
 * Build (but do NOT write) a merged registry envelope. Pure: no disk
 * writes. Throws on validation failure so the caller can short-circuit
 * before any file is written.
 *
 * @param {string} filename - 'pipelines-gas.json' or 'pipelines-oil.json'
 * @param {any[]} candidates - parser output for that fuel
 * @returns {{ path: string, mergedEnvelope: any, added: number, skipped: number, total: number }}
 * @throws {Error} when the merged envelope fails validateRegistry.
 */
function prepareMerge(filename, candidates) {
  const { path, envelope } = loadExistingRegistry(filename);
  const existing = Object.values(envelope.pipelines ?? {});
  const { toAdd, skippedDuplicates } = dedupePipelines(existing, candidates);

  // Append in a stable order (alphabetical-by-id) so repeated runs produce
  // a clean diff. Hand-curated rows keep their original ordering at the top.
  const appended = [...toAdd].sort((a, b) => a.id.localeCompare(b.id));
  const mergedPipelines = { ...envelope.pipelines };
  for (const p of appended) mergedPipelines[p.id] = p;

  const mergedEnvelope = {
    ...envelope,
    // Tag the GEM attribution onto the source string exactly once,
    // regardless of how many times the import is re-run.
    source: envelope.source?.includes('Global Energy Monitor')
      ? envelope.source
      : `${envelope.source ?? 'Hand-curated'} + Global Energy Monitor (CC-BY 4.0)`,
    pipelines: mergedPipelines,
  };

  if (!validateRegistry(mergedEnvelope)) {
    // Fix: this interpolation previously read `$(unknown)` — shell-style
    // parens, not a JS `${}` slot — so the literal text "$(unknown)" was
    // printed and the failing registry was never named.
    throw new Error(
      `prepareMerge: merged ${filename} would FAIL validateRegistry. ` +
      `Aborting before writing to disk. Inspect the diff with --print-candidates first.`,
    );
  }

  return {
    path,
    mergedEnvelope,
    added: toAdd.length,
    skipped: skippedDuplicates.length,
    total: Object.keys(mergedPipelines).length,
  };
}
|
||||
|
||||
/**
 * Cross-file-atomic merge: prepare AND validate BOTH gas + oil envelopes
 * before writing EITHER file. If oil validation throws after gas already
 * validated, neither file is touched — prevents the half-imported state
 * where gas has GEM rows on disk but oil doesn't.
 *
 * Two-phase: pure prepare phase, then side-effecting write phase in a
 * stable order (gas first, oil second). The "validate everything before
 * any write" guarantee is what prevents partial state on failure.
 *
 * @returns {{ gas: ReturnType<typeof prepareMerge>, oil: ReturnType<typeof prepareMerge> }}
 */
function mergeBothRegistries(gasCandidates, oilCandidates) {
  // Phase 1 (pure): any validation failure throws here, before disk I/O.
  const prepared = {
    gas: prepareMerge('pipelines-gas.json', gasCandidates),
    oil: prepareMerge('pipelines-oil.json', oilCandidates),
  };

  // Phase 2 (side effects): both validated → write both.
  for (const result of [prepared.gas, prepared.oil]) {
    writeFileSync(result.path, JSON.stringify(result.mergedEnvelope, null, 2) + '\n');
  }

  return prepared;
}
|
||||
|
||||
// CLI entry point: only fires when this file is the entry script.
// Exit codes: 0 = success, 1 = usage/config error, 2 = merge/validation
// failure. Status text goes to stderr (console.error); only
// --print-candidates writes to stdout — presumably to keep stdout clean
// for piping the candidate JSON. TODO confirm.
if (process.argv[1] && process.argv[1].endsWith('import-gem-pipelines.mjs')) {
  // Operator supplies the pre-converted GEM JSON path via env (see the
  // runbook in the file header).
  const filePath = process.env.GEM_PIPELINES_FILE;
  if (!filePath) {
    console.error('GEM_PIPELINES_FILE env var not set. See script header for operator runbook.');
    process.exit(1);
  }
  const args = new Set(process.argv.slice(2));
  // Parse once; both modes share the same candidate arrays.
  const { gas, oil } = loadGemPipelinesFromFile(filePath);
  if (args.has('--print-candidates')) {
    // Dry run: emit candidates as JSON on stdout, touch nothing on disk.
    process.stdout.write(JSON.stringify({ gas, oil }, null, 2) + '\n');
  } else if (args.has('--merge')) {
    try {
      // mergeBothRegistries validates BOTH envelopes before writing
      // either — so a validation failure on oil after gas succeeded
      // leaves neither file modified on disk. Prevents the half-imported
      // state the previous per-file flow could produce.
      const { gas: gasResult, oil: oilResult } = mergeBothRegistries(gas, oil);
      console.error(`gas: +${gasResult.added} added, ${gasResult.skipped} duplicates skipped, ${gasResult.total} total`);
      console.error(`oil: +${oilResult.added} added, ${oilResult.skipped} duplicates skipped, ${oilResult.total} total`);
      console.error(
        `Wrote merged data to scripts/data/pipelines-{gas,oil}.json. ` +
        `Inspect the diff before committing. Per the operator runbook, ` +
        `also update MIN_PIPELINES_PER_REGISTRY in scripts/_pipeline-registry.mjs ` +
        `to a sensible new floor (e.g. 200) once the data is in.`,
      );
    } catch (err) {
      console.error(err instanceof Error ? err.message : String(err));
      process.exit(2);
    }
  } else {
    console.error('Pass --print-candidates (dry run) or --merge (write to data files).');
    process.exit(1);
  }
}
|
||||
@@ -28,7 +28,12 @@ import {
|
||||
|
||||
const SENSITIVITY_RANK = { all: 0, high: 1, critical: 2 };
|
||||
|
||||
function compareRules(a, b) {
|
||||
// Exported so the cron orchestration's two-pass winner walk
|
||||
// (sortedDue / sortedAll) can sort each pass identically to how
|
||||
// `groupEligibleRulesByUser` already orders candidates here. Kept as
|
||||
// a same-shape function so callers can reuse it without re-deriving
|
||||
// the priority key.
|
||||
export function compareRules(a, b) {
|
||||
const aFull = a.variant === 'full' ? 0 : 1;
|
||||
const bFull = b.variant === 'full' ? 0 : 1;
|
||||
if (aFull !== bFull) return aFull - bFull;
|
||||
@@ -151,7 +156,36 @@ export function userDisplayNameFromId(userId) {
|
||||
|
||||
// ── Compose a full brief for a single rule ──────────────────────────────────
|
||||
|
||||
const MAX_STORIES_PER_USER = 12;
|
||||
// Cap on stories shown per user per brief.
|
||||
//
|
||||
// Default 12 — kept at the historical value because the offline sweep
|
||||
// harness (scripts/sweep-topic-thresholds.mjs) showed bumping the cap
|
||||
// to 16 against 2026-04-24 production replay data DROPPED visible
|
||||
// quality at the active 0.45 threshold (visible_quality 0.916 → 0.716;
|
||||
// positions 13-16 are mostly singletons or members of "should-separate"
|
||||
// clusters at this threshold, so they dilute without helping adjacency).
|
||||
//
|
||||
// Env-tunable via DIGEST_MAX_STORIES_PER_USER so future sweep evidence
|
||||
// (different threshold, different label set, different pool composition)
|
||||
// can be acted on with a Railway env flip without a redeploy. Any
|
||||
// invalid / non-positive value falls back to the 12 default.
|
||||
//
|
||||
// "Are we getting better" signal: re-run scripts/sweep-topic-thresholds.mjs
|
||||
// with --cap N before flipping the env, and the daily
|
||||
// scripts/brief-quality-report.mjs after.
|
||||
// Resolve the per-brief story cap from the environment.
// Falls back to the historical default of 12 when
// DIGEST_MAX_STORIES_PER_USER is unset, empty, non-numeric, or
// non-positive (see the rationale comment above for the sweep evidence
// behind 12). Parsed with radix 10 so leading-zero values aren't
// misread.
function readMaxStoriesPerUser() {
  const raw = process.env.DIGEST_MAX_STORIES_PER_USER;
  if (raw == null || raw === '') return 12;
  const n = Number.parseInt(raw, 10);
  return Number.isFinite(n) && n > 0 ? n : 12;
}
|
||||
// Exported so brief-llm.mjs (buildDigestPrompt + hashDigestInput) can
|
||||
// slice to the same cap. Hard-coding `slice(0, 12)` there would mean
|
||||
// the LLM prose only references the first 12 stories even when the
|
||||
// brief envelope carries more — a quiet mismatch between what the
|
||||
// reader sees as story cards vs the AI summary above them. Reviewer
|
||||
// P1 on PR #3389.
|
||||
export const MAX_STORIES_PER_USER = readMaxStoriesPerUser();
|
||||
|
||||
/**
|
||||
* Filter + assemble a BriefEnvelope for one alert rule from a
|
||||
@@ -268,6 +302,17 @@ function digestStoryToUpstreamTopStory(s) {
|
||||
// to 'General' / 'Global' via filterTopStories defaults.
|
||||
category: typeof s?.category === 'string' ? s.category : undefined,
|
||||
countryCode: typeof s?.countryCode === 'string' ? s.countryCode : undefined,
|
||||
// Stable digest story hash. Carried through so:
|
||||
// (a) the canonical synthesis prompt can emit `rankedStoryHashes`
|
||||
// referencing each story by hash (not position, not title),
|
||||
// (b) `filterTopStories` can re-order the pool by ranking BEFORE
|
||||
// applying the MAX_STORIES_PER_USER cap, so the model's
|
||||
// editorial judgment of importance survives the cap.
|
||||
// Falls back to titleHash when the digest path didn't materialise
|
||||
// a primary `hash` (rare; shape varies across producer versions).
|
||||
hash: typeof s?.hash === 'string' && s.hash.length > 0
|
||||
? s.hash
|
||||
: (typeof s?.titleHash === 'string' ? s.titleHash : undefined),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -279,15 +324,37 @@ function digestStoryToUpstreamTopStory(s) {
|
||||
* Returns null when no story survives the sensitivity filter — caller
|
||||
* falls back to another variant or skips the user.
|
||||
*
|
||||
* Pure / synchronous. The cron orchestration layer pre-resolves the
|
||||
* canonical synthesis (`exec` from `generateDigestProse`) and the
|
||||
* non-personalised `publicLead` (`generateDigestProsePublic`) and
|
||||
* passes them in via `opts.synthesis` — this module performs no LLM
|
||||
* I/O.
|
||||
*
|
||||
* @param {object} rule — enabled alertRule row
|
||||
* @param {unknown[]} digestStories — output of buildDigest(rule, windowStart)
|
||||
* @param {{ clusters: number; multiSource: number }} insightsNumbers
|
||||
* @param {{ nowMs?: number, onDrop?: import('../../shared/brief-filter.js').DropMetricsFn }} [opts]
|
||||
* @param {{
|
||||
* nowMs?: number,
|
||||
* onDrop?: import('../../shared/brief-filter.js').DropMetricsFn,
|
||||
* synthesis?: {
|
||||
* lead?: string,
|
||||
* threads?: Array<{ tag: string, teaser: string }>,
|
||||
* signals?: string[],
|
||||
* rankedStoryHashes?: string[],
|
||||
* publicLead?: string,
|
||||
* publicSignals?: string[],
|
||||
* publicThreads?: Array<{ tag: string, teaser: string }>,
|
||||
* },
|
||||
* }} [opts]
|
||||
* `onDrop` is forwarded to filterTopStories so the seeder can
|
||||
* aggregate per-user filter-drop counts without this module knowing
|
||||
* how they are reported.
|
||||
* `synthesis` (when provided) substitutes envelope.digest.lead /
|
||||
* threads / signals / publicLead with the canonical synthesis from
|
||||
* the orchestration layer, and re-orders the candidate pool by
|
||||
* `synthesis.rankedStoryHashes` before applying the cap.
|
||||
*/
|
||||
export function composeBriefFromDigestStories(rule, digestStories, insightsNumbers, { nowMs = Date.now(), onDrop } = {}) {
|
||||
export function composeBriefFromDigestStories(rule, digestStories, insightsNumbers, { nowMs = Date.now(), onDrop, synthesis } = {}) {
|
||||
if (!Array.isArray(digestStories) || digestStories.length === 0) return null;
|
||||
// Default to 'high' (NOT 'all') for undefined sensitivity, aligning
|
||||
// with buildDigest at scripts/seed-digest-notifications.mjs:392 and
|
||||
@@ -306,10 +373,11 @@ export function composeBriefFromDigestStories(rule, digestStories, insightsNumbe
|
||||
sensitivity,
|
||||
maxStories: MAX_STORIES_PER_USER,
|
||||
onDrop,
|
||||
rankedStoryHashes: synthesis?.rankedStoryHashes,
|
||||
});
|
||||
if (stories.length === 0) return null;
|
||||
const issueDate = issueDateInTz(nowMs, tz);
|
||||
return assembleStubbedBriefEnvelope({
|
||||
const envelope = assembleStubbedBriefEnvelope({
|
||||
user: { name: userDisplayNameFromId(rule.userId), tz },
|
||||
stories,
|
||||
issueDate,
|
||||
@@ -319,4 +387,35 @@ export function composeBriefFromDigestStories(rule, digestStories, insightsNumbe
|
||||
issuedAt: nowMs,
|
||||
localHour: localHourInTz(nowMs, tz),
|
||||
});
|
||||
// Splice canonical synthesis into the envelope's digest. Done as a
|
||||
// shallow merge so the assembleStubbedBriefEnvelope path stays the
|
||||
// single source for greeting/numbers/threads-default. We only
|
||||
// override the LLM-driven fields when the orchestrator supplied
|
||||
// them; missing fields fall back to the stub for graceful
|
||||
// degradation when synthesis fails.
|
||||
if (synthesis && envelope?.data?.digest) {
|
||||
if (typeof synthesis.lead === 'string' && synthesis.lead.length > 0) {
|
||||
envelope.data.digest.lead = synthesis.lead;
|
||||
}
|
||||
if (Array.isArray(synthesis.threads) && synthesis.threads.length > 0) {
|
||||
envelope.data.digest.threads = synthesis.threads;
|
||||
}
|
||||
if (Array.isArray(synthesis.signals)) {
|
||||
envelope.data.digest.signals = synthesis.signals;
|
||||
}
|
||||
if (typeof synthesis.publicLead === 'string' && synthesis.publicLead.length > 0) {
|
||||
envelope.data.digest.publicLead = synthesis.publicLead;
|
||||
}
|
||||
// Public signals/threads are non-personalised siblings produced by
|
||||
// generateDigestProsePublic. Captured separately from the
|
||||
// personalised signals/threads above so the share-URL renderer
|
||||
// never has to choose between leaking and omitting a whole page.
|
||||
if (Array.isArray(synthesis.publicSignals) && synthesis.publicSignals.length > 0) {
|
||||
envelope.data.digest.publicSignals = synthesis.publicSignals;
|
||||
}
|
||||
if (Array.isArray(synthesis.publicThreads) && synthesis.publicThreads.length > 0) {
|
||||
envelope.data.digest.publicThreads = synthesis.publicThreads;
|
||||
}
|
||||
}
|
||||
return envelope;
|
||||
}
|
||||
|
||||
@@ -15,15 +15,21 @@
|
||||
// through to the original stub — the brief must always ship.
|
||||
//
|
||||
// Cache semantics:
|
||||
// - brief:llm:whymatters:v1:{storyHash} — 24h, shared across users.
|
||||
// whyMatters is editorial global-stakes commentary, not user
|
||||
// personalisation, so per-story caching collapses N×U LLM calls
|
||||
// to N.
|
||||
// - brief:llm:digest:v1:{userId}:{poolHash} — 4h, per user.
|
||||
// The executive summary IS personalised to a user's sensitivity
|
||||
// and surfaced story pool, so cache keys include a hash of both.
|
||||
// 4h balances cost vs freshness — hourly cron pays at most once
|
||||
// per 4 ticks per user.
|
||||
// - brief:llm:whymatters:v3:{storyHash}:{leadHash} — 24h, shared
|
||||
// across users for the same (story, lead) pair. v3 includes
|
||||
// SHA-256 of the resolved digest lead so per-story rationales
|
||||
// re-generate when the lead changes (rationales must align with
|
||||
// the headline frame). v2 rows were lead-blind and could drift.
|
||||
// - brief:llm:digest:v3:{userId|public}:{sensitivity}:{poolHash}
|
||||
// — 4h. The canonical synthesis is now ALWAYS produced through
|
||||
// this path (formerly split with `generateAISummary` in the
|
||||
// digest cron). Material includes profile-SHA, greeting bucket,
|
||||
// isPublic flag, and per-story hash so cache hits never serve a
|
||||
// differently-ranked or differently-personalised prompt.
|
||||
// When isPublic=true, the userId slot in the key is the literal
|
||||
// string 'public' so all public-share readers of the same
|
||||
// (date, sensitivity, story-pool) hit the same row — no PII in
|
||||
// the public cache key. v2 rows ignored on rollout.
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
|
||||
@@ -34,6 +40,10 @@ import {
|
||||
parseWhyMatters,
|
||||
} from '../../shared/brief-llm-core.js';
|
||||
import { sanitizeForPrompt } from '../../server/_shared/llm-sanitize.js';
|
||||
// Single source of truth for the brief story cap. Both buildDigestPrompt
|
||||
// and hashDigestInput must slice to this value or the LLM prose drifts
|
||||
// from the rendered story cards (PR #3389 reviewer P1).
|
||||
import { MAX_STORIES_PER_USER } from './brief-compose.mjs';
|
||||
|
||||
/**
|
||||
* Sanitize the story fields that flow into buildWhyMattersUserPrompt and
|
||||
@@ -299,43 +309,122 @@ export async function generateStoryDescription(story, deps) {
|
||||
return parsed;
|
||||
}
|
||||
|
||||
// ── Digest prose (per user) ────────────────────────────────────────────────
|
||||
// ── Digest prose (canonical synthesis) ─────────────────────────────────────
|
||||
//
|
||||
// This is the single LLM call that produces the brief's executive summary.
|
||||
// All channels (email HTML, plain-text, Telegram, Slack, Discord, webhook)
|
||||
// AND the magazine's `digest.lead` read the same string from this output.
|
||||
// The cron orchestration layer also produces a separate non-personalised
|
||||
// `publicLead` via `generateDigestProsePublic` for the share-URL surface.
|
||||
|
||||
const DIGEST_PROSE_SYSTEM =
|
||||
const DIGEST_PROSE_SYSTEM_BASE =
|
||||
'You are the chief editor of WorldMonitor Brief. Given a ranked list of ' +
|
||||
"today's top stories for a reader, produce EXACTLY this JSON and nothing " +
|
||||
'else (no markdown, no code fences, no preamble):\n' +
|
||||
'{\n' +
|
||||
' "lead": "<2–3 sentence executive summary, editorial tone, references ' +
|
||||
'the most important 1–2 threads, addresses the reader in the third person>",\n' +
|
||||
'the most important 1–2 threads, addresses the reader directly>",\n' +
|
||||
' "threads": [\n' +
|
||||
' { "tag": "<one-word editorial category e.g. Energy, Diplomacy, Climate>", ' +
|
||||
'"teaser": "<one sentence describing what is developing>" }\n' +
|
||||
' ],\n' +
|
||||
' "signals": ["<forward-looking imperative phrase, <=14 words>"]\n' +
|
||||
' "signals": ["<forward-looking imperative phrase, <=14 words>"],\n' +
|
||||
' "rankedStoryHashes": ["<short hash from the [h:XXXX] prefix of the most ' +
|
||||
'important story>", "..."]\n' +
|
||||
'}\n' +
|
||||
'Threads: 3–6 items reflecting actual clusters in the stories. ' +
|
||||
'Signals: 2–4 items, forward-looking.';
|
||||
'Signals: 2–4 items, forward-looking. ' +
|
||||
'rankedStoryHashes: at least the top 3 stories by editorial importance, ' +
|
||||
'using the short hash from each story line (the value inside [h:...]). ' +
|
||||
'Lead with the single most impactful development. Lead under 250 words.';
|
||||
|
||||
/**
 * Compute a coarse greeting bucket for cache-key stability.
 *
 * Greeting strings vary in punctuation/capitalisation across locales;
 * collapsing them to one of three time-of-day slots means the cache
 * key only changes when the time-of-day window itself changes.
 *
 * Unrecognised greetings (locale-specific phrases the keyword
 * heuristic doesn't match, empty strings after locale changes,
 * non-string inputs) all land in the literal `''` slot. That is
 * INTENTIONAL — a stable fourth bucket, not a "missing data"
 * sentinel. A user whose greeting flips between a recognised value
 * (e.g. "Good morning") and an unrecognised one gets different cache
 * keys, which is correct: those produce visibly different leads.
 * Greptile P2 on PR #3396 raised the visibility, kept the behaviour.
 *
 * @param {string|null|undefined} greeting
 * @returns {'morning' | 'afternoon' | 'evening' | ''}
 */
export function greetingBucket(greeting) {
  if (typeof greeting !== 'string') return '';
  const lowered = greeting.toLowerCase();
  // Keyword priority mirrors the original chain: morning, afternoon,
  // then the shared evening slot (which also absorbs 'night').
  const slots = [
    ['morning', 'morning'],
    ['afternoon', 'afternoon'],
    ['evening', 'evening'],
    ['night', 'evening'],
  ];
  for (const [needle, bucket] of slots) {
    if (lowered.includes(needle)) return bucket;
  }
  return '';
}
|
||||
|
||||
/**
|
||||
* @typedef {object} DigestPromptCtx
|
||||
* @property {string|null} [profile] formatted user profile lines, or null for non-personalised
|
||||
* @property {string|null} [greeting] e.g. "Good morning", or null for non-personalised
|
||||
* @property {boolean} [isPublic] true = strip personalisation, build a generic lead
|
||||
*/
|
||||
|
||||
/**
|
||||
* Build the digest-prose prompt. When `ctx.profile` / `ctx.greeting`
|
||||
* are present (and `ctx.isPublic !== true`), the prompt asks the
|
||||
* model to address the reader by their watched assets/regions and
|
||||
* open with the greeting. Otherwise the prompt produces a generic
|
||||
* editorial brief safe for share-URL surfaces.
|
||||
*
|
||||
* Per-story line format includes a stable short-hash prefix:
|
||||
* `01 [h:abc12345] [CRITICAL] Headline — Category · Country · Source`
|
||||
* The model emits `rankedStoryHashes` referencing those short hashes
|
||||
* so the cron can re-order envelope.stories before the cap.
|
||||
*
|
||||
* @param {Array<{ hash?: string; headline: string; threatLevel: string; category: string; country: string; source: string }>} stories
|
||||
* @param {string} sensitivity
|
||||
* @param {DigestPromptCtx} [ctx]
|
||||
* @returns {{ system: string; user: string }}
|
||||
*/
|
||||
export function buildDigestPrompt(stories, sensitivity) {
|
||||
const lines = stories.slice(0, 12).map((s, i) => {
|
||||
export function buildDigestPrompt(stories, sensitivity, ctx = {}) {
|
||||
const isPublic = ctx?.isPublic === true;
|
||||
const profile = !isPublic && typeof ctx?.profile === 'string' ? ctx.profile.trim() : '';
|
||||
const greeting = !isPublic && typeof ctx?.greeting === 'string' ? ctx.greeting.trim() : '';
|
||||
|
||||
const lines = stories.slice(0, MAX_STORIES_PER_USER).map((s, i) => {
|
||||
const n = String(i + 1).padStart(2, '0');
|
||||
return `${n}. [${s.threatLevel}] ${s.headline} — ${s.category} · ${s.country} · ${s.source}`;
|
||||
const sev = (s.threatLevel ?? '').toUpperCase();
|
||||
// Short hash prefix — first 8 chars of digest story hash. Keeps
|
||||
// the prompt compact while remaining collision-free for ≤30
|
||||
// stories. Stories without a hash fall back to position-based
|
||||
// 'p<NN>' so the prompt is always well-formed.
|
||||
const shortHash = typeof s.hash === 'string' && s.hash.length >= 8
|
||||
? s.hash.slice(0, 8)
|
||||
: `p${n}`;
|
||||
return `${n}. [h:${shortHash}] [${sev}] ${s.headline} — ${s.category} · ${s.country} · ${s.source}`;
|
||||
});
|
||||
const user = [
|
||||
|
||||
const userParts = [
|
||||
`Reader sensitivity level: ${sensitivity}`,
|
||||
'',
|
||||
"Today's surfaced stories (ranked):",
|
||||
...lines,
|
||||
].join('\n');
|
||||
return { system: DIGEST_PROSE_SYSTEM, user };
|
||||
];
|
||||
if (greeting) {
|
||||
userParts.push('', `Open the lead with: "${greeting}."`);
|
||||
}
|
||||
if (profile) {
|
||||
userParts.push('', 'Reader profile (use to personalise lead and signals):', profile);
|
||||
}
|
||||
userParts.push('', "Today's surfaced stories (ranked):", ...lines);
|
||||
|
||||
return { system: DIGEST_PROSE_SYSTEM_BASE, user: userParts.join('\n') };
|
||||
}
|
||||
|
||||
// Back-compat alias for tests that import the old constant name.
// buildDigestPrompt now returns DIGEST_PROSE_SYSTEM_BASE as the system
// prompt; prefer that name in new code.
export const DIGEST_PROSE_SYSTEM = DIGEST_PROSE_SYSTEM_BASE;
|
||||
|
||||
/**
|
||||
* Strict shape check for a parsed digest-prose object. Used by BOTH
|
||||
* parseDigestProse (fresh LLM output) AND generateDigestProse's
|
||||
@@ -345,14 +434,20 @@ export function buildDigestPrompt(stories, sensitivity) {
|
||||
* returns the caller's object by reference so downstream writes
|
||||
* can't observe internal state.
|
||||
*
|
||||
* v3 (2026-04-25): adds optional `rankedStoryHashes` — short hashes
|
||||
* (≥4 chars each) that the orchestration layer maps back to digest
|
||||
* story `hash` values to re-order envelope.stories before the cap.
|
||||
* Field is optional so v2-shaped cache rows still pass validation
|
||||
* during the rollout window — they just don't carry ranking signal.
|
||||
*
|
||||
* @param {unknown} obj
|
||||
* @returns {{ lead: string; threads: Array<{tag:string;teaser:string}>; signals: string[] } | null}
|
||||
* @returns {{ lead: string; threads: Array<{tag:string;teaser:string}>; signals: string[]; rankedStoryHashes: string[] } | null}
|
||||
*/
|
||||
export function validateDigestProseShape(obj) {
|
||||
if (!obj || typeof obj !== 'object' || Array.isArray(obj)) return null;
|
||||
|
||||
const lead = typeof obj.lead === 'string' ? obj.lead.trim() : '';
|
||||
if (lead.length < 40 || lead.length > 800) return null;
|
||||
if (lead.length < 40 || lead.length > 1500) return null;
|
||||
|
||||
const rawThreads = Array.isArray(obj.threads) ? obj.threads : [];
|
||||
const threads = rawThreads
|
||||
@@ -383,7 +478,18 @@ export function validateDigestProseShape(obj) {
|
||||
})
|
||||
.slice(0, 6);
|
||||
|
||||
return { lead, threads, signals };
|
||||
// rankedStoryHashes: optional. When present, must be array of
|
||||
// non-empty short-hash strings (≥4 chars). Each entry trimmed and
|
||||
// capped to 16 chars (the prompt emits 8). Length capped to
|
||||
// MAX_STORIES_PER_USER × 2 to bound prompt drift.
|
||||
const rawRanked = Array.isArray(obj.rankedStoryHashes) ? obj.rankedStoryHashes : [];
|
||||
const rankedStoryHashes = rawRanked
|
||||
.filter((x) => typeof x === 'string')
|
||||
.map((x) => x.trim().slice(0, 16))
|
||||
.filter((x) => x.length >= 4)
|
||||
.slice(0, MAX_STORIES_PER_USER * 2);
|
||||
|
||||
return { lead, threads, signals, rankedStoryHashes };
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -415,17 +521,39 @@ export function parseDigestProse(text) {
|
||||
* about cache-hit rate; that optimisation is the wrong tradeoff for
|
||||
* an editorial product whose correctness bar is "matches the email".
|
||||
*
|
||||
* v2 key space so pre-fix cache rows (under the looser key) are
|
||||
* ignored on rollout — a one-tick cost to pay for clean semantics.
|
||||
* v3 key space (2026-04-25): material now includes the digest-story
|
||||
* `hash` (per-story rankability), `ctx.profile` SHA-256, greeting
|
||||
* bucket, and isPublic flag. When `ctx.isPublic === true` the userId
|
||||
* slot is replaced with the literal `'public'` so all public-share
|
||||
* readers of the same (sensitivity, story-pool) hit ONE cache row
|
||||
* regardless of caller — no PII in public cache keys, no per-user
|
||||
* inflation. v2 rows are ignored on rollout (paid for once).
|
||||
*
|
||||
* @param {string} userId
|
||||
* @param {Array} stories
|
||||
* @param {string} sensitivity
|
||||
* @param {DigestPromptCtx} [ctx]
|
||||
*/
|
||||
function hashDigestInput(userId, stories, sensitivity) {
|
||||
function hashDigestInput(userId, stories, sensitivity, ctx = {}) {
|
||||
const isPublic = ctx?.isPublic === true;
|
||||
const profileSha = isPublic ? '' : (typeof ctx?.profile === 'string' && ctx.profile.length > 0
|
||||
? createHash('sha256').update(ctx.profile).digest('hex').slice(0, 16)
|
||||
: '');
|
||||
const greetingSlot = isPublic ? '' : greetingBucket(ctx?.greeting);
|
||||
// Canonicalise as JSON of the fields the prompt actually references,
|
||||
// in the prompt's ranked order. Stable stringification via an array
|
||||
// of tuples keeps field ordering deterministic without relying on
|
||||
// JS object-key iteration order.
|
||||
// JS object-key iteration order. Slice MUST match buildDigestPrompt's
|
||||
// slice or the cache key drifts from the prompt content.
|
||||
const material = JSON.stringify([
|
||||
sensitivity ?? '',
|
||||
...stories.slice(0, 12).map((s) => [
|
||||
profileSha,
|
||||
greetingSlot,
|
||||
isPublic ? 'public' : 'private',
|
||||
...stories.slice(0, MAX_STORIES_PER_USER).map((s) => [
|
||||
// hash drives ranking (model emits rankedStoryHashes); without
|
||||
// it the cache ignores re-ranking and stale ordering is served.
|
||||
typeof s.hash === 'string' ? s.hash.slice(0, 8) : '',
|
||||
s.headline ?? '',
|
||||
s.threatLevel ?? '',
|
||||
s.category ?? '',
|
||||
@@ -434,20 +562,29 @@ function hashDigestInput(userId, stories, sensitivity) {
|
||||
]),
|
||||
]);
|
||||
const h = createHash('sha256').update(material).digest('hex').slice(0, 16);
|
||||
return `${userId}:${sensitivity}:${h}`;
|
||||
// userId-slot substitution for public mode — one cache row per
|
||||
// (sensitivity, story-pool) shared across ALL public readers.
|
||||
const userSlot = isPublic ? 'public' : userId;
|
||||
return `${userSlot}:${sensitivity}:${h}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve the digest prose object via cache → LLM.
|
||||
*
|
||||
* Backward-compatible signature: existing 4-arg callers behave like
|
||||
* today (no profile/greeting → non-personalised lead). New callers
|
||||
* pass `ctx` to enable canonical synthesis with greeting + profile.
|
||||
*
|
||||
* @param {string} userId
|
||||
* @param {Array} stories
|
||||
* @param {string} sensitivity
|
||||
* @param {object} deps — { callLLM, cacheGet, cacheSet }
|
||||
* @param {{ callLLM: Function; cacheGet: Function; cacheSet: Function }} deps
|
||||
* @param {DigestPromptCtx} [ctx]
|
||||
*/
|
||||
export async function generateDigestProse(userId, stories, sensitivity, deps) {
|
||||
// v2 key: see hashDigestInput() comment. Full-prompt hash + strict
|
||||
export async function generateDigestProse(userId, stories, sensitivity, deps, ctx = {}) {
|
||||
// v3 key: see hashDigestInput() comment. Full-prompt hash + strict
|
||||
// shape validation on every cache hit.
|
||||
const key = `brief:llm:digest:v2:${hashDigestInput(userId, stories, sensitivity)}`;
|
||||
const key = `brief:llm:digest:v3:${hashDigestInput(userId, stories, sensitivity, ctx)}`;
|
||||
try {
|
||||
const hit = await deps.cacheGet(key);
|
||||
// CRITICAL: re-run the shape validator on cache hits. Without
|
||||
@@ -462,11 +599,11 @@ export async function generateDigestProse(userId, stories, sensitivity, deps) {
|
||||
if (validated) return validated;
|
||||
}
|
||||
} catch { /* cache miss fine */ }
|
||||
const { system, user } = buildDigestPrompt(stories, sensitivity);
|
||||
const { system, user } = buildDigestPrompt(stories, sensitivity, ctx);
|
||||
let text = null;
|
||||
try {
|
||||
text = await deps.callLLM(system, user, {
|
||||
maxTokens: 700,
|
||||
maxTokens: 900,
|
||||
temperature: 0.4,
|
||||
timeoutMs: 15_000,
|
||||
skipProviders: BRIEF_LLM_SKIP_PROVIDERS,
|
||||
@@ -482,6 +619,33 @@ export async function generateDigestProse(userId, stories, sensitivity, deps) {
|
||||
return parsed;
|
||||
}
|
||||
|
||||
/**
 * Non-personalised digest prose for share-URL surfaces.
 *
 * Strips profile and greeting and sets `isPublic: true`, which makes
 * hashDigestInput substitute the literal 'public' for the userId slot —
 * every public-share reader of the same (sensitivity, story-pool) hits
 * one cache row, and no PII enters public cache keys.
 *
 * Deliberately takes no `userId` parameter: callers MUST NOT thread an
 * authenticated user's id through this function, or the public lead
 * would pick up per-user salt.
 *
 * @param {Array} stories
 * @param {string} sensitivity
 * @param {{ callLLM: Function; cacheGet: Function; cacheSet: Function }} deps
 * @returns {ReturnType<typeof generateDigestProse>}
 */
export async function generateDigestProsePublic(stories, sensitivity, deps) {
  // Empty-string userId: unused when isPublic=true (hashDigestInput's
  // userSlot logic), and guarantees a typo on a future caller can't
  // accidentally salt the shared public cache row.
  const publicCtx = { profile: null, greeting: null, isPublic: true };
  return generateDigestProse('', stories, sensitivity, deps, publicCtx);
}
|
||||
|
||||
// ── Envelope enrichment ────────────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
|
||||
201
scripts/lib/digest-orchestration-helpers.mjs
Normal file
@@ -0,0 +1,201 @@
|
||||
// Pure helpers for the digest cron's per-user compose loop.
|
||||
//
|
||||
// Extracted from scripts/seed-digest-notifications.mjs so they can be
|
||||
// unit-tested without dragging the cron's env-checking side effects
|
||||
// (DIGEST_CRON_ENABLED check, Upstash REST helper, Convex relay
|
||||
// auth) into the test runtime. The cron imports back from here.
|
||||
|
||||
import { compareRules, MAX_STORIES_PER_USER } from './brief-compose.mjs';
|
||||
import { generateDigestProse } from './brief-llm.mjs';
|
||||
|
||||
/**
 * Build the email subject string. Extracted so the synthesis-level →
 * subject ternary can be unit-tested without standing up the whole
 * cron loop. (Plan acceptance criterion A6.i.)
 *
 * Rules:
 * - synthesisLevel 1 or 2 + non-empty briefLead → "Intelligence Brief"
 * - synthesisLevel 3 OR empty/null briefLead → "Digest"
 *
 * Mirrors the prior UX where the editorial subject only appeared when
 * a real LLM-produced lead existed; the L3 stub falls back to the
 * plain "Digest" subject to set reader expectations correctly.
 *
 * @param {{ briefLead: string | null | undefined; synthesisLevel: number; shortDate: string }} input
 * @returns {string}
 */
export function subjectForBrief({ briefLead, synthesisLevel, shortDate }) {
  const hasEditorialLead =
    Boolean(briefLead) && synthesisLevel >= 1 && synthesisLevel <= 2;
  const label = hasEditorialLead ? 'Intelligence Brief' : 'Digest';
  return `WorldMonitor ${label} — ${shortDate}`;
}
|
||||
|
||||
/**
 * Single source of truth for the digest's story window. Used by BOTH
 * the compose path (digestFor closure in the cron) and the send loop.
 * Without this, the brief lead can be synthesized from a 24h pool
 * while the channel body ships 7d / 12h of stories — reintroducing
 * the cross-surface divergence the canonical-brain refactor is meant
 * to eliminate, just in a different shape.
 *
 * `lastSentAt` is the rule's previous successful send timestamp (ms
 * since epoch) or null on first send. `defaultLookbackMs` is the
 * first-send fallback (today: 24h).
 *
 * @param {number | null | undefined} lastSentAt
 * @param {number} nowMs
 * @param {number} defaultLookbackMs
 * @returns {number}
 */
export function digestWindowStartMs(lastSentAt, nowMs, defaultLookbackMs) {
  // Nullish check only — a legitimate epoch-0 timestamp must win over
  // the fallback, so no truthiness test here.
  if (lastSentAt !== null && lastSentAt !== undefined) return lastSentAt;
  return nowMs - defaultLookbackMs;
}
|
||||
|
||||
/**
 * Walk an annotated rule list and return the winning candidate plus
 * its non-empty story pool.
 *
 * Two-pass walk: due rules first (so the synthesis comes from a rule
 * that's actually sending), then ALL eligible rules (compose-only
 * tick — keeps the dashboard brief fresh for weekly/twice_daily
 * users). Within each pass, candidates are visited in compareRules
 * priority order; the FIRST candidate whose pool is non-empty AND
 * survives `tryCompose` (when provided) wins.
 *
 * Returns null when every candidate is rejected — caller skips the
 * user (same as the prior behavior on empty-pool exhaustion).
 *
 * Plan acceptance criteria A6.l (compose-only tick still works for a
 * weekly user) + A6.m (winner walks past an empty-pool top-priority
 * candidate). Codex Round-3 High #1 + Round-4 High #1 + Round-4
 * Medium #2.
 *
 * `tryCompose` (optional): called with `(cand, stories)` after a
 * non-empty pool is found. A truthy return claims the candidate as
 * winner and is forwarded as `composeResult`; a falsy return (e.g.
 * composeBriefFromDigestStories dropped every story via its
 * URL/headline/shape filters) walks to the next candidate. Without
 * the callback, the helper keeps the original "first non-empty pool
 * wins" semantics.
 *
 * `digestFor` receives the full annotated candidate (not just the
 * rule) so callers can derive a per-candidate story window from
 * `cand.lastSentAt` — see `digestWindowStartMs`.
 *
 * `log` is the per-rejected-candidate log emitter — passed in so
 * tests can capture lines without reaching for console.log.
 *
 * @param {Array<{ rule: object; lastSentAt: number | null; due: boolean }>} annotated
 * @param {(cand: { rule: object; lastSentAt: number | null; due: boolean }) => Promise<unknown[] | null | undefined>} digestFor
 * @param {(line: string) => void} log
 * @param {string} userId
 * @param {((cand: { rule: object; lastSentAt: number | null; due: boolean }, stories: unknown[]) => Promise<unknown> | unknown)} [tryCompose]
 * @returns {Promise<{ winner: { rule: object; lastSentAt: number | null; due: boolean }; stories: unknown[]; composeResult?: unknown } | null>}
 */
export async function pickWinningCandidateWithPool(annotated, digestFor, log, userId, tryCompose) {
  if (!Array.isArray(annotated) || annotated.length === 0) return null;

  // Shared prefix for both rejection log lines.
  const dropPrefix = (cand) =>
    `[digest] brief filter drops user=${userId} ` +
    `sensitivity=${cand.rule.sensitivity ?? 'high'} ` +
    `variant=${cand.rule.variant ?? 'full'} ` +
    `due=${cand.due} `;

  const byPriority = (a, b) => compareRules(a.rule, b.rule);
  const dueFirst = annotated.filter((a) => a.due).sort(byPriority);
  const everyone = [...annotated].sort(byPriority);

  // Build the walk order, deduping by rule reference — a due rule
  // appears in both dueFirst and everyone and must be tried once.
  const visited = new Set();
  const walkOrder = [];
  for (const cand of [...dueFirst, ...everyone]) {
    if (visited.has(cand.rule)) continue;
    visited.add(cand.rule);
    walkOrder.push(cand);
  }

  for (const cand of walkOrder) {
    const stories = await digestFor(cand);
    if (!stories || stories.length === 0) {
      log(
        dropPrefix(cand) +
          'outcome=empty-pool ' +
          'in=0 dropped_severity=0 dropped_url=0 dropped_headline=0 dropped_shape=0 dropped_cap=0 out=0',
      );
      continue;
    }
    if (typeof tryCompose === 'function') {
      const composeResult = await tryCompose(cand, stories);
      if (!composeResult) {
        log(
          dropPrefix(cand) +
            'outcome=filter-rejected ' +
            `in=${stories.length} out=0`,
        );
        continue;
      }
      return { winner: cand, stories, composeResult };
    }
    return { winner: cand, stories };
  }
  return null;
}
|
||||
|
||||
/**
 * Run the three-level canonical synthesis fallback chain.
 * L1: full pre-cap pool + ctx (profile, greeting, !public) — canonical.
 * L2: envelope-sized slice + empty ctx — degraded fallback (mirrors
 *     the old enrichBriefEnvelopeWithLLM behaviour).
 * L3: null synthesis — caller composes from stub.
 *
 * Returns { synthesis, level } with `synthesis` matching
 * generateDigestProse's output shape (or null on L3) and `level` one
 * of {1, 2, 3}.
 *
 * Pure helper — no I/O beyond the deps.callLLM the inner functions
 * already perform. Errors at L1 propagate to L2; L2 errors propagate
 * to L3 (null/stub). `trace` fires per level transition so callers
 * can quantify failure-mode distribution in production logs.
 *
 * Plan acceptance criterion A6.h (3-level fallback triggers).
 *
 * @param {string} userId
 * @param {Array} stories — full pre-cap pool
 * @param {string} sensitivity
 * @param {{ profile: string | null; greeting: string | null }} ctx
 * @param {{ callLLM: Function; cacheGet: Function; cacheSet: Function }} deps
 * @param {(level: 1 | 2 | 3, kind: 'success' | 'fall' | 'throw', err?: unknown) => void} [trace]
 * @returns {Promise<{ synthesis: object | null; level: 1 | 2 | 3 }>}
 */
export async function runSynthesisWithFallback(userId, stories, sensitivity, ctx, deps, trace) {
  const note = typeof trace === 'function' ? trace : () => {};
  const pool = Array.isArray(stories) ? stories : [];

  // Ordered attempt table: [level, thunk]. Thunks are lazy so each
  // level's argument construction happens inside its own try.
  const attempts = [
    // L1 — canonical: full pre-cap pool + personalisation context.
    [1, () => generateDigestProse(userId, stories, sensitivity, deps, {
      profile: ctx?.profile ?? null,
      greeting: ctx?.greeting ?? null,
      isPublic: false,
    })],
    // L2 — degraded: envelope-sized slice, no ctx.
    [2, () => generateDigestProse(userId, pool.slice(0, MAX_STORIES_PER_USER), sensitivity, deps)],
  ];

  for (const [level, attempt] of attempts) {
    try {
      const synthesis = await attempt();
      if (synthesis) {
        note(level, 'success');
        return { synthesis, level };
      }
      note(level, 'fall');
    } catch (err) {
      note(level, 'throw', err);
    }
  }

  // L3 — stub: caller composes without an LLM lead.
  note(3, 'success');
  return { synthesis: null, level: 3 };
}
|
||||
@@ -33,12 +33,25 @@ const { normalizeResendSender } = require('./lib/resend-from.cjs');
|
||||
import { readRawJsonFromUpstash, redisPipeline } from '../api/_upstash-json.js';
|
||||
import {
|
||||
composeBriefFromDigestStories,
|
||||
compareRules,
|
||||
extractInsights,
|
||||
groupEligibleRulesByUser,
|
||||
MAX_STORIES_PER_USER,
|
||||
shouldExitNonZero as shouldExitOnBriefFailures,
|
||||
} from './lib/brief-compose.mjs';
|
||||
import {
|
||||
digestWindowStartMs,
|
||||
pickWinningCandidateWithPool,
|
||||
runSynthesisWithFallback,
|
||||
subjectForBrief,
|
||||
} from './lib/digest-orchestration-helpers.mjs';
|
||||
import { issueSlotInTz } from '../shared/brief-filter.js';
|
||||
import { enrichBriefEnvelopeWithLLM } from './lib/brief-llm.mjs';
|
||||
import {
|
||||
enrichBriefEnvelopeWithLLM,
|
||||
generateDigestProse,
|
||||
generateDigestProsePublic,
|
||||
greetingBucket,
|
||||
} from './lib/brief-llm.mjs';
|
||||
import { parseDigestOnlyUser } from './lib/digest-only-user.mjs';
|
||||
import { assertBriefEnvelope } from '../server/_shared/brief-render.js';
|
||||
import { signBriefUrl, BriefUrlError } from './lib/brief-url-sign.mjs';
|
||||
@@ -93,7 +106,6 @@ const DIGEST_LOOKBACK_MS = 24 * 60 * 60 * 1000; // 24h default lookback on first
|
||||
const DIGEST_CRITICAL_LIMIT = Infinity;
|
||||
const DIGEST_HIGH_LIMIT = 15;
|
||||
const DIGEST_MEDIUM_LIMIT = 10;
|
||||
const AI_SUMMARY_CACHE_TTL = 3600; // 1h
|
||||
const AI_DIGEST_ENABLED = process.env.AI_DIGEST_ENABLED !== '0';
|
||||
const ENTITLEMENT_CACHE_TTL = 900; // 15 min
|
||||
|
||||
@@ -116,12 +128,13 @@ const BRIEF_URL_SIGNING_SECRET = process.env.BRIEF_URL_SIGNING_SECRET ?? '';
|
||||
const WORLDMONITOR_PUBLIC_BASE_URL =
|
||||
process.env.WORLDMONITOR_PUBLIC_BASE_URL ?? 'https://worldmonitor.app';
|
||||
const BRIEF_TTL_SECONDS = 7 * 24 * 60 * 60; // 7 days
|
||||
// The brief is a once-per-day editorial snapshot. 24h is the natural
|
||||
// window regardless of a user's email cadence (daily / twice_daily /
|
||||
// weekly) — weekly subscribers still expect a fresh brief each day
|
||||
// in the dashboard panel. Matches DIGEST_LOOKBACK_MS so first-send
|
||||
// users see identical story pools in brief and email.
|
||||
const BRIEF_STORY_WINDOW_MS = 24 * 60 * 60 * 1000;
|
||||
// Brief story window: derived per-rule from the rule's lastSentAt via
|
||||
// digestWindowStartMs, identical to the send-loop window. The previous
|
||||
// fixed-24h constant decoupled the canonical brief lead from the
|
||||
// stories the email/Slack body actually shipped, reintroducing the
|
||||
// cross-surface divergence the canonical-brain refactor is designed to
|
||||
// eliminate (especially severe for weekly users — 7d email body vs 24h
|
||||
// lead).
|
||||
const INSIGHTS_KEY = 'news:insights:v1';
|
||||
|
||||
// Operator kill switch — used to intentionally silence brief compose
|
||||
@@ -309,6 +322,66 @@ function toLocalHour(nowMs, timezone) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Read digest:last-sent:v1:{userId}:{variant} from Upstash. Returns
 * null on miss / parse error / network hiccup so the caller can treat
 * "first send" and "transient lookup failure" the same way (both fall
 * through to isDue's `lastSentAt === null` branch). Extracted so the
 * compose-flow's per-rule annotation pass and the send loop share one
 * source of truth — Codex Round-3 High #1 + Round-4 fixes.
 *
 * @param {{ userId: string; variant?: string }} rule
 * @returns {Promise<number | null>}
 */
async function getLastSentAt(rule) {
  const userId = rule?.userId;
  const variant = rule?.variant;
  if (!userId || !variant) return null;
  try {
    const raw = await upstashRest('GET', `digest:last-sent:v1:${userId}:${variant}`);
    if (!raw) return null;
    // Any malformed payload (bad JSON, null row, non-numeric sentAt)
    // degrades to null via the catch / type check below.
    const record = JSON.parse(raw);
    return typeof record.sentAt === 'number' ? record.sentAt : null;
  } catch {
    return null;
  }
}
|
||||
|
||||
/**
 * Build the synthesis context (profile, greeting) for the canonical
 * synthesis call. profile is the formatted user-context line block;
 * greeting is the time-of-day-appropriate opener. Both are stripped
 * by `generateDigestProsePublic` for the share-URL surface; this
 * function is for the personalised path only.
 *
 * Defensive: prefs lookup failures degrade to a non-personalised
 * synthesis (profile=null) rather than blocking the brief — same
 * pattern the legacy generateAISummary used.
 *
 * @param {{ userId: string; variant?: string; digestTimezone?: string }} rule
 * @param {number} nowMs
 * @returns {Promise<{ profile: string | null; greeting: string | null }>}
 */
async function buildSynthesisCtx(rule, nowMs) {
  if (!rule?.userId) return { profile: null, greeting: null };

  // Profile: best-effort. Missing or erroring prefs degrade to a
  // non-personalised synthesis rather than blocking the brief.
  let profile = null;
  try {
    const { data: prefs } = await fetchUserPreferences(rule.userId, rule.variant ?? 'full');
    if (prefs) {
      const ctx = extractUserContext(prefs);
      profile = formatUserProfile(ctx, rule.variant ?? 'full');
    }
  } catch {
    /* prefs unavailable — degrade to non-personalised */
  }

  // Greeting: time-of-day bucket in the rule's timezone. Everything
  // outside 05:00–16:59 — late evening, overnight hours, and
  // toLocalHour's bad-timezone sentinel — resolves to 'Good evening'.
  // (Fix: the previous chain's `>= 17 && < 22` branch and its else
  // arm both produced 'Good evening', so the range check was dead
  // code; collapsing it changes no output.)
  const tz = rule.digestTimezone ?? 'UTC';
  const localHour = toLocalHour(nowMs, tz);
  let greeting = 'Good evening';
  if (localHour >= 5 && localHour < 12) greeting = 'Good morning';
  else if (localHour >= 12 && localHour < 17) greeting = 'Good afternoon';

  return { profile, greeting };
}
|
||||
|
||||
function isDue(rule, lastSentAt) {
|
||||
const nowMs = Date.now();
|
||||
const tz = rule.digestTimezone ?? 'UTC';
|
||||
@@ -701,95 +774,23 @@ function formatDigestHtml(stories, nowMs) {
|
||||
</div>`;
|
||||
}
|
||||
|
||||
// ── AI summary generation ────────────────────────────────────────────────────
|
||||
|
||||
function hashShort(str) {
|
||||
return createHash('sha256').update(str).digest('hex').slice(0, 16);
|
||||
}
|
||||
|
||||
async function generateAISummary(stories, rule) {
|
||||
if (!AI_DIGEST_ENABLED) return null;
|
||||
if (!stories || stories.length === 0) return null;
|
||||
|
||||
// rule.aiDigestEnabled (from alertRules) is the user's explicit opt-in for
|
||||
// AI summaries. userPreferences is a SEPARATE table (SPA app settings blob:
|
||||
// watchlist, airports, panels). A user can have alertRules without having
|
||||
// ever saved userPreferences — or under a different variant. Missing prefs
|
||||
// must NOT silently disable the feature the user just enabled; degrade to
|
||||
// a non-personalized summary instead.
|
||||
// error: true = transient fetch failure (network, non-OK HTTP, env missing)
|
||||
// error: false = the (userId, variant) row genuinely does not exist
|
||||
// Both cases degrade to a non-personalized summary, but log them distinctly
|
||||
// so transient fetch failures are visible in observability.
|
||||
const { data: prefs, error: prefsFetchError } = await fetchUserPreferences(rule.userId, rule.variant ?? 'full');
|
||||
if (!prefs) {
|
||||
console.log(
|
||||
prefsFetchError
|
||||
? `[digest] Prefs fetch failed for ${rule.userId} — generating non-personalized AI summary`
|
||||
: `[digest] No stored preferences for ${rule.userId} — generating non-personalized AI summary`,
|
||||
);
|
||||
}
|
||||
const ctx = extractUserContext(prefs);
|
||||
const profile = formatUserProfile(ctx, rule.variant ?? 'full');
|
||||
|
||||
const variant = rule.variant ?? 'full';
|
||||
const tz = rule.digestTimezone ?? 'UTC';
|
||||
const localHour = toLocalHour(Date.now(), tz);
|
||||
if (localHour === -1) console.warn(`[digest] Bad timezone "${tz}" for ${rule.userId} — defaulting to evening greeting`);
|
||||
const greeting = localHour >= 5 && localHour < 12 ? 'Good morning'
|
||||
: localHour >= 12 && localHour < 17 ? 'Good afternoon'
|
||||
: 'Good evening';
|
||||
const storiesHash = hashShort(stories.map(s =>
|
||||
`${s.titleHash ?? s.title}:${s.severity ?? ''}:${s.phase ?? ''}:${(s.sources ?? []).slice(0, 3).join(',')}`
|
||||
).sort().join('|'));
|
||||
const ctxHash = hashShort(JSON.stringify(ctx));
|
||||
const cacheKey = `digest:ai-summary:v1:${variant}:${greeting}:${storiesHash}:${ctxHash}`;
|
||||
|
||||
try {
|
||||
const cached = await upstashRest('GET', cacheKey);
|
||||
if (cached) {
|
||||
console.log(`[digest] AI summary cache hit for ${rule.userId}`);
|
||||
return cached;
|
||||
}
|
||||
} catch { /* miss */ }
|
||||
|
||||
const dateStr = new Date().toISOString().split('T')[0];
|
||||
const storyList = stories.slice(0, 20).map((s, i) => {
|
||||
const phase = s.phase ? ` [${s.phase}]` : '';
|
||||
const src = s.sources?.length > 0 ? ` (${s.sources.slice(0, 2).join(', ')})` : '';
|
||||
return `${i + 1}. [${(s.severity ?? 'high').toUpperCase()}]${phase} ${s.title}${src}`;
|
||||
}).join('\n');
|
||||
|
||||
const systemPrompt = `You are WorldMonitor's intelligence analyst. Today is ${dateStr} UTC.
|
||||
Write a personalized daily brief for a user focused on ${rule.variant ?? 'full'} intelligence.
|
||||
The user's local time greeting is "${greeting}" — use this exact greeting to open the brief.
|
||||
|
||||
User profile:
|
||||
${profile}
|
||||
|
||||
Rules:
|
||||
- Open with "${greeting}." followed by the brief
|
||||
- Lead with the single most impactful development for this user
|
||||
- Connect events to watched assets/regions where relevant
|
||||
- 3-5 bullet points, 1-2 sentences each
|
||||
- Flag anything directly affecting watched assets
|
||||
- Separate facts from assessment
|
||||
- End with "Signals to watch:" (1-2 items)
|
||||
- Under 250 words`;
|
||||
|
||||
const summary = await callLLM(systemPrompt, storyList, { maxTokens: 600, temperature: 0.3, timeoutMs: 15_000, skipProviders: ['groq'] });
|
||||
if (!summary) {
|
||||
console.warn(`[digest] AI summary generation failed for ${rule.userId}`);
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
await upstashRest('SET', cacheKey, summary, 'EX', String(AI_SUMMARY_CACHE_TTL));
|
||||
} catch { /* best-effort cache write */ }
|
||||
|
||||
console.log(`[digest] AI summary generated for ${rule.userId} (${summary.length} chars)`);
|
||||
return summary;
|
||||
}
|
||||
// ── (Removed) standalone generateAISummary ───────────────────────────────────
|
||||
//
|
||||
// Prior to 2026-04-25 a separate `generateAISummary()` here ran a
|
||||
// second LLM call per send to produce the email's exec-summary
|
||||
// block, independent of the brief envelope's `digest.lead`. That
|
||||
// asymmetry was the root cause of the email/brief contradiction
|
||||
// (different inputs, different leads, different ranked stories).
|
||||
//
|
||||
// The synthesis is now produced ONCE per user by
|
||||
// `generateDigestProse(userId, fullPool, sensitivity, deps, ctx)`
|
||||
// in composeAndStoreBriefForUser, written into
|
||||
// `envelope.data.digest.lead`, and read by every channel
|
||||
// (email HTML, plain-text, Telegram, Slack, Discord, webhook). See
|
||||
// docs/plans/2026-04-25-002-fix-brief-email-two-brain-divergence-plan.md.
|
||||
//
|
||||
// The `digest:ai-summary:v1:*` cache rows from the legacy code path
|
||||
// expire on their existing 1h TTL — no cleanup pass needed.
|
||||
|
||||
// ── Channel deactivation ──────────────────────────────────────────────────────
|
||||
|
||||
@@ -1254,22 +1255,50 @@ async function composeBriefsForRun(rules, nowMs) {
|
||||
// inherits a looser populator's pool (the earlier populator "wins"
|
||||
// and decides which severity tiers enter the pool, so stricter
|
||||
// users get a pool that contains severities they never wanted).
|
||||
const windowStart = nowMs - BRIEF_STORY_WINDOW_MS;
|
||||
//
|
||||
// windowStart is derived per-candidate from `lastSentAt`, matching
|
||||
// the send loop's formula exactly (digestWindowStartMs). Without
|
||||
// this, the canonical brief lead would be synthesized from a fixed
|
||||
// 24h pool while the email/Slack body ships the actual cadence's
|
||||
// window (7d for weekly, 12h for twice_daily) — a different flavor
|
||||
// of the cross-surface divergence the canonical-brain refactor is
|
||||
// designed to eliminate.
|
||||
const digestCache = new Map();
|
||||
async function digestFor(candidate) {
|
||||
const key = `${candidate.variant ?? 'full'}:${candidate.lang ?? 'en'}:${candidate.sensitivity ?? 'high'}:${windowStart}`;
|
||||
async function digestFor(cand) {
|
||||
const windowStart = digestWindowStartMs(cand.lastSentAt, nowMs, DIGEST_LOOKBACK_MS);
|
||||
const key = `${cand.rule.variant ?? 'full'}:${cand.rule.lang ?? 'en'}:${cand.rule.sensitivity ?? 'high'}:${windowStart}`;
|
||||
if (digestCache.has(key)) return digestCache.get(key);
|
||||
const stories = await buildDigest(candidate, windowStart);
|
||||
const stories = await buildDigest(cand.rule, windowStart);
|
||||
digestCache.set(key, stories ?? []);
|
||||
return stories ?? [];
|
||||
}
|
||||
|
||||
const eligibleByUser = groupEligibleRulesByUser(rules);
|
||||
// Pre-annotate every eligible rule with its lastSentAt + isDue
|
||||
// status. The compose flow uses this to prefer a "due-this-tick"
|
||||
// candidate as the canonical synthesis source, falling back to any
|
||||
// eligible candidate when nothing is due (preserving today's
|
||||
// dashboard refresh contract for weekly users on non-due ticks).
|
||||
// Codex Round-3 High #1 + Round-4 High #1 + Round-4 Medium #2.
|
||||
//
|
||||
// One Upstash GET per rule per tick; with caching across rules of
|
||||
// the same user this is cheap. The send loop in main() reads from
|
||||
// this same map (via getLastSentAt) so compose + send agree on
|
||||
// lastSentAt for every rule.
|
||||
const annotatedByUser = new Map();
|
||||
for (const [userId, candidates] of groupEligibleRulesByUser(rules)) {
|
||||
const annotated = [];
|
||||
for (const rule of candidates) {
|
||||
const lastSentAt = await getLastSentAt(rule);
|
||||
annotated.push({ rule, lastSentAt, due: isDue(rule, lastSentAt) });
|
||||
}
|
||||
annotatedByUser.set(userId, annotated);
|
||||
}
|
||||
|
||||
let composeSuccess = 0;
|
||||
let composeFailed = 0;
|
||||
for (const [userId, candidates] of eligibleByUser) {
|
||||
for (const [userId, annotated] of annotatedByUser) {
|
||||
try {
|
||||
const hit = await composeAndStoreBriefForUser(userId, candidates, insightsNumbers, digestFor, nowMs);
|
||||
const hit = await composeAndStoreBriefForUser(userId, annotated, insightsNumbers, digestFor, nowMs);
|
||||
if (hit) {
|
||||
briefByUser.set(userId, hit);
|
||||
composeSuccess++;
|
||||
@@ -1284,114 +1313,188 @@ async function composeBriefsForRun(rules, nowMs) {
|
||||
}
|
||||
}
|
||||
console.log(
|
||||
`[digest] brief: compose_success=${composeSuccess} compose_failed=${composeFailed} total_users=${eligibleByUser.size}`,
|
||||
`[digest] brief: compose_success=${composeSuccess} compose_failed=${composeFailed} total_users=${annotatedByUser.size}`,
|
||||
);
|
||||
return { briefByUser, composeSuccess, composeFailed };
|
||||
}
|
||||
|
||||
/**
|
||||
* Per-user: walk candidates, for each pull the per-variant digest
|
||||
* story pool (same pool buildDigest feeds to the email), and compose
|
||||
* the brief envelope from the first candidate that yields non-empty
|
||||
* stories. SETEX the envelope, sign the magazine URL. Returns the
|
||||
* entry the caller should stash in briefByUser, or null when no
|
||||
* candidate had stories.
|
||||
* Per-user: pick a winning candidate (DUE rules first, then any
|
||||
* eligible rule), pull its digest pool, run canonical synthesis
|
||||
* over the FULL pre-cap pool, then compose the envelope with the
|
||||
* synthesis spliced in. SETEX the envelope, sign the magazine URL.
|
||||
*
|
||||
* Returns the entry the caller should stash in briefByUser, or null
|
||||
* when no candidate had stories. The entry's `synthesisLevel` field
|
||||
* tells the send loop which fallback path produced the lead (1 =
|
||||
* canonical, 2 = degraded, 3 = stub) — drives the email subject-line
|
||||
* ternary and the parity log.
|
||||
*
|
||||
* @param {string} userId
|
||||
* @param {Array<{ rule: object; lastSentAt: number | null; due: boolean }>} annotated
|
||||
* @param {{ clusters: number; multiSource: number }} insightsNumbers
|
||||
* @param {(rule: object) => Promise<unknown[]>} digestFor
|
||||
* @param {number} nowMs
|
||||
*/
|
||||
async function composeAndStoreBriefForUser(userId, candidates, insightsNumbers, digestFor, nowMs) {
|
||||
let envelope = null;
|
||||
let chosenVariant = null;
|
||||
let chosenCandidate = null;
|
||||
for (const candidate of candidates) {
|
||||
const digestStories = await digestFor(candidate);
|
||||
if (!digestStories || digestStories.length === 0) continue;
|
||||
const dropStats = { severity: 0, headline: 0, url: 0, shape: 0, cap: 0, in: digestStories.length };
|
||||
const composed = composeBriefFromDigestStories(
|
||||
candidate,
|
||||
digestStories,
|
||||
insightsNumbers,
|
||||
{
|
||||
nowMs,
|
||||
onDrop: (ev) => { dropStats[ev.reason] = (dropStats[ev.reason] ?? 0) + 1; },
|
||||
async function composeAndStoreBriefForUser(userId, annotated, insightsNumbers, digestFor, nowMs) {
|
||||
// Two-pass walk extracted to a pure helper so it can be unit-tested
|
||||
// (A6.l + A6.m). When no candidate has a non-empty pool — OR when
|
||||
// every non-empty candidate has its stories filtered out by the
|
||||
// composer (URL/headline/shape filters) — returns null.
|
||||
//
|
||||
// The `tryCompose` callback is the filter-rejection fall-through:
|
||||
// before the original PR, the legacy loop kept trying lower-priority
|
||||
// candidates whenever compose returned null. Without this hook the
|
||||
// helper would claim the first non-empty pool as winner and the
|
||||
// caller would bail on filter-drop, suppressing briefs that a
|
||||
// lower-priority candidate would have produced.
|
||||
//
|
||||
// We compose WITHOUT synthesis here (cheap — pure JS, no I/O) just
|
||||
// to check filter survival; the real composition with synthesis
|
||||
// splice-in happens once below, after the winner is locked in.
|
||||
const log = (line) => console.log(line);
|
||||
const winnerResult = await pickWinningCandidateWithPool(
|
||||
annotated,
|
||||
digestFor,
|
||||
log,
|
||||
userId,
|
||||
(cand, stories) => {
|
||||
const test = composeBriefFromDigestStories(
|
||||
cand.rule,
|
||||
stories,
|
||||
insightsNumbers,
|
||||
{ nowMs },
|
||||
);
|
||||
return test ?? null;
|
||||
},
|
||||
);
|
||||
if (!winnerResult) return null;
|
||||
const { winner, stories: winnerStories } = winnerResult;
|
||||
|
||||
// ── Canonical synthesis (3-level fallback chain) ────────────────────
|
||||
//
|
||||
// L1: full pre-cap pool + personalised ctx (profile, greeting). The
|
||||
// desired outcome — single LLM call per user, lead anchored on
|
||||
// the wider story set the model has the most signal from.
|
||||
// L2: post-cap envelope-only + empty ctx. Mirrors today's
|
||||
// enrichBriefEnvelopeWithLLM behavior — used when L1 returns
|
||||
// null (LLM down across all providers, parse failure).
|
||||
// L3: stub from assembleStubbedBriefEnvelope. The brief still
|
||||
// ships; only the lead text degrades. Email subject downgrades
|
||||
// from "Intelligence Brief" to "Digest" (driven by
|
||||
// synthesisLevel === 3 in the send loop).
|
||||
const sensitivity = winner.rule.sensitivity ?? 'high';
|
||||
let synthesis = null;
|
||||
let publicLead = null;
|
||||
let synthesisLevel = 3; // pessimistic default; bumped on success
|
||||
if (BRIEF_LLM_ENABLED) {
|
||||
const ctx = await buildSynthesisCtx(winner.rule, nowMs);
|
||||
const result = await runSynthesisWithFallback(
|
||||
userId,
|
||||
winnerStories,
|
||||
sensitivity,
|
||||
ctx,
|
||||
briefLlmDeps,
|
||||
(level, kind, err) => {
|
||||
if (kind === 'throw') {
|
||||
console.warn(
|
||||
`[digest] brief: synthesis L${level} threw for ${userId} — falling to L${level + 1}:`,
|
||||
err?.message,
|
||||
);
|
||||
} else if (kind === 'success' && level === 2) {
|
||||
console.log(`[digest] synthesis level=2_degraded user=${userId}`);
|
||||
} else if (kind === 'success' && level === 3) {
|
||||
console.log(`[digest] synthesis level=3_stub user=${userId}`);
|
||||
}
|
||||
},
|
||||
);
|
||||
|
||||
// Per-attempt filter-drop line. Emits one structured row for every
|
||||
// candidate whose digest pool was non-empty, tagged with that
|
||||
// candidate's own sensitivity and variant. See Solution 0 in
|
||||
// docs/plans/2026-04-24-004-fix-brief-topic-adjacency-defects-plan.md
|
||||
// for why this log exists (deciding whether Solution 3 is warranted).
|
||||
//
|
||||
// Emitting per attempt — not per user — because:
|
||||
// - A user can have multiple rules with different sensitivities;
|
||||
// a single-row-per-user log would have to either pick one
|
||||
// sensitivity arbitrarily or label as 'mixed', hiding drops
|
||||
// from the non-winning candidates.
|
||||
// - An earlier candidate wiped out by post-group filtering (the
|
||||
// exact signal Sol-0 targets) is invisible if only the winner
|
||||
// is logged. Every attempt emits its own row so the fallback
|
||||
// chain is visible.
|
||||
//
|
||||
// Outcomes per row:
|
||||
// outcome=shipped — this candidate's envelope shipped; loop breaks.
|
||||
// outcome=rejected — composed was null (every story filtered out);
|
||||
// loop continues to the next candidate.
|
||||
//
|
||||
// A user whose every row is `outcome=rejected` is a wipeout —
|
||||
// operators detect it by grouping rows by user and checking for
|
||||
// absence of `outcome=shipped` within the tick.
|
||||
const out = composed?.data?.stories?.length ?? 0;
|
||||
console.log(
|
||||
`[digest] brief filter drops user=${userId} ` +
|
||||
`sensitivity=${candidate.sensitivity ?? 'high'} ` +
|
||||
`variant=${candidate.variant ?? 'full'} ` +
|
||||
`outcome=${composed ? 'shipped' : 'rejected'} ` +
|
||||
`in=${dropStats.in} ` +
|
||||
`dropped_severity=${dropStats.severity} ` +
|
||||
`dropped_url=${dropStats.url} ` +
|
||||
`dropped_headline=${dropStats.headline} ` +
|
||||
`dropped_shape=${dropStats.shape} ` +
|
||||
`dropped_cap=${dropStats.cap} ` +
|
||||
`out=${out}`,
|
||||
);
|
||||
|
||||
if (composed) {
|
||||
envelope = composed;
|
||||
chosenVariant = candidate.variant;
|
||||
chosenCandidate = candidate;
|
||||
break;
|
||||
synthesis = result.synthesis;
|
||||
synthesisLevel = result.level;
|
||||
// Public synthesis — parallel call. Profile-stripped; cache-
|
||||
// shared across all users for the same (date, sensitivity,
|
||||
// story-pool). Captures the FULL prose object (lead + signals +
|
||||
// threads) since each personalised counterpart in the envelope
|
||||
// can carry profile bias and the public surface needs sibling
|
||||
// safe-versions of all three. Failure is non-fatal — the
|
||||
// renderer's public-mode fail-safes (omit pull-quote / omit
|
||||
// signals page / category-derived threads stub) handle absence
|
||||
// rather than leaking the personalised version.
|
||||
try {
|
||||
const pub = await generateDigestProsePublic(winnerStories, sensitivity, briefLlmDeps);
|
||||
if (pub) publicLead = pub; // { lead, threads, signals, rankedStoryHashes }
|
||||
} catch (err) {
|
||||
console.warn(`[digest] brief: publicLead generation failed for ${userId}:`, err?.message);
|
||||
}
|
||||
}
|
||||
|
||||
// Compose envelope with synthesis pre-baked. The composer applies
|
||||
// rankedStoryHashes-aware ordering BEFORE the cap, so the model's
|
||||
// editorial judgment of importance survives MAX_STORIES_PER_USER.
|
||||
const dropStats = { severity: 0, headline: 0, url: 0, shape: 0, cap: 0, in: winnerStories.length };
|
||||
const envelope = composeBriefFromDigestStories(
|
||||
winner.rule,
|
||||
winnerStories,
|
||||
insightsNumbers,
|
||||
{
|
||||
nowMs,
|
||||
onDrop: (ev) => { dropStats[ev.reason] = (dropStats[ev.reason] ?? 0) + 1; },
|
||||
synthesis: synthesis || publicLead
|
||||
? {
|
||||
...(synthesis ?? {}),
|
||||
publicLead: publicLead?.lead ?? undefined,
|
||||
publicSignals: publicLead?.signals ?? undefined,
|
||||
publicThreads: publicLead?.threads ?? undefined,
|
||||
}
|
||||
: undefined,
|
||||
},
|
||||
);
|
||||
|
||||
// Per-attempt filter-drop line for the winning candidate. Same
|
||||
// shape today's log emits — operators can keep their existing
|
||||
// queries. The `due` field is new; legacy parsers ignore unknown
|
||||
// fields.
|
||||
const out = envelope?.data?.stories?.length ?? 0;
|
||||
console.log(
|
||||
`[digest] brief filter drops user=${userId} ` +
|
||||
`sensitivity=${sensitivity} ` +
|
||||
`variant=${winner.rule.variant ?? 'full'} ` +
|
||||
`due=${winner.due} ` +
|
||||
`outcome=${envelope ? 'shipped' : 'rejected'} ` +
|
||||
`in=${dropStats.in} ` +
|
||||
`dropped_severity=${dropStats.severity} ` +
|
||||
`dropped_url=${dropStats.url} ` +
|
||||
`dropped_headline=${dropStats.headline} ` +
|
||||
`dropped_shape=${dropStats.shape} ` +
|
||||
`dropped_cap=${dropStats.cap} ` +
|
||||
`out=${out}`,
|
||||
);
|
||||
|
||||
if (!envelope) return null;
|
||||
|
||||
// Phase 3b — LLM enrichment. Substitutes the stubbed whyMatters /
|
||||
// lead / threads / signals fields with Gemini 2.5 Flash output.
|
||||
// Pure passthrough on any failure: the baseline envelope has
|
||||
// already passed validation and is safe to ship as-is. Do NOT
|
||||
// abort composition if the LLM is down; the stub is better than
|
||||
// no brief.
|
||||
if (BRIEF_LLM_ENABLED && chosenCandidate) {
|
||||
const baseline = envelope;
|
||||
// Per-story whyMatters enrichment. The synthesis is already in the
|
||||
// envelope; this pass only fills per-story rationales. Failures
|
||||
// fall through cleanly — the stub `whyMatters` from the composer
|
||||
// is acceptable.
|
||||
let finalEnvelope = envelope;
|
||||
if (BRIEF_LLM_ENABLED) {
|
||||
try {
|
||||
const enriched = await enrichBriefEnvelopeWithLLM(envelope, chosenCandidate, briefLlmDeps);
|
||||
const enriched = await enrichBriefEnvelopeWithLLM(envelope, winner.rule, briefLlmDeps);
|
||||
// Defence in depth: re-validate the enriched envelope against
|
||||
// the renderer's strict contract before we SETEX it. If
|
||||
// enrichment produced a structurally broken shape (bad cache
|
||||
// row, code bug, upstream type drift) we'd otherwise SETEX it
|
||||
// and /api/brief would 404 the user's brief at read time. Fall
|
||||
// back to the unenriched baseline — which is already known to
|
||||
// back to the unenriched envelope — which is already known to
|
||||
// pass assertBriefEnvelope() because composeBriefFromDigestStories
|
||||
// asserted on construction.
|
||||
try {
|
||||
assertBriefEnvelope(enriched);
|
||||
envelope = enriched;
|
||||
finalEnvelope = enriched;
|
||||
} catch (assertErr) {
|
||||
console.warn(`[digest] brief: enriched envelope failed assertion for ${userId} — shipping stubbed:`, assertErr?.message);
|
||||
envelope = baseline;
|
||||
console.warn(`[digest] brief: enriched envelope failed assertion for ${userId} — shipping unenriched:`, assertErr?.message);
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn(`[digest] brief: LLM enrichment threw for ${userId} — shipping stubbed envelope:`, err?.message);
|
||||
envelope = baseline;
|
||||
console.warn(`[digest] brief: per-story enrichment threw for ${userId} — shipping unenriched envelope:`, err?.message);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1400,7 +1503,7 @@ async function composeAndStoreBriefForUser(userId, candidates, insightsNumbers,
|
||||
// produce envelope.data.date guarantees the slot's date portion
|
||||
// matches the displayed date. Two same-day compose runs produce
|
||||
// distinct slots so each digest dispatch freezes its own URL.
|
||||
const briefTz = chosenCandidate?.digestTimezone ?? 'UTC';
|
||||
const briefTz = winner.rule?.digestTimezone ?? 'UTC';
|
||||
const issueSlot = issueSlotInTz(nowMs, briefTz);
|
||||
const key = `brief:${userId}:${issueSlot}`;
|
||||
// The latest-pointer lets readers (dashboard panel, share-url
|
||||
@@ -1409,7 +1512,7 @@ async function composeAndStoreBriefForUser(userId, candidates, insightsNumbers,
|
||||
const latestPointerKey = `brief:latest:${userId}`;
|
||||
const latestPointerValue = JSON.stringify({ issueSlot });
|
||||
const pipelineResult = await redisPipeline([
|
||||
['SETEX', key, String(BRIEF_TTL_SECONDS), JSON.stringify(envelope)],
|
||||
['SETEX', key, String(BRIEF_TTL_SECONDS), JSON.stringify(finalEnvelope)],
|
||||
['SETEX', latestPointerKey, String(BRIEF_TTL_SECONDS), latestPointerValue],
|
||||
]);
|
||||
if (!pipelineResult || !Array.isArray(pipelineResult) || pipelineResult.length < 2) {
|
||||
@@ -1427,7 +1530,15 @@ async function composeAndStoreBriefForUser(userId, candidates, insightsNumbers,
|
||||
baseUrl: WORLDMONITOR_PUBLIC_BASE_URL,
|
||||
secret: BRIEF_URL_SIGNING_SECRET,
|
||||
});
|
||||
return { envelope, magazineUrl, chosenVariant };
|
||||
return {
|
||||
envelope: finalEnvelope,
|
||||
magazineUrl,
|
||||
chosenVariant: winner.rule.variant,
|
||||
// synthesisLevel goes here — NOT in the envelope (renderer's
|
||||
// assertNoExtraKeys would reject it). Read by the send loop for
|
||||
// the email subject-line ternary and the parity log.
|
||||
synthesisLevel,
|
||||
};
|
||||
}
|
||||
|
||||
// ── Main ──────────────────────────────────────────────────────────────────────
|
||||
@@ -1522,14 +1633,10 @@ async function main() {
|
||||
if (!rule.userId || !rule.variant) continue;
|
||||
|
||||
const lastSentKey = `digest:last-sent:v1:${rule.userId}:${rule.variant}`;
|
||||
let lastSentAt = null;
|
||||
try {
|
||||
const raw = await upstashRest('GET', lastSentKey);
|
||||
if (raw) {
|
||||
const parsed = JSON.parse(raw);
|
||||
lastSentAt = typeof parsed.sentAt === 'number' ? parsed.sentAt : null;
|
||||
}
|
||||
} catch { /* first send */ }
|
||||
// Reuse the same getLastSentAt helper the compose pass used so
|
||||
// the two flows agree on lastSentAt for every rule. Codex Round-3
|
||||
// High #1 — winner-from-due-candidates pre-condition.
|
||||
const lastSentAt = await getLastSentAt(rule);
|
||||
|
||||
if (!isDue(rule, lastSentAt)) continue;
|
||||
|
||||
@@ -1539,7 +1646,7 @@ async function main() {
|
||||
continue;
|
||||
}
|
||||
|
||||
const windowStart = lastSentAt ?? (nowMs - DIGEST_LOOKBACK_MS);
|
||||
const windowStart = digestWindowStartMs(lastSentAt, nowMs, DIGEST_LOOKBACK_MS);
|
||||
const stories = await buildDigest(rule, windowStart);
|
||||
if (!stories) {
|
||||
console.log(`[digest] No stories in window for ${rule.userId} (${rule.variant})`);
|
||||
@@ -1570,27 +1677,57 @@ async function main() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let aiSummary = null;
|
||||
// Per-rule synthesis: each due rule's channel body must be
|
||||
// internally consistent (lead derived from THIS rule's pool, not
|
||||
// some other rule's). For multi-rule users, the compose flow
|
||||
// picked ONE winning rule for the magazine envelope, but the
|
||||
// send-loop body for a non-winner rule needs ITS OWN lead — else
|
||||
// the email leads with one pool's narrative while listing stories
|
||||
// from another pool. Cache absorbs the cost: when this is the
|
||||
// winning rule, generateDigestProse hits the cache row written
|
||||
// during the compose pass (same userId/sensitivity/pool/ctx) and
|
||||
// no extra LLM call fires.
|
||||
//
|
||||
// The magazineUrl still points at the winner's envelope — that
|
||||
// surface is the share-worthy alpha and remains a single brief
|
||||
// per user per slot. Channel-body lead vs magazine lead may
|
||||
// therefore differ for non-winner rules; users on those rules
|
||||
// see their own coherent email + a magazine that shows the
|
||||
// winner's editorial. Acceptable trade-off given multi-rule
|
||||
// users are rare and the `(userId, issueSlot)` URL contract
|
||||
// can't represent multiple per-rule briefs without an
|
||||
// architectural change to the URL signer + Redis key.
|
||||
const brief = briefByUser.get(rule.userId);
|
||||
let briefLead = null;
|
||||
let synthesisLevel = 3;
|
||||
if (AI_DIGEST_ENABLED && rule.aiDigestEnabled !== false) {
|
||||
aiSummary = await generateAISummary(stories, rule);
|
||||
const ruleCtx = await buildSynthesisCtx(rule, nowMs);
|
||||
const ruleResult = await runSynthesisWithFallback(
|
||||
rule.userId,
|
||||
stories,
|
||||
rule.sensitivity ?? 'high',
|
||||
ruleCtx,
|
||||
briefLlmDeps,
|
||||
);
|
||||
briefLead = ruleResult.synthesis?.lead ?? null;
|
||||
synthesisLevel = ruleResult.level;
|
||||
}
|
||||
|
||||
const storyListPlain = formatDigest(stories, nowMs);
|
||||
if (!storyListPlain) continue;
|
||||
const htmlRaw = formatDigestHtml(stories, nowMs);
|
||||
|
||||
const brief = briefByUser.get(rule.userId);
|
||||
const magazineUrl = brief?.magazineUrl ?? null;
|
||||
const { text, telegramText, slackText, discordText } = buildChannelBodies(
|
||||
storyListPlain,
|
||||
aiSummary,
|
||||
briefLead,
|
||||
magazineUrl,
|
||||
);
|
||||
const htmlWithSummary = injectEmailSummary(htmlRaw, aiSummary);
|
||||
const htmlWithSummary = injectEmailSummary(htmlRaw, briefLead);
|
||||
const html = injectBriefCta(htmlWithSummary, magazineUrl);
|
||||
|
||||
const shortDate = new Intl.DateTimeFormat('en-US', { month: 'short', day: 'numeric' }).format(new Date(nowMs));
|
||||
const subject = aiSummary ? `WorldMonitor Intelligence Brief — ${shortDate}` : `WorldMonitor Digest — ${shortDate}`;
|
||||
const subject = subjectForBrief({ briefLead, synthesisLevel, shortDate });
|
||||
|
||||
let anyDelivered = false;
|
||||
|
||||
@@ -1613,7 +1750,11 @@ async function main() {
|
||||
} else if (ch.channelType === 'email' && ch.email) {
|
||||
ok = await sendEmail(ch.email, subject, text, html);
|
||||
} else if (ch.channelType === 'webhook' && ch.webhookEnvelope) {
|
||||
ok = await sendWebhook(rule.userId, ch.webhookEnvelope, stories, aiSummary);
|
||||
// Webhook payload's `summary` field reads the canonical
|
||||
// briefLead — same string the email exec block + magazine
|
||||
// pull-quote use. Codex Round-1 Medium #6 (channel-scope
|
||||
// parity).
|
||||
ok = await sendWebhook(rule.userId, ch.webhookEnvelope, stories, briefLead);
|
||||
}
|
||||
if (ok) anyDelivered = true;
|
||||
}
|
||||
@@ -1626,6 +1767,61 @@ async function main() {
|
||||
console.log(
|
||||
`[digest] Sent ${stories.length} stories to ${rule.userId} (${rule.variant}, ${rule.digestMode})`,
|
||||
);
|
||||
// Parity observability. Gated on AI_DIGEST_ENABLED + per-rule
|
||||
// aiDigestEnabled — without this guard, opt-out users (briefLead
|
||||
// is intentionally null) trigger PARITY REGRESSION every tick
|
||||
// (null !== '<envelope stub lead>'), flooding Sentry with
|
||||
// false positives. Greptile P1 on PR #3396.
|
||||
//
|
||||
// Two distinct properties to track:
|
||||
//
|
||||
// 1. CHANNEL parity (load-bearing): for ONE send, every channel
|
||||
// body of THIS rule (email HTML + plain text + Telegram +
|
||||
// Slack + Discord + webhook) reads the same `briefLead`
|
||||
// string. Verifiable by code review (single variable threaded
|
||||
// everywhere); logged here as `exec_len` for telemetry.
|
||||
//
|
||||
// 2. WINNER parity (informational): when `winner_match=true`,
|
||||
// THIS rule is the same one the magazine envelope was
|
||||
// composed from — so channel lead == magazine lead (cache-
|
||||
// shared via generateDigestProse). When `winner_match=false`,
|
||||
// this is a non-winner rule send; channel lead reflects this
|
||||
// rule's pool while the magazine URL points at the winner's
|
||||
// editorial. Expected divergence, not a regression.
|
||||
//
|
||||
// PARITY REGRESSION fires only when winner_match=true AND the
|
||||
// channel lead differs from the envelope lead (the canonical-
|
||||
// synthesis cache row has drifted between compose and send
|
||||
// passes — a real contract break).
|
||||
if (AI_DIGEST_ENABLED && rule.aiDigestEnabled !== false) {
|
||||
const envLead = brief?.envelope?.data?.digest?.lead ?? '';
|
||||
const winnerVariant = brief?.chosenVariant ?? '';
|
||||
const winnerMatch = winnerVariant === (rule.variant ?? 'full');
|
||||
const channelsEqual = briefLead === envLead;
|
||||
const publicLead = brief?.envelope?.data?.digest?.publicLead ?? '';
|
||||
console.log(
|
||||
`[digest] brief lead parity user=${rule.userId} ` +
|
||||
`rule=${rule.variant ?? 'full'}:${rule.sensitivity ?? 'high'}:${rule.lang ?? 'en'} ` +
|
||||
`winner_match=${winnerMatch} ` +
|
||||
`synthesis_level=${synthesisLevel} ` +
|
||||
`exec_len=${(briefLead ?? '').length} ` +
|
||||
`brief_lead_len=${envLead.length} ` +
|
||||
`channels_equal=${channelsEqual} ` +
|
||||
`public_lead_len=${publicLead.length}`,
|
||||
);
|
||||
if (winnerMatch && !channelsEqual && briefLead && envLead) {
|
||||
// Sentry alert candidate — winner_match=true means this rule
|
||||
// composed the envelope, so its channel lead MUST match the
|
||||
// envelope lead. Mismatch = canonical-synthesis cache drift
|
||||
// or code regression. Logged loudly so Sentry's console-
|
||||
// breadcrumb hook surfaces it without an explicit
|
||||
// captureMessage call.
|
||||
console.warn(
|
||||
`[digest] PARITY REGRESSION user=${rule.userId} — winner-rule channel lead != envelope lead. ` +
|
||||
`Investigate: cache drift between compose pass and send pass?`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -655,6 +655,56 @@ async function fetchWikipediaInfobox(fund, fxRates) {
|
||||
|
||||
// ── Aggregation ──
|
||||
|
||||
/**
|
||||
* Pure predicate: should this manifest fund be SKIPPED from the
|
||||
* SWF buffer calculation? Returns the skip reason string or null.
|
||||
*
|
||||
* Two skip conditions (Phase 1 §schema):
|
||||
* - `excluded_overlaps_with_reserves: true` — AUM already counted
|
||||
* in central-bank FX reserves (SAFE-IC, HKMA-EF). Excluding
|
||||
* prevents double-counting against reserveAdequacy /
|
||||
* liquidReserveAdequacy.
|
||||
* - `aum_verified: false` — fund AUM not primary-source-confirmed.
|
||||
* Loaded for documentation; excluded from scoring per the
|
||||
* data-integrity rule (Codex Round 1 #7).
|
||||
*
|
||||
* Pure function — exported for tests.
|
||||
*
|
||||
* @param {{ classification?: { excludedOverlapsWithReserves?: boolean }, aumVerified?: boolean }} fund
|
||||
* @returns {'excluded_overlaps_with_reserves' | 'aum_unverified' | null}
|
||||
*/
|
||||
export function shouldSkipFundForBuffer(fund) {
|
||||
if (fund?.classification?.excludedOverlapsWithReserves === true) {
|
||||
return 'excluded_overlaps_with_reserves';
|
||||
}
|
||||
if (fund?.aumVerified === false) {
|
||||
return 'aum_unverified';
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pure helper: apply the `aum_pct_of_audited` multiplier to a
|
||||
* resolved AUM value. When the fund's classification has no
|
||||
* `aum_pct_of_audited`, returns the AUM unchanged.
|
||||
*
|
||||
* Used for fund-of-funds split entries (e.g. KIA-GRF is ~5% of the
|
||||
* audited KIA total; KIA-FGF is ~95%).
|
||||
*
|
||||
* Pure function — exported for tests.
|
||||
*
|
||||
* @param {number} resolvedAumUsd
|
||||
* @param {{ classification?: { aumPctOfAudited?: number } }} fund
|
||||
* @returns {number}
|
||||
*/
|
||||
export function applyAumPctOfAudited(resolvedAumUsd, fund) {
|
||||
const pct = fund?.classification?.aumPctOfAudited;
|
||||
if (typeof pct === 'number' && pct > 0 && pct <= 1) {
|
||||
return resolvedAumUsd * pct;
|
||||
}
|
||||
return resolvedAumUsd;
|
||||
}
|
||||
|
||||
async function fetchFundAum(fund, wikipediaCache, fxRates) {
|
||||
// Source priority: official → IFSWF → Wikipedia list → Wikipedia
|
||||
// per-fund infobox. Short-circuit on first non-null return so the
|
||||
@@ -779,22 +829,41 @@ export async function fetchSovereignWealth() {
|
||||
|
||||
const fundRecords = [];
|
||||
for (const fund of funds) {
|
||||
const aum = await fetchFundAum(fund, wikipediaCache, fxRates);
|
||||
const skipReason = shouldSkipFundForBuffer(fund);
|
||||
if (skipReason) {
|
||||
console.log(`[seed-sovereign-wealth] ${fund.country}:${fund.fund} skipped — ${skipReason}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
// AUM resolution: prefer manifest-provided primary-source AUM
|
||||
// when verified; fall back to the existing Wikipedia/IFSWF
|
||||
// resolution chain otherwise (existing entries that pre-date
|
||||
// the schema extension still work unchanged).
|
||||
let aum = null;
|
||||
if (fund.aumVerified === true && typeof fund.aumUsd === 'number') {
|
||||
aum = { aum: fund.aumUsd, aumYear: fund.aumYear ?? null, source: 'manifest_primary' };
|
||||
} else {
|
||||
aum = await fetchFundAum(fund, wikipediaCache, fxRates);
|
||||
}
|
||||
if (!aum) {
|
||||
unmatched.push(`${fund.country}:${fund.fund}`);
|
||||
continue;
|
||||
}
|
||||
|
||||
const adjustedAum = applyAumPctOfAudited(aum.aum, fund);
|
||||
const aumPct = fund.classification?.aumPctOfAudited;
|
||||
sourceMix[aum.source] = (sourceMix[aum.source] ?? 0) + 1;
|
||||
|
||||
const { access, liquidity, transparency } = fund.classification;
|
||||
const rawMonths = (aum.aum / denominatorImports) * 12;
|
||||
const rawMonths = (adjustedAum / denominatorImports) * 12;
|
||||
const effectiveMonths = rawMonths * access * liquidity * transparency;
|
||||
|
||||
fundRecords.push({
|
||||
fund: fund.fund,
|
||||
aum: aum.aum,
|
||||
aum: adjustedAum,
|
||||
aumYear: aum.aumYear,
|
||||
source: aum.source,
|
||||
...(aumPct != null ? { aumPctOfAudited: aumPct } : {}),
|
||||
access,
|
||||
liquidity,
|
||||
transparency,
|
||||
@@ -805,9 +874,23 @@ export async function fetchSovereignWealth() {
|
||||
|
||||
if (fundRecords.length === 0) continue;
|
||||
const totalEffectiveMonths = fundRecords.reduce((s, f) => s + f.effectiveMonths, 0);
|
||||
const expectedFunds = funds.length;
|
||||
// Completeness denominator excludes funds that were INTENTIONALLY
|
||||
// skipped from buffer scoring (excluded_overlaps_with_reserves OR
|
||||
// aum_verified=false). Without this, manifest entries that exist
|
||||
// for documentation only would artificially depress completeness
|
||||
// for countries with mixed scorable + non-scorable funds — e.g.
|
||||
// UAE (4 scorable + EIA unverified) would show completeness=0.8
|
||||
// even when every scorable fund matched, and CN (CIC + NSSF
|
||||
// scorable + SAFE-IC excluded) would show 0.67.
|
||||
//
|
||||
// The right denominator is "scorable funds for this country":
|
||||
// funds where shouldSkipFundForBuffer returns null. Documentation-
|
||||
// only entries are neither matched nor expected; they don't appear
|
||||
// in the ratio at all.
|
||||
const scorableFunds = funds.filter((f) => shouldSkipFundForBuffer(f) === null);
|
||||
const expectedFunds = scorableFunds.length;
|
||||
const matchedFunds = fundRecords.length;
|
||||
const completeness = matchedFunds / expectedFunds;
|
||||
const completeness = expectedFunds > 0 ? matchedFunds / expectedFunds : 0;
|
||||
// `completeness` signals partial-seed on multi-fund countries (AE,
|
||||
// SG). Downstream scorer must derate the country when completeness
|
||||
// < 1.0 — silently emitting partial totalEffectiveMonths would
|
||||
@@ -816,7 +899,7 @@ export async function fetchSovereignWealth() {
|
||||
// use the partial number for IMPUTE-level coverage), but only
|
||||
// completeness=1.0 countries count toward recordCount / health.
|
||||
if (completeness < 1.0) {
|
||||
console.warn(`[seed-sovereign-wealth] ${iso2} partial: ${matchedFunds}/${expectedFunds} funds matched — completeness=${completeness.toFixed(2)}`);
|
||||
console.warn(`[seed-sovereign-wealth] ${iso2} partial: ${matchedFunds}/${expectedFunds} scorable funds matched — completeness=${completeness.toFixed(2)}`);
|
||||
}
|
||||
countries[iso2] = {
|
||||
funds: fundRecords,
|
||||
@@ -886,8 +969,16 @@ export async function fetchSovereignWealth() {
|
||||
* @param {Record<string, { matchedFunds: number, expectedFunds: number, completeness: number }>} countries Seeded country payload
|
||||
*/
|
||||
export function buildCoverageSummary(manifest, imports, countries) {
|
||||
const expectedFundsTotal = manifest.funds.length;
|
||||
const expectedCountries = new Set(manifest.funds.map((f) => f.country));
|
||||
// Coverage denominator excludes manifest entries that are
|
||||
// documentation-only by design — funds with
|
||||
// `excluded_overlaps_with_reserves: true` (SAFE-IC, HKMA-EF) or
|
||||
// `aum_verified: false` (EIA). Counting them as "expected" would
|
||||
// depress the headline coverage ratio for countries with mixed
|
||||
// scorable + non-scorable fund rosters. Same fix as the per-country
|
||||
// completeness denominator above; see comment there.
|
||||
const scorableManifestFunds = manifest.funds.filter((f) => shouldSkipFundForBuffer(f) === null);
|
||||
const expectedFundsTotal = scorableManifestFunds.length;
|
||||
const expectedCountries = new Set(scorableManifestFunds.map((f) => f.country));
|
||||
let matchedFundsTotal = 0;
|
||||
for (const entry of Object.values(countries)) matchedFundsTotal += entry.matchedFunds;
|
||||
// Every status carries a `reason` field so downstream consumers that
|
||||
@@ -925,8 +1016,18 @@ export function buildCoverageSummary(manifest, imports, countries) {
|
||||
}
|
||||
|
||||
function countManifestFundsForCountry(manifest, iso2) {
|
||||
// Counts SCORABLE funds for the given country (excludes documentation-
|
||||
// only entries: `excluded_overlaps_with_reserves: true` and
|
||||
// `aum_verified: false`). Used by buildCoverageSummary's missing-
|
||||
// country path so the "expected" figure on a missing country reflects
|
||||
// what the seeder would actually try to score, not all manifest
|
||||
// entries.
|
||||
let n = 0;
|
||||
for (const f of manifest.funds) if (f.country === iso2) n++;
|
||||
for (const f of manifest.funds) {
|
||||
if (f.country !== iso2) continue;
|
||||
if (shouldSkipFundForBuffer(f) !== null) continue;
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
@@ -166,54 +166,70 @@ function describeErr(err) {
|
||||
return causeCode ? `${err.message} (cause: ${causeCode})` : (err.message || String(err));
|
||||
}
|
||||
|
||||
async function fetchGdeltDirect(url) {
|
||||
const resp = await fetch(url, {
|
||||
headers: { Accept: 'application/json', 'User-Agent': CHROME_UA },
|
||||
signal: AbortSignal.timeout(30_000),
|
||||
});
|
||||
if (!resp.ok) throw Object.assign(new Error(`GDELT API error: ${resp.status}`), { httpStatus: resp.status });
|
||||
return resp.json();
|
||||
// Direct fetch from Railway has 0% success — every attempt errors with
|
||||
// UND_ERR_CONNECT_TIMEOUT or ECONNRESET. Path is always proxy-only here.
|
||||
// Decodo→Cloudflare→GDELT occasionally returns 522 or RSTs the TLS handshake
|
||||
// (~80% per single attempt in production); retry-with-jitter recovers most of
|
||||
// it without touching the cron interval.
|
||||
//
|
||||
// Test seams:
|
||||
// _proxyFetcher — replaces httpsProxyFetchRaw (default production wiring).
|
||||
// _sleep — replaces the inter-attempt jitter delay.
|
||||
// _maxAttempts — replaces the default 3 (lets tests bound iterations).
|
||||
// _jitter — replaces Math.random()-based jitter (deterministic in tests).
|
||||
export async function fetchGdeltViaProxy(url, proxyAuth, opts = {}) {
|
||||
const {
|
||||
_proxyFetcher = httpsProxyFetchRaw,
|
||||
_sleep = (ms) => new Promise((r) => setTimeout(r, ms)),
|
||||
_maxAttempts = 3,
|
||||
_jitter = () => 1500 + Math.random() * 1500,
|
||||
} = opts;
|
||||
let lastErr;
|
||||
for (let attempt = 1; attempt <= _maxAttempts; attempt++) {
|
||||
try {
|
||||
const { buffer } = await _proxyFetcher(url, proxyAuth, {
|
||||
accept: 'application/json',
|
||||
timeoutMs: 45_000,
|
||||
});
|
||||
return JSON.parse(buffer.toString('utf8'));
|
||||
} catch (err) {
|
||||
lastErr = err;
|
||||
// JSON.parse on a successfully fetched body is deterministic — retrying
|
||||
// can't recover. Bail immediately so we don't burn three attempts on
|
||||
// a malformed-but-cached upstream response.
|
||||
if (err instanceof SyntaxError) throw err;
|
||||
if (attempt < _maxAttempts) {
|
||||
console.warn(` [GDELT] proxy attempt ${attempt}/${_maxAttempts} failed (${describeErr(err)}); retrying`);
|
||||
await _sleep(_jitter());
|
||||
}
|
||||
}
|
||||
}
|
||||
throw lastErr;
|
||||
}
|
||||
|
||||
async function fetchGdeltViaProxy(url, proxyAuth) {
|
||||
// GDELT v1 gkg_geojson responds in ~19s when degraded; 20s timeout caused
|
||||
// chronic "HTTP 522" / "CONNECT tunnel timeout" from Decodo, freezing
|
||||
// seed-meta and firing STALE_SEED across health. 45s absorbs that variance
|
||||
// with headroom.
|
||||
const { buffer } = await httpsProxyFetchRaw(url, proxyAuth, {
|
||||
accept: 'application/json',
|
||||
timeoutMs: 45_000,
|
||||
});
|
||||
return JSON.parse(buffer.toString('utf8'));
|
||||
}
|
||||
|
||||
async function fetchGdeltEvents() {
|
||||
export async function fetchGdeltEvents(opts = {}) {
|
||||
const { _resolveProxyForConnect = resolveProxyForConnect, ..._proxyOpts } = opts;
|
||||
const params = new URLSearchParams({
|
||||
query: 'protest OR riot OR demonstration OR strike',
|
||||
maxrows: '2500',
|
||||
});
|
||||
const url = `${GDELT_GKG_URL}?${params}`;
|
||||
|
||||
const proxyAuth = _resolveProxyForConnect();
|
||||
if (!proxyAuth) {
|
||||
// Direct fetch hasn't worked from Railway since PR #3256; this seeder
|
||||
// hard-requires a CONNECT proxy. Surface the env var ops needs to set.
|
||||
throw new Error('GDELT requires CONNECT proxy: PROXY_URL env var is not set on this Railway service');
|
||||
}
|
||||
|
||||
let data;
|
||||
try {
|
||||
data = await fetchGdeltDirect(url);
|
||||
} catch (directErr) {
|
||||
// Upstream HTTP error (4xx/5xx) — proxy routes to the same GDELT endpoint so
|
||||
// it won't change the response. Save the 20s proxy timeout and bubble up.
|
||||
if (directErr.httpStatus) throw directErr;
|
||||
const proxyAuth = resolveProxyForConnect();
|
||||
if (!proxyAuth) {
|
||||
throw Object.assign(new Error(`GDELT direct failed (no proxy configured): ${describeErr(directErr)}`), { cause: directErr });
|
||||
}
|
||||
console.warn(` [GDELT] direct failed (${describeErr(directErr)}); retrying via proxy`);
|
||||
try {
|
||||
data = await fetchGdeltViaProxy(url, proxyAuth);
|
||||
} catch (proxyErr) {
|
||||
throw Object.assign(
|
||||
new Error(`GDELT both paths failed — direct: ${describeErr(directErr)}; proxy: ${describeErr(proxyErr)}`),
|
||||
{ cause: proxyErr },
|
||||
);
|
||||
}
|
||||
data = await fetchGdeltViaProxy(url, proxyAuth, _proxyOpts);
|
||||
} catch (proxyErr) {
|
||||
throw Object.assign(
|
||||
new Error(`GDELT proxy failed (3 attempts): ${describeErr(proxyErr)}`),
|
||||
{ cause: proxyErr },
|
||||
);
|
||||
}
|
||||
|
||||
const features = data?.features || [];
|
||||
@@ -298,15 +314,22 @@ export function declareRecords(data) {
|
||||
return Array.isArray(data?.events) ? data.events.length : 0;
|
||||
}
|
||||
|
||||
runSeed('unrest', 'events', CANONICAL_KEY, fetchUnrestEvents, {
|
||||
validateFn: validate,
|
||||
ttlSeconds: CACHE_TTL,
|
||||
sourceVersion: 'acled+gdelt',
|
||||
// Gate the runSeed entry-point so this module is importable from tests
|
||||
// without triggering a real seed run. process.argv[1] is set when this file
|
||||
// is invoked as a script (`node scripts/seed-unrest-events.mjs`); under
|
||||
// `node --test`, argv[1] is the test runner, not this file.
|
||||
const isMain = import.meta.url === `file://${process.argv[1]}`;
|
||||
if (isMain) {
|
||||
runSeed('unrest', 'events', CANONICAL_KEY, fetchUnrestEvents, {
|
||||
validateFn: validate,
|
||||
ttlSeconds: CACHE_TTL,
|
||||
sourceVersion: 'acled+gdelt',
|
||||
|
||||
declareRecords,
|
||||
schemaVersion: 1,
|
||||
maxStaleMin: 120,
|
||||
}).catch((err) => {
|
||||
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : ''; console.error('FATAL:', (err.message || err) + _cause);
|
||||
process.exit(1);
|
||||
});
|
||||
declareRecords,
|
||||
schemaVersion: 1,
|
||||
maxStaleMin: 120,
|
||||
}).catch((err) => {
|
||||
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : ''; console.error('FATAL:', (err.message || err) + _cause);
|
||||
process.exit(1);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -61,7 +61,7 @@
|
||||
# re-runs the seeder against the new entry to confirm 8/N live match.
|
||||
|
||||
manifest_version: 1
|
||||
last_reviewed: 2026-04-23
|
||||
last_reviewed: 2026-04-25
|
||||
# REVIEWED means: coefficients derive from the committed rationale +
|
||||
# sources block and the seeder end-to-end matches the expected funds
|
||||
# against the live Wikipedia / IFSWF / official-disclosure surfaces.
|
||||
@@ -111,13 +111,26 @@ funds:
|
||||
abbrev: ADIA
|
||||
fund_name: Abu Dhabi Investment Authority
|
||||
classification:
|
||||
access: 0.3
|
||||
# Phase 1E re-audit (Plan 2026-04-25-001): bumped from 0.3 → 0.4.
|
||||
# ADIA's official mandate is intergenerational, but its
|
||||
# ruler-discretionary deployment pattern (2009 Mubadala bailout
|
||||
# precedent, recurring budget-support contributions to Abu Dhabi
|
||||
# treasury) reflects higher empirical access than the 0.3
|
||||
# "intergenerational" tier suggests. The 0.4 mid-tier value sits
|
||||
# between intergenerational (0.3) and hybrid-constrained (0.5).
|
||||
access: 0.4
|
||||
liquidity: 0.7
|
||||
transparency: 0.5
|
||||
rationale:
|
||||
access: |
|
||||
Intergenerational savings mandate; no explicit stabilization
|
||||
access rule. Ruler-discretionary deployment. Low-medium access.
|
||||
Official mandate is long-horizon intergenerational savings,
|
||||
but ruler-discretionary deployment has been demonstrated:
|
||||
Mubadala 2009 bailout precedent, periodic budget-support
|
||||
contributions to Abu Dhabi treasury, strategic infusions
|
||||
during Dubai's 2009 GCC crisis. Empirical access falls
|
||||
between the strict intergenerational tier (0.3) and the
|
||||
hybrid-constrained tier (0.5). Phase 1E re-audit bumped
|
||||
the score from 0.3 → 0.4 to reflect this.
|
||||
liquidity: |
|
||||
ADIA 2024 review discloses ~55-70% public-market (equities +
|
||||
bonds) allocation, balance in alternatives and real assets.
|
||||
@@ -134,24 +147,143 @@ funds:
|
||||
wikipedia:
|
||||
fund_name: Mubadala Investment Company
|
||||
classification:
|
||||
access: 0.4
|
||||
# Phase 1E re-audit (Plan 2026-04-25-001): rubric flagged
|
||||
# Mubadala's pre-PR access=0.4 as below the 0.5 hybrid-tier
|
||||
# midpoint and transparency=0.6 as under-rated for an LM=10
|
||||
# IFSWF full member. Both bumped to align with the rubric.
|
||||
access: 0.5
|
||||
liquidity: 0.5
|
||||
transparency: 0.6
|
||||
transparency: 0.7
|
||||
rationale:
|
||||
access: |
|
||||
Strategic + financial hybrid mandate — combines economic-
|
||||
diversification assets with financial investments. Medium
|
||||
access for fiscal support; constrained by strategic holdings.
|
||||
diversification assets with financial investments. The 2024
|
||||
ADQ-related corporate restructuring (consolidation of Abu
|
||||
Dhabi state investment vehicles) reinforces Mubadala's
|
||||
treatment as a hybrid-constrained 0.5-tier vehicle: deployable
|
||||
for fiscal support, constrained by strategic holdings.
|
||||
Access bumped from 0.4 → 0.5 in Phase 1E re-audit.
|
||||
liquidity: |
|
||||
Mixed: ~50% public equities + credit, ~50% private equity,
|
||||
real estate, infrastructure (Mubadala 2024 annual report).
|
||||
transparency: |
|
||||
Audited AUM published, asset-mix disclosed annually. IFSWF
|
||||
member. LM index = 10.
|
||||
full member. LM index = 10. Bumped from 0.6 → 0.7 in
|
||||
Phase 1E re-audit to align with the rubric tier (0.7 = audited
|
||||
AUM + asset-class mix + returns disclosed annually).
|
||||
sources:
|
||||
- https://www.mubadala.com/en/annual-review
|
||||
- https://www.ifswf.org/member-profiles/mubadala-investment-company
|
||||
|
||||
# Investment Corporation of Dubai — Dubai-government holding
|
||||
# company. Owns Emirates Airlines, ENBD, Dubai Holdings, and
|
||||
# significant Emaar / DEWA stakes. Distinct from ADIA (Abu Dhabi
|
||||
# emirate) and Mubadala (Abu Dhabi strategic). Phase 1A addition.
|
||||
- country: AE
|
||||
fund: icd
|
||||
display_name: Investment Corporation of Dubai (ICD)
|
||||
wikipedia:
|
||||
abbrev: ICD
|
||||
fund_name: Investment Corporation of Dubai
|
||||
aum_usd: 320000000000
|
||||
aum_year: 2023
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.5
|
||||
liquidity: 0.5
|
||||
transparency: 0.4
|
||||
rationale:
|
||||
access: |
|
||||
Dubai-government holding company; finances Dubai-emirate
|
||||
budget directly during shocks. 2009 GCC bailout precedent
|
||||
when ICD-affiliated entities (Dubai World, Nakheel) were
|
||||
actively rolled into broader fiscal support. Hybrid-
|
||||
constrained 0.5 tier — deployment is mechanically possible
|
||||
but constrained by strategic holdings (Emirates Airlines,
|
||||
ENBD).
|
||||
liquidity: |
|
||||
Mixed portfolio: Emaar + ENBD + DEWA + Borse Dubai listed
|
||||
equity stakes (~50% public market) balanced against
|
||||
Emirates Airlines + Dubai Holdings real estate (~50%
|
||||
private). 0.5 mid-liquidity tier.
|
||||
transparency: |
|
||||
ICD publishes consolidated audited financial highlights but
|
||||
does not disclose holdings-level detail. LM index ~4.
|
||||
Annual review available via icd.gov.ae but constituent
|
||||
AUM disclosure is partial.
|
||||
sources:
|
||||
- https://www.icd.gov.ae/en/about-icd/our-portfolio/
|
||||
- https://www.icd.gov.ae/en/news-and-publications/
|
||||
|
||||
# ADQ (Abu Dhabi Developmental Holding Company) — strategic
|
||||
# investment vehicle established 2018, distinct from ADIA and
|
||||
# Mubadala. Phase 1A addition.
|
||||
- country: AE
|
||||
fund: adq
|
||||
display_name: Abu Dhabi Developmental Holding Company (ADQ)
|
||||
wikipedia:
|
||||
abbrev: ADQ
|
||||
fund_name: ADQ
|
||||
aum_usd: 199000000000
|
||||
aum_year: 2024
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.5
|
||||
liquidity: 0.4
|
||||
transparency: 0.4
|
||||
rationale:
|
||||
access: |
|
||||
Hybrid strategic-investment vehicle under Abu Dhabi
|
||||
government control. Mandate covers economic diversification
|
||||
+ strategic asset stewardship. Medium access for fiscal
|
||||
support; access discipline closer to Mubadala than to ADIA's
|
||||
long-horizon savings tier.
|
||||
liquidity: |
|
||||
Heavy in private equity, food + agriculture (Agthia, Modern
|
||||
Bakery), real estate, healthcare, and energy. Limited public-
|
||||
market exposure relative to ADIA. ~0.4 mid-low liquidity.
|
||||
transparency: |
|
||||
ADQ publishes AUM and asset-class summaries via corporate
|
||||
press releases and select press disclosures (Reuters, FT
|
||||
primary reporting). IFSWF observer-only. LM index ~4.
|
||||
sources:
|
||||
- https://www.adq.ae/about-us/
|
||||
- https://www.adq.ae/news-and-insights/
|
||||
|
||||
# Emirates Investment Authority (EIA) — federal-level UAE wealth
|
||||
# vehicle distinct from emirate-level (ADIA, ICD, ADQ, Mubadala)
|
||||
# funds. Limited public disclosure — primary-source AUM not
|
||||
# verifiable as of 2026-04-25; entry loaded for documentation but
|
||||
# excluded from buffer scoring per data-integrity rule
|
||||
# (`aum_verified: false`). Phase 1A addition; revisit when EIA
|
||||
# publishes audited consolidated statements.
|
||||
- country: AE
|
||||
fund: eia
|
||||
display_name: Emirates Investment Authority (EIA)
|
||||
aum_verified: false
|
||||
classification:
|
||||
access: 0.4
|
||||
liquidity: 0.5
|
||||
transparency: 0.2
|
||||
rationale:
|
||||
access: |
|
||||
Federal-level UAE reserves vehicle (cf. emirate-level ADIA/
|
||||
ICD). Mandate covers federal fiscal stabilization for
|
||||
emirate-fiscal-shock smoothing. Access is constrained by
|
||||
federal/emirate political coordination; rated mid-low.
|
||||
liquidity: |
|
||||
Limited public disclosure; mix presumed to mirror federal-
|
||||
reserves convention (majority public-market) but not
|
||||
verified.
|
||||
transparency: |
|
||||
Limited public disclosure. Reuters cites federal-level AUM
|
||||
figures but EIA itself does not publish audited annual
|
||||
statements at the level of ADIA / Mubadala / ICD. LM index
|
||||
~2. Marked `aum_verified: false` until primary disclosure
|
||||
materializes.
|
||||
sources:
|
||||
- https://www.eia.gov.ae/en/
|
||||
|
||||
# ── Saudi Arabia ──
|
||||
# PIF combines stabilization, strategic-diversification, and domestic
|
||||
# development mandates. Asset mix is heavily domestic-strategic
|
||||
@@ -182,29 +314,97 @@ funds:
|
||||
- https://www.ifswf.org/members
|
||||
|
||||
# ── Kuwait ──
|
||||
# KIA runs two legally distinct funds: General Reserve Fund (budget-
|
||||
# financing) and Future Generations Fund (intergenerational). Combined
|
||||
# here since audited AUM is reported at the KIA level.
|
||||
# KIA's audited $1.072T AUM is split here per Kuwaiti Public Funds
|
||||
# Law and Decree 106 of 1976 (FGF) into two sleeves with materially
|
||||
# different access profiles. Phase 1B addition (Plan 2026-04-25-001).
|
||||
# Combined-AUM modeling overstated the crisis-deployable balance by
|
||||
# ~18× (2026-04-24 audit) because GRF's `access=0.7` haircut was
|
||||
# applied to the full $1.072T, when ~95% of that AUM is FGF and
|
||||
# constitutionally gated. The split correctly attributes GRF's
|
||||
# stabilization mandate to its own ~5% sleeve and FGF's
|
||||
# statutorily-gated long-horizon profile to the remaining ~95%.
|
||||
- country: KW
|
||||
fund: kia
|
||||
display_name: Kuwait Investment Authority (KIA)
|
||||
fund: kia-grf
|
||||
display_name: Kuwait Investment Authority — General Reserve Fund (KIA-GRF)
|
||||
wikipedia:
|
||||
# Wikipedia/SWFI report the COMBINED audited KIA AUM; the loader
|
||||
# multiplies by aum_pct_of_audited (5% for GRF, 95% for FGF) to
|
||||
# recover per-sleeve effective balance. No Wikipedia abbrev
|
||||
# specifically for GRF.
|
||||
abbrev: KIA
|
||||
fund_name: Kuwait Investment Authority
|
||||
classification:
|
||||
access: 0.7
|
||||
liquidity: 0.8
|
||||
transparency: 0.4
|
||||
aum_pct_of_audited: 0.05
|
||||
rationale:
|
||||
access: |
|
||||
General Reserve Fund explicitly finances budget shortfalls from
|
||||
oil-revenue swings. Strongest stabilization access in the Gulf.
|
||||
Kuwaiti Public Finance Law explicitly directs GRF to absorb
|
||||
oil-revenue shortfalls and finance budget deficits. Drained
|
||||
to negative balance during 2020 COVID shock; refilled by
|
||||
domestic + international borrowing. The 2020 deployment
|
||||
required active Council-of-Ministers authorization (NOT
|
||||
post-hoc/symbolic), which keeps GRF in the rubric's 0.7
|
||||
"Explicit stabilization with rule" tier — a legislated
|
||||
mechanism with deployment precedent — rather than the 0.9
|
||||
"automatic stabilization" tier (which requires
|
||||
rule-triggered automatic deployment, e.g. Chile ESSF).
|
||||
The original combined-KIA `access=0.7` matched this tier;
|
||||
kept here for the GRF sleeve.
|
||||
liquidity: |
|
||||
Predominantly public-market (~75-85% listed equities + fixed
|
||||
income). Private-asset sleeve is a minority allocation.
|
||||
Same portfolio profile as KIA-FGF — classification independent.
|
||||
transparency: |
|
||||
Financials reported to National Assembly but sealed from
|
||||
public; partial IFSWF engagement. LM index = 6.
|
||||
aum_pct_of_audited: |
|
||||
GRF receives oil revenues and finances the budget; its
|
||||
steady-state balance is roughly 5% of KIA's combined audited
|
||||
AUM. The fraction varies year-to-year (negative in 2020-21
|
||||
during COVID, refilled by 2022-23). Using a representative
|
||||
steady-state share avoids dependency on year-specific
|
||||
balances that aren't separately disclosed.
|
||||
sources:
|
||||
- https://www.kia.gov.kw/en/
|
||||
- https://www.ifswf.org/member-profiles/kuwait-investment-authority
|
||||
|
||||
- country: KW
|
||||
fund: kia-fgf
|
||||
display_name: Kuwait Investment Authority — Future Generations Fund (KIA-FGF)
|
||||
wikipedia:
|
||||
abbrev: KIA
|
||||
fund_name: Kuwait Investment Authority
|
||||
classification:
|
||||
access: 0.20
|
||||
liquidity: 0.8
|
||||
transparency: 0.4
|
||||
aum_pct_of_audited: 0.95
|
||||
rationale:
|
||||
access: |
|
||||
FGF withdrawals require Council-of-Ministers + Emir decree
|
||||
(Decree 106 of 1976; 2020 amendment did NOT remove the gate,
|
||||
only added an emergency-pathway provision used once during
|
||||
COVID for a small, capped draw). Not legally accessible for
|
||||
ordinary stabilization. Score 0.20 reflects: (a) below the
|
||||
0.3 "intergenerational/ruler-discretionary" tier because
|
||||
the gate is bicameral-equivalent + statutory, (b) above
|
||||
the 0.1 "sanctions/frozen" tier because the gate has been
|
||||
crossed in extremis. Anchors a NEW rubric tier between 0.1
|
||||
and 0.3 (see swf-classification-rubric.md "Statutorily-gated
|
||||
long-horizon" tier added in Phase 1B).
|
||||
liquidity: |
|
||||
Same portfolio profile as GRF; classification independent.
|
||||
transparency: |
|
||||
Same portfolio profile as GRF; reported to National Assembly
|
||||
but sealed from public; partial IFSWF engagement. LM index = 6.
|
||||
aum_pct_of_audited: |
|
||||
FGF receives 10% of state revenue annually + accumulated
|
||||
returns; in steady state holds ~95% of KIA's combined
|
||||
audited AUM. The 5/95 split is per Kuwait's Public Funds
|
||||
Law and is the canonical proportion published by KIA in
|
||||
IFSWF disclosures.
|
||||
sources:
|
||||
- https://www.kia.gov.kw/en/
|
||||
- https://www.ifswf.org/member-profiles/kuwait-investment-authority
|
||||
@@ -302,6 +502,304 @@ funds:
|
||||
- https://www.temasekreview.com.sg/
|
||||
- https://www.temasek.com.sg/en/our-financials
|
||||
|
||||
# ── China ──
|
||||
# Phase 1C addition (Plan 2026-04-25-001). Three CN sovereign-wealth
|
||||
# vehicles tracked by SWFI/IFSWF; SAFE Investment Co is excluded
|
||||
# from the buffer dim because its AUM is part of the SAFE umbrella
|
||||
# consolidated FX reserves (already counted in reserveAdequacy /
|
||||
# liquidReserveAdequacy).
|
||||
- country: CN
|
||||
fund: cic
|
||||
display_name: China Investment Corporation (CIC)
|
||||
wikipedia:
|
||||
abbrev: CIC
|
||||
fund_name: China Investment Corporation
|
||||
aum_usd: 1350000000000
|
||||
aum_year: 2023
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.4
|
||||
liquidity: 0.5
|
||||
transparency: 0.3
|
||||
rationale:
|
||||
access: |
|
||||
Hybrid strategic-investment vehicle with PBOC + MOF
|
||||
coordination required for major redeployment. Long-horizon
|
||||
external mandate (CIC International) plus state-directed
|
||||
domestic financial holdings (Central Huijin) — flexible in
|
||||
principle but politically constrained. 0.4 mid-tier.
|
||||
liquidity: |
|
||||
~50% public-market (listed equities + bonds via CIC
|
||||
International), ~50% private (Central Huijin domestic banking
|
||||
stakes, alternative investments). Mid-liquidity.
|
||||
transparency: |
|
||||
Annual report publishes AUM and asset-class summary; holdings-
|
||||
level disclosure limited to large public stakes (13F-equivalent
|
||||
for U.S. holdings). LM index ~3-4.
|
||||
sources:
|
||||
- http://www.china-inv.cn/en/
|
||||
- https://www.swfinstitute.org/profile/cic
|
||||
|
||||
- country: CN
|
||||
fund: nssf
|
||||
display_name: National Council for Social Security Fund (NSSF)
|
||||
wikipedia:
|
||||
fund_name: National Council for Social Security Fund
|
||||
aum_usd: 400000000000
|
||||
aum_year: 2023
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.20
|
||||
liquidity: 0.5
|
||||
transparency: 0.4
|
||||
rationale:
|
||||
access: |
|
||||
Pension-purpose: NSSF holdings are statutorily reserved for
|
||||
social-security payment obligations. Withdrawals for
|
||||
non-pension fiscal stabilization are not permitted under
|
||||
current Chinese law. Maps to the 0.20 "statutorily-gated
|
||||
long-horizon" tier added in Phase 1B (KIA-FGF analogue).
|
||||
liquidity: |
|
||||
Mix of listed equities, fixed income, and strategic
|
||||
unlisted holdings (banking IPO seedings). Mid-liquidity.
|
||||
transparency: |
|
||||
Annual report publishes AUM totals + broad allocation; per-
|
||||
holding disclosure limited.
|
||||
sources:
|
||||
- http://www.ssf.gov.cn/
|
||||
- https://www.swfinstitute.org/profile/nssf
|
||||
|
||||
- country: CN
|
||||
fund: safe-ic
|
||||
display_name: SAFE Investment Company Limited
|
||||
aum_usd: 417000000000
|
||||
aum_year: 2024
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.5
|
||||
liquidity: 0.7
|
||||
transparency: 0.3
|
||||
excluded_overlaps_with_reserves: true
|
||||
rationale:
|
||||
access: |
|
||||
Subsidiary of the State Administration of Foreign Exchange
|
||||
(SAFE); manages a portion of China's FX reserves abroad.
|
||||
Documentation-only — see excluded rationale.
|
||||
liquidity: |
|
||||
Predominantly listed equities + sovereign bonds (FX-reserve-
|
||||
like portfolio composition).
|
||||
transparency: |
|
||||
Limited public disclosure; AUM tracked via SWFI third-party
|
||||
estimates. LM index ~2.
|
||||
excluded_overlaps_with_reserves: |
|
||||
SAFE Investment Co's AUM is part of China's State Administration
|
||||
of Foreign Exchange consolidated reserves. Including it in the
|
||||
SWF buffer would double-count against `reserveAdequacy` /
|
||||
`liquidReserveAdequacy`, both of which already capture
|
||||
SAFE-managed FX reserves. Documentation-only entry; excluded
|
||||
from buffer scoring.
|
||||
sources:
|
||||
- https://www.swfinstitute.org/profile/safe-investment-company
|
||||
|
||||
# ── Hong Kong ──
|
||||
# HKMA Exchange Fund. Backing Portfolio is reserves-equivalent
|
||||
# (already in reserveAdequacy); Investment Portfolio + Future Fund
|
||||
# branch could be SWF-relevant but the consolidated AUM is
|
||||
# reported as one figure. Excluded from buffer to avoid double-
|
||||
# counting against HK monetary-authority reserves.
|
||||
- country: HK
|
||||
fund: hkma-ef
|
||||
display_name: HKMA Exchange Fund
|
||||
aum_usd: 498000000000
|
||||
aum_year: 2024
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.7
|
||||
liquidity: 0.9
|
||||
transparency: 0.7
|
||||
excluded_overlaps_with_reserves: true
|
||||
rationale:
|
||||
access: |
|
||||
Statutory mandate: maintain HKD peg + back banking system.
|
||||
High access for monetary stabilization; stabilization is the
|
||||
primary mandate. Documentation-only — see excluded rationale.
|
||||
liquidity: |
|
||||
Predominantly listed equities + sovereign bonds + USD cash.
|
||||
Highly liquid by design (Backing Portfolio is reserves).
|
||||
transparency: |
|
||||
HKMA publishes monthly Exchange Fund balance sheet + annual
|
||||
report. LM index ~8.
|
||||
excluded_overlaps_with_reserves: |
|
||||
HKMA Exchange Fund's Backing Portfolio is reserves-equivalent
|
||||
and is captured under `reserveAdequacy` / `liquidReserveAdequacy`.
|
||||
Investment Portfolio + Future Fund are not separately disclosed
|
||||
at AUM level. To avoid double-counting, excluded from buffer.
|
||||
Implementation-Time Unknown #2 in the parent plan flagged this
|
||||
for follow-up: when HKMA discloses Investment Portfolio AUM
|
||||
separately, the Investment Portfolio sleeve could be added as
|
||||
a non-excluded entry.
|
||||
sources:
|
||||
- https://www.hkma.gov.hk/eng/data-publications-and-research/
|
||||
|
||||
# ── South Korea ──
|
||||
- country: KR
|
||||
fund: kic
|
||||
display_name: Korea Investment Corporation (KIC)
|
||||
wikipedia:
|
||||
abbrev: KIC
|
||||
fund_name: Korea Investment Corporation
|
||||
aum_usd: 182000000000
|
||||
aum_year: 2024
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.3
|
||||
liquidity: 0.7
|
||||
transparency: 0.7
|
||||
rationale:
|
||||
access: |
|
||||
Long-horizon mandate; KIC manages assets under MOEF + Bank
|
||||
of Korea entrustment but does not have an explicit
|
||||
stabilization mandate. Withdrawals require entrustment
|
||||
agreement modification. Intergenerational tier.
|
||||
liquidity: |
|
||||
~70% public-market (listed equities + sovereign + corporate
|
||||
bonds), ~30% alternatives (private equity, real estate,
|
||||
infrastructure, hedge funds).
|
||||
transparency: |
|
||||
IFSWF full member. Annual report published with detailed
|
||||
asset allocation, returns, and partial holdings. LM index ~7.
|
||||
sources:
|
||||
- https://www.kic.kr/en/
|
||||
- https://www.ifswf.org/member-profiles/korea-investment-corporation
|
||||
|
||||
# ── Oman ──
|
||||
# OIA established 2020 by merging State General Reserve Fund (SGRF)
|
||||
# + Oman Investment Fund (OIF). IFSWF member; rubric pre-flagged
|
||||
# as "Shippable". Phase 1D addition.
|
||||
- country: OM
|
||||
fund: oia
|
||||
display_name: Oman Investment Authority (OIA)
|
||||
wikipedia:
|
||||
abbrev: OIA
|
||||
fund_name: Oman Investment Authority
|
||||
aum_usd: 50000000000
|
||||
aum_year: 2024
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.5
|
||||
liquidity: 0.5
|
||||
transparency: 0.5
|
||||
rationale:
|
||||
access: |
|
||||
Hybrid mandate: SGRF (now part of OIA) historically funded
|
||||
budget shortfalls during oil downturns; OIF (now part of OIA)
|
||||
was the strategic vehicle. Combined OIA inherits both;
|
||||
deployment for fiscal support is mechanically possible but
|
||||
coordinated through Ministry of Finance. Mid-tier hybrid.
|
||||
liquidity: |
|
||||
Mixed: external public-market (managed by external managers)
|
||||
+ domestic strategic stakes + alternative investments. Mid.
|
||||
transparency: |
|
||||
OIA publishes annual review post-2020 merger; IFSWF full
|
||||
member. LM index ~6.
|
||||
sources:
|
||||
- https://www.oia.gov.om/
|
||||
- https://www.ifswf.org/member-profiles/oman-investment-authority
|
||||
|
||||
# ── Bahrain ──
|
||||
- country: BH
|
||||
fund: mumtalakat
|
||||
display_name: Mumtalakat Holding Company
|
||||
wikipedia:
|
||||
fund_name: Mumtalakat Holding Company
|
||||
aum_usd: 19000000000
|
||||
aum_year: 2024
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.4
|
||||
liquidity: 0.4
|
||||
transparency: 0.6
|
||||
rationale:
|
||||
access: |
|
||||
Bahraini state strategic-investment vehicle. Holdings
|
||||
concentrated in domestic banking (BBK, NBB), aluminum
|
||||
(ALBA), telecoms (Batelco). Deployable for fiscal support
|
||||
via dividend flow but not via primary-asset liquidation
|
||||
without disrupting strategic positions.
|
||||
liquidity: |
|
||||
Domestic strategic holdings + foreign-listed equity stakes.
|
||||
Mid-low liquidity.
|
||||
transparency: |
|
||||
Audited annual report, asset-class disclosures. IFSWF
|
||||
member. LM index ~7.
|
||||
sources:
|
||||
- https://www.mumtalakat.bh/en
|
||||
- https://www.ifswf.org/member-profiles/mumtalakat-holding-company
|
||||
|
||||
# ── Timor-Leste ──
|
||||
# Petroleum Fund of Timor-Leste — rubric flagged as
|
||||
# "high-transparency textbook fit". High-transparency fund
|
||||
# benchmark; Banco Central de Timor-Leste publishes monthly
|
||||
# statements + annual reports. Phase 1D addition.
|
||||
- country: TL
|
||||
fund: petroleum-fund
|
||||
display_name: Petroleum Fund of Timor-Leste
|
||||
aum_usd: 22000000000
|
||||
aum_year: 2024
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.7
|
||||
liquidity: 0.9
|
||||
transparency: 0.9
|
||||
rationale:
|
||||
access: |
|
||||
Norwegian-model petroleum fund: fiscal-rule-based annual
|
||||
Estimated Sustainable Income (ESI) drawdown for budget
|
||||
support. Statutorily codified deployment trigger; closer
|
||||
to GPFG's rule-based stabilization than to KIA-FGF's
|
||||
long-horizon lock. 0.7 explicit-stabilization tier.
|
||||
liquidity: |
|
||||
~60% sovereign bonds, ~40% global equities. Highly liquid.
|
||||
transparency: |
|
||||
Monthly statements published by Banco Central de Timor-Leste;
|
||||
annual report with full holdings disclosure. LM index ~9.
|
||||
sources:
|
||||
- https://www.bancocentral.tl/en/petroleum-fund
|
||||
- https://www.swfinstitute.org/profile/petroleum-fund-of-timor-leste
|
||||
|
||||
# ── Australia ──
|
||||
- country: AU
|
||||
fund: future-fund
|
||||
display_name: Australian Future Fund
|
||||
wikipedia:
|
||||
fund_name: Future Fund (Australia)
|
||||
aum_usd: 192000000000
|
||||
aum_year: 2024
|
||||
aum_verified: true
|
||||
classification:
|
||||
access: 0.3
|
||||
liquidity: 0.5
|
||||
transparency: 0.8
|
||||
rationale:
|
||||
access: |
|
||||
Established 2006 to fund Commonwealth of Australia
|
||||
unfunded superannuation liabilities. Statutorily restricted
|
||||
from drawdown until 2027 (originally — extended). Long-
|
||||
horizon savings tier. Australian fiscal practice has used
|
||||
Future Fund AUM as a buffer signal in budget discussions
|
||||
but no operational drawdown has occurred for stabilization.
|
||||
liquidity: |
|
||||
Mixed: ~30% listed equities, ~25% alternatives, ~20% bonds,
|
||||
~15% real estate + infrastructure, ~10% private equity.
|
||||
Mid-liquidity.
|
||||
transparency: |
|
||||
Quarterly portfolio updates with asset-class breakdowns;
|
||||
annual report with detailed performance + holdings discussion.
|
||||
LM index ~9.
|
||||
sources:
|
||||
- https://www.futurefund.gov.au/
|
||||
- https://www.swfinstitute.org/profile/australia-future-fund
|
||||
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
# CANDIDATES DEFERRED FROM V1
|
||||
# ────────────────────────────────────────────────────────────────────
|
||||
|
||||
@@ -31,6 +31,23 @@ const MANIFEST_PATH = resolve(here, './swf-classification-manifest.yaml');
|
||||
* @property {number} access 0..1 inclusive
|
||||
* @property {number} liquidity 0..1 inclusive
|
||||
* @property {number} transparency 0..1 inclusive
|
||||
* @property {number} [aumPctOfAudited] OPTIONAL 0..1; multiplier applied
|
||||
* to the matched audited AUM, used
|
||||
* when one entry represents only a
|
||||
* fraction of a combined audited
|
||||
* fund (e.g. KIA-GRF vs KIA-FGF
|
||||
* split of audited KIA AUM).
|
||||
* @property {boolean} [excludedOverlapsWithReserves] OPTIONAL; when true,
|
||||
* the seeder loads the entry for
|
||||
* documentation but EXCLUDES it
|
||||
* from buffer calculation. Used
|
||||
* for funds whose AUM is already
|
||||
* counted in central-bank FX
|
||||
* reserves (SAFE Investment Co,
|
||||
* HKMA Exchange Fund) to avoid
|
||||
* double-counting against the
|
||||
* reserveAdequacy /
|
||||
* liquidReserveAdequacy dims.
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -52,8 +69,20 @@ const MANIFEST_PATH = resolve(here, './swf-classification-manifest.yaml');
|
||||
* @property {string} displayName human-readable fund name
|
||||
* @property {SwfWikipediaHints} [wikipedia] optional lookup hints for the
|
||||
* Wikipedia fallback scraper
|
||||
* @property {number} [aumUsd] OPTIONAL primary-source AUM in USD.
|
||||
* When present AND `aumVerified === true`,
|
||||
* the seeder uses this value directly
|
||||
* instead of resolving via Wikipedia.
|
||||
* @property {number} [aumYear] OPTIONAL year of the primary-source
|
||||
* AUM disclosure (e.g. 2024).
|
||||
* @property {boolean} [aumVerified] OPTIONAL primary-source-confirmed flag.
|
||||
* When false, the entry is loaded for
|
||||
* documentation but EXCLUDED from buffer
|
||||
* scoring (data-integrity rule).
|
||||
* @property {SwfClassification} classification
|
||||
* @property {{ access: string, liquidity: string, transparency: string }} rationale
|
||||
* @property {{ access: string, liquidity: string, transparency: string,
|
||||
* [aum_pct_of_audited]: string,
|
||||
* [excluded_overlaps_with_reserves]: string }} rationale
|
||||
* @property {string[]} sources
|
||||
*/
|
||||
|
||||
@@ -93,7 +122,35 @@ function validateClassification(cls, path) {
|
||||
assertZeroToOne(c.access, `${path}.access`);
|
||||
assertZeroToOne(c.liquidity, `${path}.liquidity`);
|
||||
assertZeroToOne(c.transparency, `${path}.transparency`);
|
||||
return { access: c.access, liquidity: c.liquidity, transparency: c.transparency };
|
||||
|
||||
// OPTIONAL: aum_pct_of_audited multiplier (KIA-GRF/FGF split case).
|
||||
let aumPctOfAudited;
|
||||
if (c.aum_pct_of_audited != null) {
|
||||
if (typeof c.aum_pct_of_audited !== 'number'
|
||||
|| Number.isNaN(c.aum_pct_of_audited)
|
||||
|| c.aum_pct_of_audited <= 0
|
||||
|| c.aum_pct_of_audited > 1) {
|
||||
fail(`${path}.aum_pct_of_audited: expected number in (0, 1], got ${JSON.stringify(c.aum_pct_of_audited)}`);
|
||||
}
|
||||
aumPctOfAudited = c.aum_pct_of_audited;
|
||||
}
|
||||
|
||||
// OPTIONAL: excluded_overlaps_with_reserves flag (SAFE-IC / HKMA case).
|
||||
let excludedOverlapsWithReserves;
|
||||
if (c.excluded_overlaps_with_reserves != null) {
|
||||
if (typeof c.excluded_overlaps_with_reserves !== 'boolean') {
|
||||
fail(`${path}.excluded_overlaps_with_reserves: expected boolean, got ${JSON.stringify(c.excluded_overlaps_with_reserves)}`);
|
||||
}
|
||||
excludedOverlapsWithReserves = c.excluded_overlaps_with_reserves;
|
||||
}
|
||||
|
||||
return {
|
||||
access: c.access,
|
||||
liquidity: c.liquidity,
|
||||
transparency: c.transparency,
|
||||
...(aumPctOfAudited != null ? { aumPctOfAudited } : {}),
|
||||
...(excludedOverlapsWithReserves != null ? { excludedOverlapsWithReserves } : {}),
|
||||
};
|
||||
}
|
||||
|
||||
function validateRationale(rat, path) {
|
||||
@@ -102,7 +159,19 @@ function validateRationale(rat, path) {
|
||||
assertNonEmptyString(r.access, `${path}.access`);
|
||||
assertNonEmptyString(r.liquidity, `${path}.liquidity`);
|
||||
assertNonEmptyString(r.transparency, `${path}.transparency`);
|
||||
return { access: r.access, liquidity: r.liquidity, transparency: r.transparency };
|
||||
// Optional rationale paragraphs for the new schema fields. Required
|
||||
// ONLY when the corresponding classification field is present (paired
|
||||
// with a rationale in validateFundEntry).
|
||||
const out = { access: r.access, liquidity: r.liquidity, transparency: r.transparency };
|
||||
if (r.aum_pct_of_audited != null) {
|
||||
assertNonEmptyString(r.aum_pct_of_audited, `${path}.aum_pct_of_audited`);
|
||||
out.aumPctOfAudited = r.aum_pct_of_audited;
|
||||
}
|
||||
if (r.excluded_overlaps_with_reserves != null) {
|
||||
assertNonEmptyString(r.excluded_overlaps_with_reserves, `${path}.excluded_overlaps_with_reserves`);
|
||||
out.excludedOverlapsWithReserves = r.excluded_overlaps_with_reserves;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
function validateSources(sources, path) {
|
||||
@@ -154,6 +223,19 @@ function validateFundEntry(raw, idx, seenFundKeys) {
|
||||
if (!raw || typeof raw !== 'object') fail(`${path}: expected object`);
|
||||
const f = /** @type {Record<string, unknown>} */ (raw);
|
||||
|
||||
// Misplacement gate. `aum_pct_of_audited` and
|
||||
// `excluded_overlaps_with_reserves` are CLASSIFICATION fields.
|
||||
// If they appear at the top level of a fund entry, the loader
|
||||
// rejects with a clear error rather than silently accepting the
|
||||
// misplaced field (which would be ignored by the schema and
|
||||
// produce wrong scoring). Codex Round 1 #4.
|
||||
if (f.aum_pct_of_audited !== undefined) {
|
||||
fail(`${path}: aum_pct_of_audited must be placed under classification:, not top-level`);
|
||||
}
|
||||
if (f.excluded_overlaps_with_reserves !== undefined) {
|
||||
fail(`${path}: excluded_overlaps_with_reserves must be placed under classification:, not top-level`);
|
||||
}
|
||||
|
||||
assertIso2(f.country, `${path}.country`);
|
||||
assertNonEmptyString(f.fund, `${path}.fund`);
|
||||
assertNonEmptyString(f.display_name, `${path}.display_name`);
|
||||
@@ -162,16 +244,59 @@ function validateFundEntry(raw, idx, seenFundKeys) {
|
||||
if (seenFundKeys.has(dedupeKey)) fail(`${path}: duplicate fund identifier ${dedupeKey}`);
|
||||
seenFundKeys.add(dedupeKey);
|
||||
|
||||
// OPTIONAL primary-source AUM fields. When `aum_verified === true`
|
||||
// AND `aum_usd` present, the seeder uses these directly without
|
||||
// querying Wikipedia. When `aum_verified === false`, the entry
|
||||
// is loaded for documentation but EXCLUDED from buffer scoring
|
||||
// (data-integrity rule from plan §Phase 1A).
|
||||
let aumUsd;
|
||||
if (f.aum_usd != null) {
|
||||
if (typeof f.aum_usd !== 'number' || !Number.isFinite(f.aum_usd) || f.aum_usd <= 0) {
|
||||
fail(`${path}.aum_usd: expected positive finite number, got ${JSON.stringify(f.aum_usd)}`);
|
||||
}
|
||||
aumUsd = f.aum_usd;
|
||||
}
|
||||
let aumYear;
|
||||
if (f.aum_year != null) {
|
||||
if (typeof f.aum_year !== 'number' || !Number.isInteger(f.aum_year) || f.aum_year < 2000 || f.aum_year > 2100) {
|
||||
fail(`${path}.aum_year: expected integer year in [2000, 2100], got ${JSON.stringify(f.aum_year)}`);
|
||||
}
|
||||
aumYear = f.aum_year;
|
||||
}
|
||||
let aumVerified;
|
||||
if (f.aum_verified != null) {
|
||||
if (typeof f.aum_verified !== 'boolean') {
|
||||
fail(`${path}.aum_verified: expected boolean, got ${JSON.stringify(f.aum_verified)}`);
|
||||
}
|
||||
aumVerified = f.aum_verified;
|
||||
}
|
||||
// Coherence: if aum_verified === true, both aum_usd and aum_year MUST be present.
|
||||
// (A "verified" entry without an actual value is meaningless.)
|
||||
if (aumVerified === true && (aumUsd == null || aumYear == null)) {
|
||||
fail(`${path}: aum_verified=true requires both aum_usd and aum_year to be present`);
|
||||
}
|
||||
|
||||
const classification = validateClassification(f.classification, `${path}.classification`);
|
||||
const rationale = validateRationale(f.rationale, `${path}.rationale`);
|
||||
const sources = validateSources(f.sources, `${path}.sources`);
|
||||
const wikipedia = validateWikipediaHints(f.wikipedia, `${path}.wikipedia`);
|
||||
|
||||
// Coherence: rationale MUST cover any classification field that is set.
|
||||
if (classification.aumPctOfAudited != null && rationale.aumPctOfAudited == null) {
|
||||
fail(`${path}.rationale.aum_pct_of_audited: required when classification.aum_pct_of_audited is set`);
|
||||
}
|
||||
if (classification.excludedOverlapsWithReserves === true && rationale.excludedOverlapsWithReserves == null) {
|
||||
fail(`${path}.rationale.excluded_overlaps_with_reserves: required when classification.excluded_overlaps_with_reserves is true`);
|
||||
}
|
||||
|
||||
return {
|
||||
country: f.country,
|
||||
fund: f.fund,
|
||||
displayName: f.display_name,
|
||||
...(wikipedia ? { wikipedia } : {}),
|
||||
...(aumUsd != null ? { aumUsd } : {}),
|
||||
...(aumYear != null ? { aumYear } : {}),
|
||||
...(aumVerified != null ? { aumVerified } : {}),
|
||||
classification,
|
||||
rationale,
|
||||
sources,
|
||||
|
||||
537
scripts/sweep-topic-thresholds.mjs
Normal file
@@ -0,0 +1,537 @@
|
||||
#!/usr/bin/env node
|
||||
// Offline threshold sweep for the brief topic-grouping pass.
|
||||
//
|
||||
// Reads the per-tick replay log captured by writeReplayLog (opt-in via
|
||||
// DIGEST_DEDUP_REPLAY_LOG=1, key prefix `digest:replay-log:v1:`),
|
||||
// reconstructs each tick's reps + cached embeddings, re-runs
|
||||
// groupTopicsPostDedup at multiple cosine thresholds, and scores the
|
||||
// resulting topic assignments against the labeled adjacency pairs in
|
||||
// scripts/data/brief-adjacency-pairs.json.
|
||||
//
|
||||
// "Are we getting better" output: a markdown table — one row per
|
||||
// candidate threshold — with pair_recall, false_adjacency, topic_count,
|
||||
// avg_topic_size, and a composite quality_score. Pick the row with the
|
||||
// highest quality_score; flip DIGEST_DEDUP_TOPIC_THRESHOLD on Railway
|
||||
// to that value.
|
||||
//
|
||||
// Usage:
|
||||
// node --import tsx/esm scripts/sweep-topic-thresholds.mjs # today, full:en:all
|
||||
// node --import tsx/esm scripts/sweep-topic-thresholds.mjs --date 2026-04-24 # specific date
|
||||
// node --import tsx/esm scripts/sweep-topic-thresholds.mjs --rule full:en:critical # specific rule
|
||||
// node --import tsx/esm scripts/sweep-topic-thresholds.mjs --thresholds 0.30,0.35,0.40 # custom sweep
|
||||
// node --import tsx/esm scripts/sweep-topic-thresholds.mjs --json > sweep-result.json # machine-readable
|
||||
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { loadEnvFile, getRedisCredentials } from './_seed-utils.mjs';
|
||||
import { singleLinkCluster } from './lib/brief-dedup-embed.mjs';
|
||||
import { normalizeForEmbedding } from './lib/brief-embedding.mjs';
|
||||
|
||||
loadEnvFile(import.meta.url);
|
||||
|
||||
// ── CLI args ───────────────────────────────────────────────────────────
|
||||
|
||||
// Resolve floor + cap + topN from production env, falling back to
|
||||
// documented defaults. CLI flags override env. The replay log's
|
||||
// tickConfig does not currently capture these (see PR #3390 follow-up
|
||||
// to add scoreFloor/topN/maxStoriesPerUser to the writer's record);
|
||||
// until then, env is the most-faithful source.
|
||||
const SCORE_FLOOR_DEFAULT = 63; // matches production DIGEST_SCORE_MIN
|
||||
const TOP_N_DEFAULT = 30; // matches production DIGEST_MAX_ITEMS
|
||||
// Default 12 — matches production MAX_STORIES_PER_USER. PR #3389 kept
|
||||
// the historical default after sweep evidence showed cap=16 hurts
|
||||
// visible_quality at threshold 0.45. Override locally with
|
||||
// DIGEST_MAX_STORIES_PER_USER env var or `--cap N` flag.
|
||||
const MAX_STORIES_DEFAULT = 12;
|
||||
|
||||
// Read a positive-integer env var. Falls back when the variable is
// unset, empty, or does not parse to a positive integer.
function envInt(name, fallback) {
  const value = process.env[name];
  if (value == null || value === '') return fallback;
  const parsed = Number.parseInt(value, 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return fallback;
  return parsed;
}
|
||||
|
||||
// Parse CLI flags into the sweep config. Defaults mirror production
// env (via envInt) or the documented constants; every flag overrides.
// Unknown flags are ignored. `--help`/`-h` prints the usage block
// (first 23 lines of this file's header comment) and exits.
function parseArgs(argv) {
  const parsed = {
    date: new Date().toISOString().slice(0, 10),
    rule: 'full:en:all',
    thresholds: [0.30, 0.32, 0.35, 0.38, 0.40, 0.42, 0.45],
    scoreFloor: envInt('DIGEST_SCORE_MIN', SCORE_FLOOR_DEFAULT),
    topN: envInt('DIGEST_MAX_ITEMS', TOP_N_DEFAULT),
    maxStoriesPerUser: envInt('DIGEST_MAX_STORIES_PER_USER', MAX_STORIES_DEFAULT),
    json: false,
  };
  let i = 2;
  while (i < argv.length) {
    const flag = argv[i];
    switch (flag) {
      case '--date':
        parsed.date = argv[++i];
        break;
      case '--rule':
        parsed.rule = argv[++i];
        break;
      case '--thresholds':
        parsed.thresholds = argv[++i]
          .split(',')
          .map((x) => Number(x.trim()))
          .filter(Number.isFinite);
        break;
      case '--score-floor':
        parsed.scoreFloor = Number(argv[++i]);
        break;
      case '--top-n':
        parsed.topN = Number(argv[++i]);
        break;
      case '--max-stories':
      case '--cap':
        parsed.maxStoriesPerUser = Number(argv[++i]);
        break;
      case '--json':
        parsed.json = true;
        break;
      case '--help':
      case '-h':
        console.log(readFileSync(fileURLToPath(import.meta.url), 'utf8').split('\n').slice(0, 23).join('\n'));
        process.exit(0);
        break;
      default:
        break;
    }
    i++;
  }
  return parsed;
}
|
||||
|
||||
// ── Redis helpers ───────────────────────────────────────────────────────
|
||||
|
||||
const REPLAY_KEY_PREFIX = 'digest:replay-log:v1';
|
||||
|
||||
async function redisLrangeAll(url, token, key) {
|
||||
// Pull entire list. Page size 1000 to keep individual responses bounded.
|
||||
const out = [];
|
||||
const PAGE = 1000;
|
||||
let start = 0;
|
||||
while (true) {
|
||||
const stop = start + PAGE - 1;
|
||||
const res = await fetch(`${url}/lrange/${encodeURIComponent(key)}/${start}/${stop}`, {
|
||||
headers: { Authorization: `Bearer ${token}` },
|
||||
});
|
||||
if (!res.ok) {
|
||||
throw new Error(`LRANGE failed: HTTP ${res.status} ${await res.text()}`);
|
||||
}
|
||||
const body = await res.json();
|
||||
const items = Array.isArray(body?.result) ? body.result : [];
|
||||
out.push(...items);
|
||||
if (items.length < PAGE) break;
|
||||
start += PAGE;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
async function redisMget(url, token, keys) {
|
||||
// Upstash MGET via REST. Returns array same length as keys; null for missing.
|
||||
if (keys.length === 0) return [];
|
||||
const path = keys.map((k) => encodeURIComponent(k)).join('/');
|
||||
const res = await fetch(`${url}/mget/${path}`, {
|
||||
headers: { Authorization: `Bearer ${token}` },
|
||||
});
|
||||
if (!res.ok) {
|
||||
throw new Error(`MGET failed: HTTP ${res.status} ${await res.text()}`);
|
||||
}
|
||||
const body = await res.json();
|
||||
return Array.isArray(body?.result) ? body.result : new Array(keys.length).fill(null);
|
||||
}
|
||||
|
||||
// ── Replay record helpers ───────────────────────────────────────────────
|
||||
|
||||
// Decode raw replay-log entries. Non-strings, malformed JSON, and
// parsed values without a truthy briefTickId are silently dropped.
function parseReplayRecords(rawList) {
  const records = [];
  for (const entry of rawList) {
    if (typeof entry !== 'string') continue;
    let decoded;
    try {
      decoded = JSON.parse(entry);
    } catch {
      continue; // malformed entry — skip, matching best-effort intent
    }
    if (decoded && typeof decoded === 'object' && decoded.briefTickId) {
      records.push(decoded);
    }
  }
  return records;
}
|
||||
|
||||
function groupByTick(records) {
|
||||
const ticks = new Map();
|
||||
for (const r of records) {
|
||||
if (!ticks.has(r.briefTickId)) ticks.set(r.briefTickId, []);
|
||||
ticks.get(r.briefTickId).push(r);
|
||||
}
|
||||
return ticks;
|
||||
}
|
||||
|
||||
// ── Pair labels ─────────────────────────────────────────────────────────
|
||||
|
||||
// Load the labeled adjacency-pairs fixture shipped next to this script
// (scripts/data/brief-adjacency-pairs.json). Returns [] when the file
// carries no `pairs` array.
function loadLabeledPairs() {
  const scriptDir = dirname(fileURLToPath(import.meta.url));
  const fixturePath = resolve(scriptDir, 'data', 'brief-adjacency-pairs.json');
  const parsed = JSON.parse(readFileSync(fixturePath, 'utf8'));
  return Array.isArray(parsed?.pairs) ? parsed.pairs : [];
}
|
||||
|
||||
// Apply normalizeForEmbedding to each label so titles match what was
|
||||
// actually embedded in the replay log.
|
||||
function indexLabelsByNormalizedTitle(pairs) {
  // Pairs missing either title cannot be matched against replay reps,
  // so they are dropped up front.
  return pairs
    .filter((p) => p.title_a && p.title_b)
    .map((p) => ({
      a: normalizeForEmbedding(p.title_a),
      b: normalizeForEmbedding(p.title_b),
      expected: p.expected,
      rationale: p.rationale,
      source_brief: p.source_brief,
    }));
}
|
||||
|
||||
// ── Threshold scoring ───────────────────────────────────────────────────
|
||||
|
||||
// Mirror the production slice: groupTopicsPostDedup runs on the
|
||||
// top-DIGEST_MAX_ITEMS reps by score, NOT the full deduped set.
|
||||
// scripts/seed-digest-notifications.mjs:479 — `deduped.slice(0, 30)`.
|
||||
const MIN_SURVIVING_REPS = 5; // skip ticks with fewer hydrated reps
|
||||
|
||||
/**
 * Re-run topic grouping for ONE replay tick at every candidate
 * threshold, and score each resulting partition against the labeled
 * pairs.
 *
 * @param {object} opts
 * @param {Array<object>} opts.reps replay records for this tick; each is
 *        read for `storyHash`, `normalizedTitle`, `currentScore`.
 * @param {Map<string, number[]>} opts.embeddingByHash storyHash → cached
 *        embedding vector (reps without one are filtered out below).
 * @param {Array<object>} opts.labels labeled pairs keyed by normalized
 *        titles (`a`, `b`) with an `expected` verdict.
 * @param {number[]} opts.thresholds cosine thresholds to evaluate.
 * @param {number} opts.scoreFloor production-equivalent score floor.
 * @param {number} opts.topN production-equivalent pre-cluster slice.
 * @param {number} opts.maxStoriesPerUser post-cluster visible cap.
 * @param {(n: number) => void} [opts.missingEmbedReporter] called with the
 *        count of reps dropped for missing embeddings, when > 0.
 * @returns {Array<object>|null} one result row per threshold, or null
 *          when fewer than MIN_SURVIVING_REPS reps have embeddings.
 */
function scoreOneTick({ reps, embeddingByHash, labels, thresholds, scoreFloor, topN, maxStoriesPerUser, missingEmbedReporter }) {
  // Apply production-equivalent floor + slice so the sweep reflects
  // what topic-grouping actually sees in prod, not the 800-rep raw pool.
  const floored = reps.filter((r) => Number(r.currentScore ?? 0) >= scoreFloor);
  const slicedReplay = [...floored]
    .sort((a, b) => Number(b.currentScore ?? 0) - Number(a.currentScore ?? 0))
    .slice(0, topN);
  if (slicedReplay.length <= 1) {
    // NOTE: these degenerate rows carry no visible_count; the
    // aggregator reads it as `row.visible_count ?? 0`, so that is safe.
    return thresholds.map((t) => ({ threshold: t, topic_count: slicedReplay.length, sizes: [], pair_results: [], pair_results_visible: [] }));
  }

  // Remap replay-record shape (storyHash, normalizedTitle, …) to the
  // shape brief-dedup expects (hash, title, currentScore). Filter out
  // reps whose embedding is missing from the cache (transient eviction
  // or a rep written before the cache was populated). Skip the tick
  // entirely if too few reps survive.
  const remapped = slicedReplay.map((r) => ({
    hash: r.storyHash,
    title: r.normalizedTitle,
    currentScore: r.currentScore,
  }));
  const survivors = remapped.filter((r) => Array.isArray(embeddingByHash.get(r.hash)));
  const dropped = remapped.length - survivors.length;
  if (dropped > 0 && missingEmbedReporter) missingEmbedReporter(dropped);
  if (survivors.length < MIN_SURVIVING_REPS) return null;
  const sliced = survivors;

  const out = [];
  for (const threshold of thresholds) {
    // Run the same single-link cluster groupTopicsPostDedup uses
    // internally. We compute the partition directly so the
    // topic-membership labels are byte-identical to what production
    // would produce at this threshold (no leader-only approximation).
    const items = sliced.map((r) => ({
      title: r.title,
      embedding: embeddingByHash.get(r.hash),
    }));
    const { clusters } = singleLinkCluster(items, { cosineThreshold: threshold, vetoFn: null });

    // Map sliced index → topicId
    const topicOfIdx = new Array(sliced.length).fill(-1);
    clusters.forEach((members, tIdx) => {
      for (const i of members) topicOfIdx[i] = tIdx;
    });

    // Title → topic membership for label scoring
    const titleToTopic = new Map();
    for (let i = 0; i < sliced.length; i++) titleToTopic.set(sliced[i].title, topicOfIdx[i]);

    const topicCount = clusters.length;
    const sizes = clusters.map((c) => c.length);
    // singleLinkCluster IS the partition algorithm groupTopicsPostDedup
    // uses internally (scripts/lib/brief-dedup.mjs:336 — clusterFn
    // defaults to singleLinkCluster). No second pass needed; we get
    // the same partition production would compute, faithfully.

    // Reproduce groupTopicsPostDedup's ordering so we can answer the
    // cap-related question: which members survive the post-cluster
    // top-N truncation? Order = topics by (size DESC, max-score DESC),
    // members within a topic by (score DESC). Tiebreaks are
    // deterministic by input order — close enough for evaluation.
    const topicMaxScore = clusters.map((members) =>
      Math.max(...members.map((i) => Number(sliced[i].currentScore ?? 0))),
    );
    const topicOrder = [...clusters.keys()].sort((a, b) => {
      if (sizes[a] !== sizes[b]) return sizes[b] - sizes[a];
      return topicMaxScore[b] - topicMaxScore[a];
    });
    const orderedIdx = [];
    for (const tIdx of topicOrder) {
      const members = [...clusters[tIdx]].sort(
        (a, b) => Number(sliced[b].currentScore ?? 0) - Number(sliced[a].currentScore ?? 0),
      );
      orderedIdx.push(...members);
    }
    const visibleIdxSet = new Set(orderedIdx.slice(0, maxStoriesPerUser));
    // Title → sliced index, for visibility lookup
    const titleToIdx = new Map();
    for (let i = 0; i < sliced.length; i++) titleToIdx.set(sliced[i].title, i);

    const pair_results = [];
    const pair_results_visible = [];
    for (const lab of labels) {
      const tA = titleToTopic.get(lab.a);
      const tB = titleToTopic.get(lab.b);
      if (tA == null || tB == null) continue; // pair not present in this tick
      const clustered = tA === tB;
      pair_results.push({ expected: lab.expected, clustered });

      // Visible-window evaluation: did BOTH labeled stories survive
      // the post-cluster top-N truncation? This is what users actually
      // see. Drives the cap-bump validation question (PR #3389):
      // does bumping cap=12 → 16 cause more cluster-pairs to land
      // visibly adjacent?
      const iA = titleToIdx.get(lab.a);
      const iB = titleToIdx.get(lab.b);
      if (visibleIdxSet.has(iA) && visibleIdxSet.has(iB)) {
        pair_results_visible.push({ expected: lab.expected, clustered });
      }
    }

    out.push({
      threshold,
      topic_count: topicCount,
      sizes: [...sizes].sort((a, b) => b - a),
      pair_results,
      pair_results_visible,
      visible_count: Math.min(orderedIdx.length, maxStoriesPerUser),
    });
  }
  return out;
}
|
||||
|
||||
|
||||
// ── Aggregation across ticks ────────────────────────────────────────────
|
||||
|
||||
// Roll per-tick sweep rows up into one summary row per threshold.
// Averages are over the ticks that produced a row at that threshold;
// recall / false-adjacency rates are over labeled pairs. Thresholds
// that never appear keep ticks === 0 (and zeroed metrics). Null tick
// entries (skipped ticks) are ignored. Output is sorted by threshold.
function aggregateByThreshold(perTickRows, thresholds) {
  const blankRow = (t) => ({
    threshold: t,
    ticks: 0,
    avg_topic_count: 0,
    avg_max_topic_size: 0,
    avg_visible_count: 0,
    multi_member_topic_share: 0,
    pair_recall_cluster: 0, // partition-only (whole tick)
    false_adjacency: 0, // partition-only (whole tick)
    pair_recall_visible: 0, // both members visible AND clustered
    false_adjacency_visible: 0, // both members visible AND clustered (separate-labeled)
    quality_score: 0,
    visible_quality_score: 0,
    samples: 0,
    visible_samples: 0,
  });
  const summary = new Map(thresholds.map((t) => [t, blankRow(t)]));

  // Pass 1: accumulate raw sums and sample counts per threshold.
  for (const tickRows of perTickRows) {
    if (!tickRows) continue;
    for (const row of tickRows) {
      const s = summary.get(row.threshold);
      if (!s) continue;
      s.ticks += 1;
      s.avg_topic_count += row.topic_count;
      s.avg_max_topic_size += row.sizes[0] ?? 0;
      s.avg_visible_count += row.visible_count ?? 0;
      const multiMemberTopics = row.sizes.filter((size) => size > 1).length;
      s.multi_member_topic_share += row.topic_count > 0 ? multiMemberTopics / row.topic_count : 0;
      for (const p of row.pair_results) {
        if (p.expected === 'cluster') {
          s.pair_recall_cluster += p.clustered ? 1 : 0;
          s._cluster_total = (s._cluster_total ?? 0) + 1;
        } else {
          s.false_adjacency += p.clustered ? 1 : 0;
          s._separate_total = (s._separate_total ?? 0) + 1;
        }
        s.samples += 1;
      }
      for (const p of (row.pair_results_visible ?? [])) {
        if (p.expected === 'cluster') {
          s.pair_recall_visible += p.clustered ? 1 : 0;
          s._cluster_total_visible = (s._cluster_total_visible ?? 0) + 1;
        } else {
          s.false_adjacency_visible += p.clustered ? 1 : 0;
          s._separate_total_visible = (s._separate_total_visible ?? 0) + 1;
        }
        s.visible_samples += 1;
      }
    }
  }

  // Pass 2: turn sums into averages / rates and composite scores;
  // drop the private _total accumulators from the output rows.
  for (const s of summary.values()) {
    if (s.ticks === 0) continue;
    s.avg_topic_count /= s.ticks;
    s.avg_max_topic_size /= s.ticks;
    s.avg_visible_count /= s.ticks;
    s.multi_member_topic_share /= s.ticks;
    const rate = (hits, total) => ((total ?? 0) > 0 ? hits / total : 0);
    s.pair_recall_cluster = rate(s.pair_recall_cluster, s._cluster_total);
    s.false_adjacency = rate(s.false_adjacency, s._separate_total);
    s.pair_recall_visible = rate(s.pair_recall_visible, s._cluster_total_visible);
    s.false_adjacency_visible = rate(s.false_adjacency_visible, s._separate_total_visible);
    // Composite: weight visible recall (what users actually see),
    // penalise visible false adjacency, small bonus for multi-member
    // share. The visible variant is the deployment metric — it answers
    // "does this config produce a better brief?" rather than "does it
    // produce a better partition?"
    const composite = (recall, falseAdj) =>
      recall * 0.6 + (1 - falseAdj) * 0.3 + s.multi_member_topic_share * 0.1;
    s.quality_score = composite(s.pair_recall_cluster, s.false_adjacency);
    s.visible_quality_score = composite(s.pair_recall_visible, s.false_adjacency_visible);
    delete s._cluster_total;
    delete s._separate_total;
    delete s._cluster_total_visible;
    delete s._separate_total_visible;
  }
  return [...summary.values()].sort((a, b) => a.threshold - b.threshold);
}
|
||||
|
||||
// ── Output formatters ───────────────────────────────────────────────────
|
||||
|
||||
// Render the sweep summary as a markdown report. Rows with ticks === 0
// (threshold never evaluated) are omitted; the single GLOBAL best row
// by visible_quality_score carries the ⭐ marker and is echoed as the
// deployment recommendation. (Computing the best in a separate pass
// keeps the star unique — a running-best scan would star several
// rows; Greptile P1 on PR #3390.)
function renderMarkdownTable(rows, ctx) {
  const lines = [
    `# Brief topic-threshold sweep — ${ctx.rule} on ${ctx.date}`,
    '',
    `Replay records: ${ctx.recordCount}, ticks: ${ctx.tickCount}, evaluable ticks: ${ctx.evaluableTicks}`,
    `Labeled pairs loaded: ${ctx.labelCount} (${ctx.clusterLabels} cluster, ${ctx.separateLabels} separate)`,
    `Production-equivalent slice: scoreFloor=${ctx.scoreFloor}, topN=${ctx.topN}, maxStoriesPerUser (cap)=${ctx.maxStoriesPerUser}`,
  ];
  if (ctx.missingEmbedDrops > 0) {
    lines.push(`Reps dropped due to missing cached embeddings: ${ctx.missingEmbedDrops} (across all ticks)`);
  }
  lines.push(
    '',
    'Visible-window metrics measure what ends up in the user-visible top-N brief AFTER cap-truncation.',
    'Partition metrics measure cluster correctness ignoring the cap.',
    '',
    '| threshold | visible_quality | visible_recall | visible_false_adj | partition_quality | partition_recall | partition_false_adj | avg_topics | multi_share | visible_samples / partition_samples |',
    '|-----------|-----------------|----------------|-------------------|-------------------|------------------|---------------------|------------|-------------|-------------------------------------|',
  );

  const evaluated = rows.filter((r) => r.ticks !== 0);
  let best = null;
  for (const r of evaluated) {
    if (best == null || r.visible_quality_score > best.visible_quality_score) best = r;
  }

  const pct = (x) => `${(x * 100).toFixed(1)}%`;
  for (const r of evaluated) {
    const star = r === best ? ' ⭐' : '';
    lines.push(
      `| ${r.threshold.toFixed(2)} `
      + `| ${r.visible_quality_score.toFixed(3)}${star} `
      + `| ${pct(r.pair_recall_visible)} `
      + `| ${pct(r.false_adjacency_visible)} `
      + `| ${r.quality_score.toFixed(3)} `
      + `| ${pct(r.pair_recall_cluster)} `
      + `| ${pct(r.false_adjacency)} `
      + `| ${r.avg_topic_count.toFixed(1)} `
      + `| ${pct(r.multi_member_topic_share)} `
      + `| ${r.visible_samples} / ${r.samples} |`,
    );
  }

  if (best) {
    lines.push(
      '',
      `**Recommended threshold: ${best.threshold.toFixed(2)}** (visible_quality=${best.visible_quality_score.toFixed(3)}, visible_recall=${(best.pair_recall_visible*100).toFixed(1)}%, visible_false_adj=${(best.false_adjacency_visible*100).toFixed(1)}%)`,
      '',
      `Apply via Railway env on the **scripts-cron-digest-notifications** service:`,
      ` \`DIGEST_DEDUP_TOPIC_THRESHOLD=${best.threshold.toFixed(2)}\``,
      '',
      'To compare cap values, re-run with `--cap 12` and `--cap 16`. The `visible_*` columns will diverge if cap-truncation is materially affecting topic adjacency.',
    );
  }
  return lines.join('\n');
}
|
||||
|
||||
// ── Main ────────────────────────────────────────────────────────────────
|
||||
|
||||
async function main() {
|
||||
const args = parseArgs(process.argv);
|
||||
const { url, token } = getRedisCredentials();
|
||||
const replayKey = `${REPLAY_KEY_PREFIX}:${args.rule}:${args.date}`;
|
||||
|
||||
const rawList = await redisLrangeAll(url, token, replayKey);
|
||||
const records = parseReplayRecords(rawList);
|
||||
if (records.length === 0) {
|
||||
console.error(`No replay records at ${replayKey}. Is DIGEST_DEDUP_REPLAY_LOG=1 set on Railway?`);
|
||||
process.exit(2);
|
||||
}
|
||||
|
||||
const ticks = groupByTick(records);
|
||||
|
||||
// For each tick: reps = records where isRep===true. Hydrate embeddings
|
||||
// via MGET on embeddingCacheKey.
|
||||
const allCacheKeys = new Set();
|
||||
for (const tickRecs of ticks.values()) {
|
||||
for (const r of tickRecs) {
|
||||
if (r.isRep && r.embeddingCacheKey) allCacheKeys.add(r.embeddingCacheKey);
|
||||
}
|
||||
}
|
||||
const cacheKeyList = [...allCacheKeys];
|
||||
// Chunk MGET to keep URL length sane (Upstash REST has practical caps).
|
||||
const CHUNK = 50;
|
||||
const embeddingByCacheKey = new Map();
|
||||
for (let i = 0; i < cacheKeyList.length; i += CHUNK) {
|
||||
const chunk = cacheKeyList.slice(i, i + CHUNK);
|
||||
const vals = await redisMget(url, token, chunk);
|
||||
for (let j = 0; j < chunk.length; j++) {
|
||||
if (typeof vals[j] !== 'string') continue;
|
||||
try {
|
||||
const vec = JSON.parse(vals[j]);
|
||||
if (Array.isArray(vec) && vec.length > 0) embeddingByCacheKey.set(chunk[j], vec);
|
||||
} catch { /* skip malformed */ }
|
||||
}
|
||||
}
|
||||
|
||||
const labels = indexLabelsByNormalizedTitle(loadLabeledPairs());
|
||||
const clusterLabels = labels.filter((l) => l.expected === 'cluster').length;
|
||||
const separateLabels = labels.length - clusterLabels;
|
||||
|
||||
// Score each tick at all thresholds. Reps with missing embeddings
|
||||
// are filtered inside scoreOneTick (D fix); a tick is skipped only
|
||||
// if too few reps survive (< MIN_SURVIVING_REPS).
|
||||
const perTick = [];
|
||||
let evaluable = 0;
|
||||
let missingEmbedDrops = 0;
|
||||
const reportMissing = (n) => { missingEmbedDrops += n; };
|
||||
for (const tickRecs of ticks.values()) {
|
||||
const reps = tickRecs.filter((r) => r.isRep);
|
||||
if (reps.length === 0) { perTick.push(null); continue; }
|
||||
const embeddingByHash = new Map();
|
||||
for (const r of reps) {
|
||||
const vec = embeddingByCacheKey.get(r.embeddingCacheKey);
|
||||
if (Array.isArray(vec)) embeddingByHash.set(r.storyHash, vec);
|
||||
}
|
||||
const tickRows = scoreOneTick({
|
||||
reps,
|
||||
embeddingByHash,
|
||||
labels,
|
||||
thresholds: args.thresholds,
|
||||
scoreFloor: args.scoreFloor,
|
||||
topN: args.topN,
|
||||
maxStoriesPerUser: args.maxStoriesPerUser,
|
||||
missingEmbedReporter: reportMissing,
|
||||
});
|
||||
if (tickRows) {
|
||||
perTick.push(tickRows);
|
||||
evaluable += 1;
|
||||
} else {
|
||||
perTick.push(null);
|
||||
}
|
||||
}
|
||||
|
||||
const rows = aggregateByThreshold(perTick, args.thresholds);
|
||||
const ctx = {
|
||||
rule: args.rule,
|
||||
date: args.date,
|
||||
recordCount: records.length,
|
||||
tickCount: ticks.size,
|
||||
evaluableTicks: evaluable,
|
||||
labelCount: labels.length,
|
||||
clusterLabels,
|
||||
separateLabels,
|
||||
scoreFloor: args.scoreFloor,
|
||||
topN: args.topN,
|
||||
maxStoriesPerUser: args.maxStoriesPerUser,
|
||||
missingEmbedDrops,
|
||||
};
|
||||
|
||||
if (args.json) {
|
||||
console.log(JSON.stringify({ ctx, rows }, null, 2));
|
||||
} else {
|
||||
console.log(renderMarkdownTable(rows, ctx));
|
||||
}
|
||||
}
|
||||
|
||||
main().catch((err) => {
|
||||
console.error(`sweep-topic-thresholds: ${err?.stack ?? err?.message ?? String(err)}`);
|
||||
process.exit(1);
|
||||
});
|
||||
@@ -14,25 +14,36 @@ if [ "$VERCEL_GIT_COMMIT_REF" = "main" ] && [ -n "$VERCEL_GIT_PREVIOUS_SHA" ]; t
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Skip preview deploys that aren't tied to a pull request
|
||||
[ -z "$VERCEL_GIT_PULL_REQUEST_ID" ] && exit 0
|
||||
|
||||
# Resolve comparison base: prefer `merge-base HEAD origin/main` (the SHA
|
||||
# where this PR branched off main), fall back to VERCEL_GIT_PREVIOUS_SHA.
|
||||
# where this branch left main), fall back to VERCEL_GIT_PREVIOUS_SHA.
|
||||
#
|
||||
# Why this ordering: on a PR branch's FIRST push, Vercel has historically
|
||||
# set VERCEL_GIT_PREVIOUS_SHA to values that make the path-diff come back
|
||||
# empty (the same SHA as HEAD, or a parent that sees no net change),
|
||||
# causing "Canceled by Ignored Build Step" on PRs that genuinely touch
|
||||
# web paths (PR #3346 incident: four web-relevant files changed, skipped
|
||||
# anyway). merge-base is the stable truth: "everything on this PR since
|
||||
# it left main", which is always a superset of any single push and is
|
||||
# what the reviewer actually needs a preview for.
|
||||
# We deliberately do NOT gate on VERCEL_GIT_PULL_REQUEST_ID. Vercel only
|
||||
# populates that var when the deploy is triggered by a fresh PR-aware
|
||||
# webhook event; manual "Redeploy" / "Redeploy without cache" actions
|
||||
# from the dashboard, and some integration edge cases, leave it empty
|
||||
# even on commits that are clearly attached to an open PR. Gating on it
|
||||
# silently cancels legitimate previews (PR #3403 incident: 24d511e on
|
||||
# feat/usage-telemetry, all 5 api/ + server/ files skipped).
|
||||
#
|
||||
# PREVIOUS_SHA stays as the fallback for the rare shallow-clone edge case
|
||||
# where `origin/main` isn't in Vercel's clone and merge-base returns
|
||||
# empty. This is the opposite priority from the main-branch branch above
|
||||
# (line 6), which correctly wants PREVIOUS_SHA = the last deployed commit.
|
||||
# The merge-base diff below is the authoritative "did this branch touch
|
||||
# anything web-relevant" check, and it's strictly stronger than the
|
||||
# PR-ID guard: branches with no web changes still skip via that diff,
|
||||
# branches with web changes build whether or not Vercel happens to know
|
||||
# about a PR association at deploy time.
|
||||
#
|
||||
# Why merge-base is preferred over PREVIOUS_SHA: on a branch's FIRST
|
||||
# push, Vercel has historically set VERCEL_GIT_PREVIOUS_SHA to values
|
||||
# that make the path-diff come back empty (the same SHA as HEAD, or a
|
||||
# parent that sees no net change), causing "Canceled by Ignored Build
|
||||
# Step" on commits that genuinely touch web paths (PR #3346 incident).
|
||||
# merge-base is the stable truth: "everything on this branch since it
|
||||
# left main", which is always a superset of any single push.
|
||||
#
|
||||
# PREVIOUS_SHA stays as the fallback for the rare shallow-clone edge
|
||||
# case where `origin/main` isn't in Vercel's clone and merge-base
|
||||
# returns empty. This is the opposite priority from the main-branch
|
||||
# block above (line 6), which correctly wants PREVIOUS_SHA = the last
|
||||
# deployed commit.
|
||||
COMPARE_SHA=$(git merge-base HEAD origin/main 2>/dev/null)
|
||||
if [ -z "$COMPARE_SHA" ] && [ -n "$VERCEL_GIT_PREVIOUS_SHA" ]; then
|
||||
git cat-file -e "$VERCEL_GIT_PREVIOUS_SHA" 2>/dev/null && COMPARE_SHA="$VERCEL_GIT_PREVIOUS_SHA"
|
||||
|
||||
143
server/__tests__/usage-identity.test.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
/**
|
||||
* Pure-resolver tests for buildUsageIdentity().
|
||||
*
|
||||
* The resolver maps gateway-internal auth state to the four telemetry identity
|
||||
* fields (auth_kind, principal_id, customer_id, tier). It is intentionally
|
||||
* pure — no JWT verification, no key hashing of secrets, no I/O — so the
|
||||
* branch matrix is trivially testable here.
|
||||
*/
|
||||
|
||||
import { describe, expect, test } from 'vitest';
|
||||
|
||||
import { buildUsageIdentity, type UsageIdentityInput } from '../_shared/usage-identity';
|
||||
|
||||
function baseInput(overrides: Partial<UsageIdentityInput> = {}): UsageIdentityInput {
|
||||
return {
|
||||
sessionUserId: null,
|
||||
isUserApiKey: false,
|
||||
enterpriseApiKey: null,
|
||||
widgetKey: null,
|
||||
clerkOrgId: null,
|
||||
userApiKeyCustomerRef: null,
|
||||
tier: null,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe('buildUsageIdentity — auth_kind branches', () => {
|
||||
test('user_api_key takes precedence over every other signal', () => {
|
||||
const ident = buildUsageIdentity(baseInput({
|
||||
isUserApiKey: true,
|
||||
sessionUserId: 'user_123',
|
||||
userApiKeyCustomerRef: 'customer_abc',
|
||||
enterpriseApiKey: 'should-be-ignored',
|
||||
widgetKey: 'should-be-ignored',
|
||||
tier: 2,
|
||||
}));
|
||||
expect(ident.auth_kind).toBe('user_api_key');
|
||||
expect(ident.principal_id).toBe('user_123');
|
||||
expect(ident.customer_id).toBe('customer_abc');
|
||||
expect(ident.tier).toBe(2);
|
||||
});
|
||||
|
||||
test('user_api_key falls back to sessionUserId for customer_id when no explicit ref', () => {
|
||||
const ident = buildUsageIdentity(baseInput({
|
||||
isUserApiKey: true,
|
||||
sessionUserId: 'user_123',
|
||||
tier: 1,
|
||||
}));
|
||||
expect(ident.customer_id).toBe('user_123');
|
||||
});
|
||||
|
||||
test('clerk_jwt: customer_id prefers org over user when org is present', () => {
|
||||
const ident = buildUsageIdentity(baseInput({
|
||||
sessionUserId: 'user_123',
|
||||
clerkOrgId: 'org_acme',
|
||||
tier: 1,
|
||||
}));
|
||||
expect(ident.auth_kind).toBe('clerk_jwt');
|
||||
expect(ident.principal_id).toBe('user_123');
|
||||
expect(ident.customer_id).toBe('org_acme');
|
||||
expect(ident.tier).toBe(1);
|
||||
});
|
||||
|
||||
test('clerk_jwt: customer_id falls back to user when no org', () => {
|
||||
const ident = buildUsageIdentity(baseInput({
|
||||
sessionUserId: 'user_123',
|
||||
}));
|
||||
expect(ident.customer_id).toBe('user_123');
|
||||
expect(ident.tier).toBe(0);
|
||||
});
|
||||
|
||||
test('enterprise_api_key: principal_id is hashed, not raw', () => {
|
||||
const ident = buildUsageIdentity(baseInput({
|
||||
enterpriseApiKey: 'wm_super_secret_key',
|
||||
tier: 3,
|
||||
}));
|
||||
expect(ident.auth_kind).toBe('enterprise_api_key');
|
||||
expect(ident.principal_id).not.toBe('wm_super_secret_key');
|
||||
expect(ident.principal_id).toMatch(/^[0-9a-z]+$/);
|
||||
// Customer is the unmapped sentinel until a real entry is added to ENTERPRISE_KEY_TO_CUSTOMER
|
||||
expect(ident.customer_id).toBe('enterprise-unmapped');
|
||||
expect(ident.tier).toBe(3);
|
||||
});
|
||||
|
||||
test('widget_key: customer_id is the widget key itself, principal_id is hashed', () => {
|
||||
const ident = buildUsageIdentity(baseInput({
|
||||
widgetKey: 'widget_pub_xyz',
|
||||
}));
|
||||
expect(ident.auth_kind).toBe('widget_key');
|
||||
expect(ident.customer_id).toBe('widget_pub_xyz');
|
||||
expect(ident.principal_id).not.toBe('widget_pub_xyz');
|
||||
expect(ident.principal_id).toMatch(/^[0-9a-z]+$/);
|
||||
expect(ident.tier).toBe(0);
|
||||
});
|
||||
|
||||
test('anon: every field null, tier always zero', () => {
|
||||
const ident = buildUsageIdentity(baseInput());
|
||||
expect(ident.auth_kind).toBe('anon');
|
||||
expect(ident.principal_id).toBeNull();
|
||||
expect(ident.customer_id).toBeNull();
|
||||
expect(ident.tier).toBe(0);
|
||||
});
|
||||
|
||||
test('anon: tier coerces to 0 even if input.tier was set (defensive)', () => {
|
||||
// No identity signal but a leftover tier value should not show up as a mystery free row.
|
||||
const ident = buildUsageIdentity(baseInput({ tier: 99 }));
|
||||
expect(ident.tier).toBe(0);
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildUsageIdentity — tier handling', () => {
|
||||
test('null tier coerces to 0 for non-anon kinds', () => {
|
||||
const ident = buildUsageIdentity(baseInput({ sessionUserId: 'u', tier: null }));
|
||||
expect(ident.tier).toBe(0);
|
||||
});
|
||||
|
||||
test('zero tier is preserved (not promoted)', () => {
|
||||
const ident = buildUsageIdentity(baseInput({ sessionUserId: 'u', tier: 0 }));
|
||||
expect(ident.tier).toBe(0);
|
||||
});
|
||||
|
||||
test('integer tiers pass through unchanged', () => {
|
||||
for (const t of [0, 1, 2, 3]) {
|
||||
const ident = buildUsageIdentity(baseInput({ sessionUserId: 'u', tier: t }));
|
||||
expect(ident.tier).toBe(t);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildUsageIdentity — secret handling', () => {
|
||||
test('enterprise key never appears verbatim in any output field', () => {
|
||||
const secret = 'wm_ent_LEAKY_VALUE_DO_NOT_LOG';
|
||||
const ident = buildUsageIdentity(baseInput({ enterpriseApiKey: secret }));
|
||||
expect(JSON.stringify(ident)).not.toContain(secret);
|
||||
});
|
||||
|
||||
test('widget key appears as customer_id (intentional — widget keys are public)', () => {
|
||||
// Widget keys are embeds installed on third-party sites; treating them as
|
||||
// customer attribution is the contract documented in usage-identity.ts:73-79.
|
||||
const ident = buildUsageIdentity(baseInput({ widgetKey: 'widget_public_xyz' }));
|
||||
expect(ident.customer_id).toBe('widget_public_xyz');
|
||||
});
|
||||
});
|
||||
@@ -15,13 +15,18 @@
|
||||
import { jwtVerify } from 'jose';
|
||||
import { getClerkJwtVerifyOptions, getJWKS } from '../auth-session';
|
||||
|
||||
export interface ClerkSession {
|
||||
userId: string;
|
||||
orgId: string | null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts and verifies a bearer token from the request.
|
||||
* Returns the userId (sub claim) on success, null on any failure.
|
||||
* Returns { userId, orgId } on success, null on any failure.
|
||||
*
|
||||
* Fail-open: errors are logged but never thrown.
|
||||
*/
|
||||
export async function resolveSessionUserId(request: Request): Promise<string | null> {
|
||||
export async function resolveClerkSession(request: Request): Promise<ClerkSession | null> {
|
||||
try {
|
||||
const authHeader = request.headers.get('Authorization');
|
||||
if (!authHeader?.startsWith('Bearer ')) return null;
|
||||
@@ -38,7 +43,19 @@ export async function resolveSessionUserId(request: Request): Promise<string | n
|
||||
issuer: issuerDomain,
|
||||
});
|
||||
|
||||
return (payload.sub as string) ?? null;
|
||||
const userId = (payload.sub as string) ?? null;
|
||||
if (!userId) return null;
|
||||
|
||||
const orgClaim = (payload as Record<string, unknown>).org as
|
||||
| Record<string, unknown>
|
||||
| undefined;
|
||||
const orgId =
|
||||
(typeof orgClaim?.id === 'string' ? orgClaim.id : null) ??
|
||||
(typeof (payload as Record<string, unknown>).org_id === 'string'
|
||||
? ((payload as Record<string, unknown>).org_id as string)
|
||||
: null);
|
||||
|
||||
return { userId, orgId };
|
||||
} catch (err) {
|
||||
console.warn(
|
||||
'[auth-session] JWT verification failed:',
|
||||
@@ -47,3 +64,11 @@ export async function resolveSessionUserId(request: Request): Promise<string | n
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Back-compat wrapper. Prefer resolveClerkSession() for new callers.
|
||||
*/
|
||||
export async function resolveSessionUserId(request: Request): Promise<string | null> {
|
||||
const session = await resolveClerkSession(request);
|
||||
return session?.userId ?? null;
|
||||
}
|
||||
|
||||
@@ -113,7 +113,16 @@ function isFiniteNumber(v) {
|
||||
const ALLOWED_ENVELOPE_KEYS = new Set(['version', 'issuedAt', 'data']);
|
||||
const ALLOWED_DATA_KEYS = new Set(['user', 'issue', 'date', 'dateLong', 'digest', 'stories']);
|
||||
const ALLOWED_USER_KEYS = new Set(['name', 'tz']);
|
||||
const ALLOWED_DIGEST_KEYS = new Set(['greeting', 'lead', 'numbers', 'threads', 'signals']);
|
||||
// publicLead / publicSignals / publicThreads: optional v3+ fields.
|
||||
// Hold non-personalised content the public-share renderer uses in
|
||||
// place of the personalised lead/signals/threads. v2 envelopes (no
|
||||
// publicLead) still pass — the validator's optional-key pattern is
|
||||
// "in the allow list, but isString/array check is skipped when
|
||||
// undefined" (see validateBriefDigest below).
|
||||
const ALLOWED_DIGEST_KEYS = new Set([
|
||||
'greeting', 'lead', 'numbers', 'threads', 'signals',
|
||||
'publicLead', 'publicSignals', 'publicThreads',
|
||||
]);
|
||||
const ALLOWED_NUMBERS_KEYS = new Set(['clusters', 'multiSource', 'surfaced']);
|
||||
const ALLOWED_THREAD_KEYS = new Set(['tag', 'teaser']);
|
||||
const ALLOWED_STORY_KEYS = new Set([
|
||||
@@ -243,6 +252,38 @@ export function assertBriefEnvelope(envelope) {
|
||||
assertNoExtraKeys(digest, ALLOWED_DIGEST_KEYS, 'envelope.data.digest');
|
||||
if (!isNonEmptyString(digest.greeting)) throw new Error('envelope.data.digest.greeting must be a non-empty string');
|
||||
if (!isNonEmptyString(digest.lead)) throw new Error('envelope.data.digest.lead must be a non-empty string');
|
||||
// publicLead: optional v3+ field. When present, MUST be a non-empty
|
||||
// string (typed contract enforcement); when absent, the renderer's
|
||||
// public-mode lead block omits the pull-quote entirely (per the
|
||||
// "never fall back to personalised lead" rule).
|
||||
if (digest.publicLead !== undefined && !isNonEmptyString(digest.publicLead)) {
|
||||
throw new Error('envelope.data.digest.publicLead, when present, must be a non-empty string');
|
||||
}
|
||||
// publicSignals + publicThreads: optional v3+. When present, MUST
|
||||
// match the signals/threads contracts (array of non-empty strings,
|
||||
// array of {tag, teaser}). Absent siblings are OK — public render
|
||||
// path falls back to "omit signals page" / "category-derived
|
||||
// threads stub" rather than serving the personalised version.
|
||||
if (digest.publicSignals !== undefined) {
|
||||
if (!Array.isArray(digest.publicSignals)) {
|
||||
throw new Error('envelope.data.digest.publicSignals, when present, must be an array');
|
||||
}
|
||||
digest.publicSignals.forEach((s, i) => {
|
||||
if (!isNonEmptyString(s)) throw new Error(`envelope.data.digest.publicSignals[${i}] must be a non-empty string`);
|
||||
});
|
||||
}
|
||||
if (digest.publicThreads !== undefined) {
|
||||
if (!Array.isArray(digest.publicThreads)) {
|
||||
throw new Error('envelope.data.digest.publicThreads, when present, must be an array');
|
||||
}
|
||||
digest.publicThreads.forEach((t, i) => {
|
||||
if (!isObject(t)) throw new Error(`envelope.data.digest.publicThreads[${i}] must be an object`);
|
||||
const th = /** @type {Record<string, unknown>} */ (t);
|
||||
assertNoExtraKeys(th, ALLOWED_THREAD_KEYS, `envelope.data.digest.publicThreads[${i}]`);
|
||||
if (!isNonEmptyString(th.tag)) throw new Error(`envelope.data.digest.publicThreads[${i}].tag must be a non-empty string`);
|
||||
if (!isNonEmptyString(th.teaser)) throw new Error(`envelope.data.digest.publicThreads[${i}].teaser must be a non-empty string`);
|
||||
});
|
||||
}
|
||||
|
||||
if (!isObject(digest.numbers)) throw new Error('envelope.data.digest.numbers is required');
|
||||
const numbers = /** @type {Record<string, unknown>} */ (digest.numbers);
|
||||
@@ -423,13 +464,22 @@ function renderCover({ dateLong, issue, storyCount, pageIndex, totalPages, greet
|
||||
* @param {{ greeting: string; lead: string; dateShort: string; pageIndex: number; totalPages: number }} opts
|
||||
*/
|
||||
function renderDigestGreeting({ greeting, lead, dateShort, pageIndex, totalPages }) {
|
||||
// Public-share fail-safe: when `lead` is empty, omit the pull-quote
|
||||
// entirely. Reached via redactForPublic when the envelope lacks a
|
||||
// non-empty `publicLead` — NEVER serve the personalised lead on the
|
||||
// public surface. Page still reads as a complete editorial layout
|
||||
// (greeting + horizontal rule), just without the italic blockquote.
|
||||
// Codex Round-2 High (security on share-URL surface).
|
||||
const blockquote = typeof lead === 'string' && lead.length > 0
|
||||
? `<blockquote>${escapeHtml(lead)}</blockquote>`
|
||||
: '';
|
||||
return (
|
||||
'<section class="page digest">' +
|
||||
digestRunningHead(dateShort, 'Digest / 01') +
|
||||
'<div class="body">' +
|
||||
'<div class="label mono">At The Top Of The Hour</div>' +
|
||||
`<h2>${escapeHtml(greeting)}</h2>` +
|
||||
`<blockquote>${escapeHtml(lead)}</blockquote>` +
|
||||
blockquote +
|
||||
'<hr class="rule" />' +
|
||||
'</div>' +
|
||||
`<div class="page-number mono">${pad2(pageIndex)} / ${pad2(totalPages)}</div>` +
|
||||
@@ -1141,17 +1191,57 @@ const NAV_SCRIPT = `<script>
|
||||
* leaking the recipient's name or the LLM-generated whyMatters (which
|
||||
* is framed as direct advice to that specific reader).
|
||||
*
|
||||
* Runs AFTER assertBriefEnvelope so the full v2 contract is still
|
||||
* Runs AFTER assertBriefEnvelope so the full contract is still
|
||||
* enforced on the input — we never loosen validation for the public
|
||||
* path, only redact the output.
|
||||
*
|
||||
* Lead-field handling (v3, 2026-04-25): the personalised `digest.lead`
|
||||
* can carry profile context (watched assets, region preferences) and
|
||||
* MUST NEVER be served on the public surface. v3 envelopes carry
|
||||
* `digest.publicLead` — a non-personalised parallel synthesis from
|
||||
* generateDigestProsePublic — which we substitute into the `lead`
|
||||
* slot so all downstream renderers stay agnostic to the public/
|
||||
* personalised distinction. When `publicLead` is absent (v2
|
||||
* envelopes still in the 7-day TTL window, or v3 envelopes where
|
||||
* the publicLead generation failed), we substitute an EMPTY string
|
||||
* — the renderer's pull-quote block reads "no pull-quote" for empty
|
||||
* leads (per renderDigestGreeting), so the page renders without
|
||||
* leaking personalised content. NEVER fall through to the original
|
||||
* `lead`. Codex Round-2 High (security).
|
||||
*
|
||||
* @param {BriefData} data
|
||||
* @returns {BriefData}
|
||||
*/
|
||||
function redactForPublic(data) {
|
||||
const safeLead = typeof data.digest?.publicLead === 'string' && data.digest.publicLead.length > 0
|
||||
? data.digest.publicLead
|
||||
: '';
|
||||
// Public signals: substitute the publicSignals array (also produced
|
||||
// by generateDigestProsePublic with profile=null) when present.
|
||||
// When absent, EMPTY the signals array — the renderer's hasSignals
|
||||
// gate then omits the entire "04 · Signals" page rather than
|
||||
// serving the personalised forward-looking phrases (which can echo
|
||||
// the user's watched assets / regions).
|
||||
const safeSignals = Array.isArray(data.digest?.publicSignals) && data.digest.publicSignals.length > 0
|
||||
? data.digest.publicSignals
|
||||
: [];
|
||||
// Public threads: substitute publicThreads when present (preferred
|
||||
// — the public synthesis still produces topic clusters from story
|
||||
// content). When absent, fall back to category-derived stubs so
|
||||
// the threads page still renders without leaking any personalised
|
||||
// phrasing the original `threads` array might carry.
|
||||
const safeThreads = Array.isArray(data.digest?.publicThreads) && data.digest.publicThreads.length > 0
|
||||
? data.digest.publicThreads
|
||||
: derivePublicThreadsStub(data.stories);
|
||||
return {
|
||||
...data,
|
||||
user: { ...data.user, name: 'WorldMonitor' },
|
||||
digest: {
|
||||
...data.digest,
|
||||
lead: safeLead,
|
||||
signals: safeSignals,
|
||||
threads: safeThreads,
|
||||
},
|
||||
stories: data.stories.map((s) => ({
|
||||
...s,
|
||||
whyMatters: 'Subscribe to WorldMonitor Brief to see the full editorial on this story.',
|
||||
@@ -1159,6 +1249,31 @@ function redactForPublic(data) {
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Category-derived threads fallback for the public surface when the
|
||||
* envelope lacks `publicThreads`. Mirrors deriveThreadsFromStories
|
||||
* in shared/brief-filter.js (the composer's stub path) — keeps the
|
||||
* fallback shape identical to what v2 envelopes already render with.
|
||||
*
|
||||
* @param {Array<{ category?: unknown }>} stories
|
||||
* @returns {Array<{ tag: string; teaser: string }>}
|
||||
*/
|
||||
function derivePublicThreadsStub(stories) {
|
||||
if (!Array.isArray(stories) || stories.length === 0) {
|
||||
return [{ tag: 'World', teaser: 'One thread on the desk today.' }];
|
||||
}
|
||||
const byCategory = new Map();
|
||||
for (const s of stories) {
|
||||
const tag = typeof s?.category === 'string' && s.category.length > 0 ? s.category : 'World';
|
||||
byCategory.set(tag, (byCategory.get(tag) ?? 0) + 1);
|
||||
}
|
||||
const sorted = [...byCategory.entries()].sort((a, b) => b[1] - a[1]);
|
||||
return sorted.slice(0, 6).map(([tag, count]) => ({
|
||||
tag,
|
||||
teaser: count === 1 ? 'One thread on the desk today.' : `${count} threads on the desk today.`,
|
||||
}));
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {BriefEnvelope} envelope
|
||||
* @param {{ publicMode?: boolean; refCode?: string; shareUrl?: string }} [options]
|
||||
|
||||
@@ -1,14 +1,28 @@
|
||||
import { CHROME_UA } from './constants';
|
||||
import type { UsageHook } from './redis';
|
||||
import { buildUpstreamEvent, getUsageScope, sendToAxiom } from './usage';
|
||||
|
||||
interface FetchJsonOptions {
|
||||
timeoutMs?: number;
|
||||
headers?: Record<string, string>;
|
||||
/**
|
||||
* Provider attribution for usage telemetry. When set, an upstream event
|
||||
* is emitted for this call. Leaves request_id / customer_id / route / tier
|
||||
* to flow implicitly from the gateway-set UsageScope (issue #3381).
|
||||
*/
|
||||
provider?: string;
|
||||
operation?: string;
|
||||
/** Escape hatch for callers outside a request scope. Rarely needed. */
|
||||
usage?: UsageHook;
|
||||
}
|
||||
|
||||
export async function fetchJson<T>(
|
||||
url: string,
|
||||
options: FetchJsonOptions = {},
|
||||
): Promise<T | null> {
|
||||
const t0 = Date.now();
|
||||
let status = 0;
|
||||
let responseBytes = 0;
|
||||
try {
|
||||
const response = await fetch(url, {
|
||||
headers: {
|
||||
@@ -18,9 +32,57 @@ export async function fetchJson<T>(
|
||||
},
|
||||
signal: AbortSignal.timeout(options.timeoutMs ?? 8_000),
|
||||
});
|
||||
status = response.status;
|
||||
if (!response.ok) return null;
|
||||
return await response.json() as T;
|
||||
const text = await response.text();
|
||||
responseBytes = text.length;
|
||||
return JSON.parse(text) as T;
|
||||
} catch {
|
||||
return null;
|
||||
} finally {
|
||||
// Emit only when the caller has labeled the provider — avoids polluting
|
||||
// the dataset with "unknown" rows from internal/utility fetches.
|
||||
const provider = options.usage?.provider ?? options.provider;
|
||||
const operation = options.usage?.operation ?? options.operation ?? 'fetch';
|
||||
if (provider) {
|
||||
const durationMs = Date.now() - t0;
|
||||
const explicit = options.usage;
|
||||
const host = explicit?.host ?? safeHost(url);
|
||||
// Single waitUntil() registered synchronously here — no nested
|
||||
// ctx.waitUntil() inside the Axiom delivery (Edge runtimes may drop
|
||||
// the outer registration after the response phase ends). Static
|
||||
// import keeps the emit path on the hot path.
|
||||
const scope = getUsageScope();
|
||||
const ctx = explicit?.ctx ?? scope?.ctx;
|
||||
if (ctx) {
|
||||
const event = buildUpstreamEvent({
|
||||
requestId: explicit?.requestId ?? scope?.requestId ?? '',
|
||||
customerId: explicit?.customerId ?? scope?.customerId ?? null,
|
||||
route: explicit?.route ?? scope?.route ?? '',
|
||||
tier: explicit?.tier ?? scope?.tier ?? 0,
|
||||
provider,
|
||||
operation,
|
||||
host,
|
||||
status,
|
||||
durationMs,
|
||||
requestBytes: 0,
|
||||
responseBytes,
|
||||
cacheStatus: 'miss',
|
||||
});
|
||||
try {
|
||||
ctx.waitUntil(sendToAxiom([event]));
|
||||
} catch {
|
||||
/* telemetry must never throw */
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function safeHost(url: string): string {
|
||||
try {
|
||||
return new URL(url).host;
|
||||
} catch {
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -96,6 +96,10 @@ export const ENDPOINT_RATE_POLICIES: Record<string, EndpointRatePolicy> = {
|
||||
// inline Upstash INCR. Gateway now enforces the same budget with per-IP
|
||||
// keying in checkEndpointRateLimit.
|
||||
'/api/scenario/v1/run-scenario': { limit: 10, window: '60 s' },
|
||||
// Live tanker map (Energy Atlas): one user with 6 chokepoints × 1 call/min
|
||||
// = 6 req/min/IP base load. 60/min headroom covers tab refreshes + zoom
|
||||
// pans within a single user without flagging legitimate traffic.
|
||||
'/api/maritime/v1/get-vessel-snapshot': { limit: 60, window: '60 s' },
|
||||
};
|
||||
|
||||
const endpointLimiters = new Map<string, Ratelimit>();
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { unwrapEnvelope } from './seed-envelope';
|
||||
import { buildUpstreamEvent, getUsageScope, sendToAxiom } from './usage';
|
||||
|
||||
const REDIS_OP_TIMEOUT_MS = 1_500;
|
||||
const REDIS_PIPELINE_TIMEOUT_MS = 5_000;
|
||||
@@ -288,6 +289,31 @@ export async function cachedFetchJson<T extends object>(
|
||||
return promise;
|
||||
}
|
||||
|
||||
/**
|
||||
* Per-call usage-telemetry hook for upstream event emission (issue #3381).
|
||||
*
|
||||
* The only required field is `provider` — its presence is what tells the
|
||||
* helper "emit an upstream event for this call." Everything else is filled
|
||||
* in by the gateway-set UsageScope (request_id, customer_id, route, tier,
|
||||
* ctx) via AsyncLocalStorage. Pass overrides explicitly if you need to.
|
||||
*
|
||||
* Use this when calling fetchJson / cachedFetchJsonWithMeta from a code
|
||||
* path that runs inside a gateway-handled request. For helpers used
|
||||
* outside any request (cron, scripts), no scope exists and emission is
|
||||
* skipped silently.
|
||||
*/
|
||||
export interface UsageHook {
|
||||
provider: string;
|
||||
operation?: string;
|
||||
host?: string;
|
||||
// Overrides — leave unset to inherit from gateway-set UsageScope.
|
||||
ctx?: { waitUntil: (p: Promise<unknown>) => void };
|
||||
requestId?: string;
|
||||
customerId?: string | null;
|
||||
route?: string;
|
||||
tier?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Like cachedFetchJson but reports the data source.
|
||||
* Use when callers need to distinguish cache hits from fresh fetches
|
||||
@@ -296,12 +322,17 @@ export async function cachedFetchJson<T extends object>(
|
||||
* Returns { data, source } where source is:
|
||||
* 'cache' — served from Redis
|
||||
* 'fresh' — fetcher ran (leader) or joined an in-flight fetch (follower)
|
||||
*
|
||||
* If `opts.usage` is supplied, an upstream event is emitted on the fresh
|
||||
* path (issue #3381). Pass-through for callers that don't care about
|
||||
* telemetry — backwards-compatible.
|
||||
*/
|
||||
export async function cachedFetchJsonWithMeta<T extends object>(
|
||||
key: string,
|
||||
ttlSeconds: number,
|
||||
fetcher: () => Promise<T | null>,
|
||||
negativeTtlSeconds = 120,
|
||||
opts?: { usage?: UsageHook },
|
||||
): Promise<{ data: T | null; source: 'cache' | 'fresh' }> {
|
||||
const cached = await getCachedJson(key);
|
||||
if (cached === NEG_SENTINEL) return { data: null, source: 'cache' };
|
||||
@@ -313,16 +344,30 @@ export async function cachedFetchJsonWithMeta<T extends object>(
|
||||
return { data, source: 'fresh' };
|
||||
}
|
||||
|
||||
const fetchT0 = Date.now();
|
||||
let upstreamStatus = 0;
|
||||
let cacheStatus: 'miss' | 'neg-sentinel' = 'miss';
|
||||
|
||||
const promise = fetcher()
|
||||
.then(async (result) => {
|
||||
// Only count an upstream call as a 200 when it actually returned data.
|
||||
// A null result triggers the neg-sentinel branch below — these are
|
||||
// empty/failed upstream calls and must NOT show up as `status=200` in
|
||||
// dashboards (would poison the cache-hit-ratio recipe and per-provider
|
||||
// error rates). Use status=0 for the empty branch; cache_status carries
|
||||
// the structural detail.
|
||||
if (result != null) {
|
||||
upstreamStatus = 200;
|
||||
await setCachedJson(key, result, ttlSeconds);
|
||||
} else {
|
||||
upstreamStatus = 0;
|
||||
cacheStatus = 'neg-sentinel';
|
||||
await setCachedJson(key, NEG_SENTINEL, negativeTtlSeconds);
|
||||
}
|
||||
return result;
|
||||
})
|
||||
.catch((err: unknown) => {
|
||||
upstreamStatus = 0;
|
||||
console.warn(`[redis] cachedFetchJsonWithMeta fetcher failed for "${key}":`, errMsg(err));
|
||||
throw err;
|
||||
})
|
||||
@@ -331,10 +376,50 @@ export async function cachedFetchJsonWithMeta<T extends object>(
|
||||
});
|
||||
|
||||
inflight.set(key, promise);
|
||||
const data = await promise;
|
||||
let data: T | null;
|
||||
try {
|
||||
data = await promise;
|
||||
} finally {
|
||||
emitUpstreamFromHook(opts?.usage, upstreamStatus, Date.now() - fetchT0, cacheStatus);
|
||||
}
|
||||
return { data, source: 'fresh' };
|
||||
}
|
||||
|
||||
function emitUpstreamFromHook(
|
||||
usage: UsageHook | undefined,
|
||||
status: number,
|
||||
durationMs: number,
|
||||
cacheStatus: 'miss' | 'fresh' | 'stale-while-revalidate' | 'neg-sentinel',
|
||||
): void {
|
||||
// Emit only when caller labels the provider — avoids "unknown" pollution.
|
||||
if (!usage?.provider) return;
|
||||
// Single waitUntil() registered synchronously here — no nested
|
||||
// ctx.waitUntil() inside Axiom delivery. Static import keeps the call
|
||||
// synchronous so the runtime registers it during the request phase.
|
||||
const scope = getUsageScope();
|
||||
const ctx = usage.ctx ?? scope?.ctx;
|
||||
if (!ctx) return;
|
||||
const event = buildUpstreamEvent({
|
||||
requestId: usage.requestId ?? scope?.requestId ?? '',
|
||||
customerId: usage.customerId ?? scope?.customerId ?? null,
|
||||
route: usage.route ?? scope?.route ?? '',
|
||||
tier: usage.tier ?? scope?.tier ?? 0,
|
||||
provider: usage.provider,
|
||||
operation: usage.operation ?? 'fetch',
|
||||
host: usage.host ?? '',
|
||||
status,
|
||||
durationMs,
|
||||
requestBytes: 0,
|
||||
responseBytes: 0,
|
||||
cacheStatus,
|
||||
});
|
||||
try {
|
||||
ctx.waitUntil(sendToAxiom([event]));
|
||||
} catch {
|
||||
/* telemetry must never throw */
|
||||
}
|
||||
}
|
||||
|
||||
export async function geoSearchByBox(
|
||||
key: string, lon: number, lat: number,
|
||||
widthKm: number, heightKm: number, count: number, raw = false,
|
||||
|
||||
108
server/_shared/usage-identity.ts
Normal file
@@ -0,0 +1,108 @@
|
||||
/**
|
||||
* Pure resolver: maps gateway-internal auth state to a UsageIdentity event field set.
|
||||
*
|
||||
* MUST NOT re-verify JWTs, re-hash keys, or re-validate API keys. The gateway has
|
||||
* already done that work — this function consumes the resolved values.
|
||||
*
|
||||
* Tier is the user's current entitlement tier (0 = free / unknown). For non-tier-gated
|
||||
* endpoints the gateway never resolves it, so we accept null/undefined and report 0.
|
||||
*/
|
||||
|
||||
export type AuthKind =
|
||||
| 'clerk_jwt'
|
||||
| 'user_api_key'
|
||||
| 'enterprise_api_key'
|
||||
| 'widget_key'
|
||||
| 'anon';
|
||||
|
||||
export interface UsageIdentity {
|
||||
auth_kind: AuthKind;
|
||||
principal_id: string | null;
|
||||
customer_id: string | null;
|
||||
tier: number;
|
||||
}
|
||||
|
||||
export interface UsageIdentityInput {
|
||||
sessionUserId: string | null;
|
||||
isUserApiKey: boolean;
|
||||
enterpriseApiKey: string | null;
|
||||
widgetKey: string | null;
|
||||
clerkOrgId: string | null;
|
||||
userApiKeyCustomerRef: string | null;
|
||||
tier: number | null;
|
||||
}
|
||||
|
||||
// Static enterprise-key → customer map. Explicit so attribution is reviewable in code,
|
||||
// not floating in env vars. Add entries here as enterprise customers are onboarded.
|
||||
// The hash (not the raw key) is used as principal_id so logs never leak the secret.
|
||||
const ENTERPRISE_KEY_TO_CUSTOMER: Record<string, string> = {
|
||||
// 'wm_ent_xxxx': 'acme-corp',
|
||||
};
|
||||
|
||||
export function buildUsageIdentity(input: UsageIdentityInput): UsageIdentity {
|
||||
const tier = input.tier ?? 0;
|
||||
|
||||
if (input.isUserApiKey) {
|
||||
return {
|
||||
auth_kind: 'user_api_key',
|
||||
principal_id: input.sessionUserId,
|
||||
customer_id: input.userApiKeyCustomerRef ?? input.sessionUserId,
|
||||
tier,
|
||||
};
|
||||
}
|
||||
|
||||
if (input.sessionUserId) {
|
||||
return {
|
||||
auth_kind: 'clerk_jwt',
|
||||
principal_id: input.sessionUserId,
|
||||
customer_id: input.clerkOrgId ?? input.sessionUserId,
|
||||
tier,
|
||||
};
|
||||
}
|
||||
|
||||
if (input.enterpriseApiKey) {
|
||||
const customer = ENTERPRISE_KEY_TO_CUSTOMER[input.enterpriseApiKey] ?? 'enterprise-unmapped';
|
||||
return {
|
||||
auth_kind: 'enterprise_api_key',
|
||||
principal_id: hashKeySync(input.enterpriseApiKey),
|
||||
customer_id: customer,
|
||||
tier,
|
||||
};
|
||||
}
|
||||
|
||||
if (input.widgetKey) {
|
||||
return {
|
||||
auth_kind: 'widget_key',
|
||||
principal_id: hashKeySync(input.widgetKey),
|
||||
customer_id: input.widgetKey,
|
||||
tier,
|
||||
};
|
||||
}
|
||||
|
||||
return {
|
||||
auth_kind: 'anon',
|
||||
principal_id: null,
|
||||
customer_id: null,
|
||||
tier: 0,
|
||||
};
|
||||
}
|
||||
|
||||
// 64-bit FNV-1a (two-round, XOR-folded) — non-cryptographic, only used to
|
||||
// avoid logging raw key material. Edge crypto.subtle.digest is async; we
|
||||
// want a sync helper for the hot path. Two rounds with different seeds give
|
||||
// ~64 bits of state, dropping birthday-collision risk well below the
|
||||
// widget-key population horizon (32-bit collides ~65k keys).
|
||||
function hashKeySync(key: string): string {
|
||||
let h1 = 2166136261;
|
||||
let h2 = 0x811c9dc5 ^ 0xa3b2c1d4;
|
||||
for (let i = 0; i < key.length; i++) {
|
||||
const c = key.charCodeAt(i);
|
||||
h1 ^= c;
|
||||
h1 = Math.imul(h1, 16777619);
|
||||
h2 ^= c + 0x9e3779b1;
|
||||
h2 = Math.imul(h2, 16777619);
|
||||
}
|
||||
const lo = (h1 >>> 0).toString(36);
|
||||
const hi = (h2 >>> 0).toString(36);
|
||||
return `${hi}${lo}`;
|
||||
}
|
||||
421
server/_shared/usage.ts
Normal file
@@ -0,0 +1,421 @@
|
||||
/**
|
||||
* Axiom-based API usage observability — emit-side primitives.
|
||||
*
|
||||
* - Builders accept allowlisted primitives only. Never accept Request, Response,
|
||||
* or untyped objects: future field additions then leak by structural impossibility.
|
||||
* - emitUsageEvents fires via ctx.waitUntil so the Edge isolate cannot tear down
|
||||
* the unflushed POST. Direct fetch, 1.5s timeout, no retry.
|
||||
* - Circuit breaker (5% failure / 5min sliding window) trips when delivery is broken.
|
||||
* - Tripping logs once via console.error; drops thereafter are 1%-sampled console.warn.
|
||||
* - Telemetry failure must not affect API availability or latency.
|
||||
*
|
||||
* Scoped to USAGE attribution. Sentry-edge already covers exceptions — do NOT
|
||||
* emit error tracebacks here. Cross-link via sentry_trace_id field instead.
|
||||
*/
|
||||
|
||||
import type { AuthKind } from './usage-identity';
|
||||
|
||||
const AXIOM_DATASET = 'wm_api_usage';
|
||||
// US region endpoint. EU workspaces would use api.eu.axiom.co.
|
||||
const AXIOM_INGEST_URL = `https://api.axiom.co/v1/datasets/${AXIOM_DATASET}/ingest`;
|
||||
const TELEMETRY_TIMEOUT_MS = 1_500;
|
||||
|
||||
const CB_WINDOW_MS = 5 * 60 * 1_000;
|
||||
const CB_TRIP_FAILURE_RATIO = 0.05;
|
||||
const CB_MIN_SAMPLES = 20;
|
||||
const SAMPLED_DROP_LOG_RATE = 0.01;
|
||||
|
||||
function isUsageEnabled(): boolean {
|
||||
return process.env.USAGE_TELEMETRY === '1';
|
||||
}
|
||||
|
||||
function isDevHeaderEnabled(): boolean {
|
||||
return process.env.NODE_ENV !== 'production';
|
||||
}
|
||||
|
||||
// ---------- Event shapes ----------
|
||||
|
||||
export type CacheTier =
|
||||
| 'fast'
|
||||
| 'medium'
|
||||
| 'slow'
|
||||
| 'slow-browser'
|
||||
| 'static'
|
||||
| 'daily'
|
||||
| 'no-store'
|
||||
| 'live';
|
||||
|
||||
export type CacheStatus = 'miss' | 'fresh' | 'stale-while-revalidate' | 'neg-sentinel';
|
||||
|
||||
export type ExecutionPlane = 'vercel-edge' | 'vercel-node' | 'railway-relay';
|
||||
|
||||
export type OriginKind =
|
||||
| 'browser-same-origin'
|
||||
| 'browser-cross-origin'
|
||||
| 'api-key'
|
||||
| 'oauth'
|
||||
| 'mcp'
|
||||
| 'internal-cron';
|
||||
|
||||
export type RequestReason =
|
||||
| 'ok'
|
||||
| 'origin_403'
|
||||
| 'rate_limit_429'
|
||||
| 'preflight'
|
||||
| 'auth_401'
|
||||
| 'auth_403'
|
||||
| 'tier_403';
|
||||
|
||||
export interface RequestEvent {
|
||||
_time: string;
|
||||
event_type: 'request';
|
||||
request_id: string;
|
||||
domain: string;
|
||||
route: string;
|
||||
method: string;
|
||||
status: number;
|
||||
duration_ms: number;
|
||||
req_bytes: number;
|
||||
res_bytes: number;
|
||||
customer_id: string | null;
|
||||
principal_id: string | null;
|
||||
auth_kind: AuthKind;
|
||||
tier: number;
|
||||
country: string | null;
|
||||
execution_region: string | null;
|
||||
execution_plane: ExecutionPlane;
|
||||
origin_kind: OriginKind | null;
|
||||
cache_tier: CacheTier | null;
|
||||
ua_hash: string | null;
|
||||
sentry_trace_id: string | null;
|
||||
reason: RequestReason;
|
||||
}
|
||||
|
||||
export interface UpstreamEvent {
|
||||
_time: string;
|
||||
event_type: 'upstream';
|
||||
request_id: string;
|
||||
customer_id: string | null;
|
||||
route: string;
|
||||
tier: number;
|
||||
provider: string;
|
||||
operation: string;
|
||||
host: string;
|
||||
status: number;
|
||||
duration_ms: number;
|
||||
request_bytes: number;
|
||||
response_bytes: number;
|
||||
cache_status: CacheStatus;
|
||||
}
|
||||
|
||||
export type UsageEvent = RequestEvent | UpstreamEvent;
|
||||
|
||||
// ---------- Builders (allowlisted primitives only) ----------
|
||||
|
||||
export function buildRequestEvent(p: {
|
||||
requestId: string;
|
||||
domain: string;
|
||||
route: string;
|
||||
method: string;
|
||||
status: number;
|
||||
durationMs: number;
|
||||
reqBytes: number;
|
||||
resBytes: number;
|
||||
customerId: string | null;
|
||||
principalId: string | null;
|
||||
authKind: AuthKind;
|
||||
tier: number;
|
||||
country: string | null;
|
||||
executionRegion: string | null;
|
||||
executionPlane: ExecutionPlane;
|
||||
originKind: OriginKind | null;
|
||||
cacheTier: CacheTier | null;
|
||||
uaHash: string | null;
|
||||
sentryTraceId: string | null;
|
||||
reason: RequestReason;
|
||||
}): RequestEvent {
|
||||
return {
|
||||
_time: new Date().toISOString(),
|
||||
event_type: 'request',
|
||||
request_id: p.requestId,
|
||||
domain: p.domain,
|
||||
route: p.route,
|
||||
method: p.method,
|
||||
status: p.status,
|
||||
duration_ms: p.durationMs,
|
||||
req_bytes: p.reqBytes,
|
||||
res_bytes: p.resBytes,
|
||||
customer_id: p.customerId,
|
||||
principal_id: p.principalId,
|
||||
auth_kind: p.authKind,
|
||||
tier: p.tier,
|
||||
country: p.country,
|
||||
execution_region: p.executionRegion,
|
||||
execution_plane: p.executionPlane,
|
||||
origin_kind: p.originKind,
|
||||
cache_tier: p.cacheTier,
|
||||
ua_hash: p.uaHash,
|
||||
sentry_trace_id: p.sentryTraceId,
|
||||
reason: p.reason,
|
||||
};
|
||||
}
|
||||
|
||||
export function buildUpstreamEvent(p: {
|
||||
requestId: string;
|
||||
customerId: string | null;
|
||||
route: string;
|
||||
tier: number;
|
||||
provider: string;
|
||||
operation: string;
|
||||
host: string;
|
||||
status: number;
|
||||
durationMs: number;
|
||||
requestBytes: number;
|
||||
responseBytes: number;
|
||||
cacheStatus: CacheStatus;
|
||||
}): UpstreamEvent {
|
||||
return {
|
||||
_time: new Date().toISOString(),
|
||||
event_type: 'upstream',
|
||||
request_id: p.requestId,
|
||||
customer_id: p.customerId,
|
||||
route: p.route,
|
||||
tier: p.tier,
|
||||
provider: p.provider,
|
||||
operation: p.operation,
|
||||
host: p.host,
|
||||
status: p.status,
|
||||
duration_ms: p.durationMs,
|
||||
request_bytes: p.requestBytes,
|
||||
response_bytes: p.responseBytes,
|
||||
cache_status: p.cacheStatus,
|
||||
};
|
||||
}
|
||||
|
||||
// ---------- Header-derived helpers (ok to take Request — these only read primitives) ----------
|
||||
|
||||
export function deriveRequestId(req: Request): string {
|
||||
return req.headers.get('x-vercel-id') ?? '';
|
||||
}
|
||||
|
||||
export function deriveExecutionRegion(req: Request): string | null {
|
||||
const id = req.headers.get('x-vercel-id');
|
||||
if (!id) return null;
|
||||
const sep = id.indexOf('::');
|
||||
return sep > 0 ? id.slice(0, sep) : null;
|
||||
}
|
||||
|
||||
export function deriveCountry(req: Request): string | null {
|
||||
return (
|
||||
req.headers.get('x-vercel-ip-country') ??
|
||||
req.headers.get('cf-ipcountry') ??
|
||||
null
|
||||
);
|
||||
}
|
||||
|
||||
export function deriveReqBytes(req: Request): number {
|
||||
const len = req.headers.get('content-length');
|
||||
if (!len) return 0;
|
||||
const n = Number(len);
|
||||
return Number.isFinite(n) && n >= 0 ? n : 0;
|
||||
}
|
||||
|
||||
export function deriveSentryTraceId(req: Request): string | null {
|
||||
return req.headers.get('sentry-trace') ?? null;
|
||||
}
|
||||
|
||||
// ua_hash: SHA-256(UA + monthly-rotated pepper). Pepper key: USAGE_UA_PEPPER.
|
||||
// If the pepper is unset we return null rather than a stable per-browser fingerprint.
|
||||
export async function deriveUaHash(req: Request): Promise<string | null> {
|
||||
const pepper = process.env.USAGE_UA_PEPPER;
|
||||
if (!pepper) return null;
|
||||
const ua = req.headers.get('user-agent') ?? '';
|
||||
if (!ua) return null;
|
||||
const data = new TextEncoder().encode(`${pepper}|${ua}`);
|
||||
const buf = await crypto.subtle.digest('SHA-256', data);
|
||||
return Array.from(new Uint8Array(buf), (b) => b.toString(16).padStart(2, '0')).join('');
|
||||
}
|
||||
|
||||
export function deriveOriginKind(req: Request): OriginKind | null {
|
||||
const origin = req.headers.get('origin') ?? '';
|
||||
const hasApiKey =
|
||||
req.headers.has('x-worldmonitor-key') || req.headers.has('x-api-key');
|
||||
const hasBearer = (req.headers.get('authorization') ?? '').startsWith('Bearer ');
|
||||
if (hasApiKey) return 'api-key';
|
||||
if (hasBearer) return 'oauth';
|
||||
if (!origin) return null;
|
||||
try {
|
||||
const host = new URL(origin).host;
|
||||
const reqHost = new URL(req.url).host;
|
||||
return host === reqHost ? 'browser-same-origin' : 'browser-cross-origin';
|
||||
} catch {
|
||||
return 'browser-cross-origin';
|
||||
}
|
||||
}
|
||||
|
||||
// ---------- Circuit breaker ----------
|
||||
|
||||
interface BreakerSample {
|
||||
ts: number;
|
||||
ok: boolean;
|
||||
}
|
||||
|
||||
const breakerSamples: BreakerSample[] = [];
|
||||
let breakerTripped = false;
|
||||
let breakerLastNotifyTs = 0;
|
||||
|
||||
function pruneOldSamples(now: number): void {
|
||||
while (breakerSamples.length > 0 && now - breakerSamples[0]!.ts > CB_WINDOW_MS) {
|
||||
breakerSamples.shift();
|
||||
}
|
||||
}
|
||||
|
||||
function recordSample(ok: boolean): void {
|
||||
const now = Date.now();
|
||||
pruneOldSamples(now);
|
||||
breakerSamples.push({ ts: now, ok });
|
||||
|
||||
if (breakerSamples.length < CB_MIN_SAMPLES) {
|
||||
breakerTripped = false;
|
||||
return;
|
||||
}
|
||||
let failures = 0;
|
||||
for (const s of breakerSamples) if (!s.ok) failures++;
|
||||
const ratio = failures / breakerSamples.length;
|
||||
const wasTripped = breakerTripped;
|
||||
breakerTripped = ratio > CB_TRIP_FAILURE_RATIO;
|
||||
|
||||
if (breakerTripped && !wasTripped && now - breakerLastNotifyTs > CB_WINDOW_MS) {
|
||||
breakerLastNotifyTs = now;
|
||||
console.error('[usage-telemetry] circuit breaker tripped', {
|
||||
ratio: ratio.toFixed(3),
|
||||
samples: breakerSamples.length,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
export function getTelemetryHealth(): 'ok' | 'degraded' | 'off' {
|
||||
if (!isUsageEnabled()) return 'off';
|
||||
return breakerTripped ? 'degraded' : 'ok';
|
||||
}
|
||||
|
||||
export function maybeAttachDevHealthHeader(headers: Headers): void {
|
||||
if (!isDevHeaderEnabled()) return;
|
||||
headers.set('x-usage-telemetry', getTelemetryHealth());
|
||||
}
|
||||
|
||||
// ---------- Implicit request scope (AsyncLocalStorage) ----------
|
||||
//
|
||||
// Per koala's review (#3381), this lets fetch helpers emit upstream events
|
||||
// without leaf handlers having to thread a usage hook through every call.
|
||||
// The gateway sets the scope before invoking matchedHandler; fetch helpers
|
||||
// (fetchJson, cachedFetchJsonWithMeta) read from it lazily.
|
||||
//
|
||||
// AsyncLocalStorage is loaded defensively. If the runtime ever rejects the
|
||||
// import (older Edge versions, sandboxed contexts), the scope helpers
|
||||
// degrade to no-ops and telemetry simply skips. The gateway request event
|
||||
// is unaffected — it never depended on ALS.
|
||||
|
||||
export interface UsageScope {
|
||||
ctx: WaitUntilCtx;
|
||||
requestId: string;
|
||||
customerId: string | null;
|
||||
route: string;
|
||||
tier: number;
|
||||
}
|
||||
|
||||
type ALSLike<T> = {
|
||||
run: <R>(store: T, fn: () => R) => R;
|
||||
getStore: () => T | undefined;
|
||||
};
|
||||
|
||||
let scopeStore: ALSLike<UsageScope> | null = null;
|
||||
|
||||
async function getScopeStore(): Promise<ALSLike<UsageScope> | null> {
|
||||
if (scopeStore) return scopeStore;
|
||||
try {
|
||||
const mod = await import('node:async_hooks');
|
||||
scopeStore = new mod.AsyncLocalStorage<UsageScope>();
|
||||
return scopeStore;
|
||||
} catch {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export async function runWithUsageScope<R>(scope: UsageScope, fn: () => R | Promise<R>): Promise<R> {
|
||||
const store = await getScopeStore();
|
||||
if (!store) return fn();
|
||||
return store.run(scope, fn) as R | Promise<R>;
|
||||
}
|
||||
|
||||
export function getUsageScope(): UsageScope | undefined {
|
||||
return scopeStore?.getStore();
|
||||
}
|
||||
|
||||
// ---------- Sink ----------
|
||||
|
||||
export async function sendToAxiom(events: UsageEvent[]): Promise<void> {
|
||||
if (!isUsageEnabled()) return;
|
||||
if (events.length === 0) return;
|
||||
const token = process.env.AXIOM_API_TOKEN;
|
||||
if (!token) {
|
||||
if (Math.random() < SAMPLED_DROP_LOG_RATE) {
|
||||
console.warn('[usage-telemetry] drop', { reason: 'no-token' });
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (breakerTripped) {
|
||||
if (Math.random() < SAMPLED_DROP_LOG_RATE) {
|
||||
console.warn('[usage-telemetry] drop', { reason: 'breaker-open' });
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const controller = new AbortController();
|
||||
const timer = setTimeout(() => controller.abort(), TELEMETRY_TIMEOUT_MS);
|
||||
try {
|
||||
const resp = await fetch(AXIOM_INGEST_URL, {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
Authorization: `Bearer ${token}`,
|
||||
'Content-Type': 'application/json',
|
||||
},
|
||||
body: JSON.stringify(events),
|
||||
signal: controller.signal,
|
||||
});
|
||||
if (!resp.ok) {
|
||||
recordSample(false);
|
||||
if (Math.random() < SAMPLED_DROP_LOG_RATE) {
|
||||
console.warn('[usage-telemetry] drop', { reason: `http-${resp.status}` });
|
||||
}
|
||||
return;
|
||||
}
|
||||
recordSample(true);
|
||||
} catch (err) {
|
||||
recordSample(false);
|
||||
if (Math.random() < SAMPLED_DROP_LOG_RATE) {
|
||||
const reason = err instanceof Error && err.name === 'AbortError' ? 'timeout' : 'fetch-error';
|
||||
console.warn('[usage-telemetry] drop', { reason });
|
||||
}
|
||||
} finally {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
}
|
||||
|
||||
export interface WaitUntilCtx {
|
||||
waitUntil: (p: Promise<unknown>) => void;
|
||||
}
|
||||
|
||||
export function emitUsageEvents(ctx: WaitUntilCtx, events: UsageEvent[]): void {
|
||||
if (!isUsageEnabled() || events.length === 0) return;
|
||||
ctx.waitUntil(sendToAxiom(events));
|
||||
}
|
||||
|
||||
// Variant that returns the in-flight delivery promise instead of registering
|
||||
// it on a context. Use when the caller is already inside a single
|
||||
// ctx.waitUntil() chain and wants to await delivery synchronously to avoid a
|
||||
// nested waitUntil registration (which Edge runtimes may drop).
|
||||
export function deliverUsageEvents(events: UsageEvent[]): Promise<void> {
|
||||
if (!isUsageEnabled() || events.length === 0) return Promise.resolve();
|
||||
return sendToAxiom(events);
|
||||
}
|
||||
@@ -13,10 +13,13 @@
|
||||
*
|
||||
* Trivially deleted when v1 retires — just `rm` the alias files.
|
||||
*/
|
||||
type GatewayCtx = { waitUntil: (p: Promise<unknown>) => void };
|
||||
|
||||
export async function rewriteToSebuf(
|
||||
req: Request,
|
||||
newPath: string,
|
||||
gateway: (req: Request) => Promise<Response>,
|
||||
gateway: (req: Request, ctx: GatewayCtx) => Promise<Response>,
|
||||
ctx: GatewayCtx,
|
||||
): Promise<Response> {
|
||||
const url = new URL(req.url);
|
||||
url.pathname = newPath;
|
||||
@@ -27,5 +30,5 @@ export async function rewriteToSebuf(
|
||||
headers: req.headers,
|
||||
body,
|
||||
});
|
||||
return gateway(rewritten);
|
||||
return gateway(rewritten, ctx);
|
||||
}
|
||||
|
||||
@@ -17,7 +17,23 @@ import { mapErrorToResponse } from './error-mapper';
|
||||
import { checkRateLimit, checkEndpointRateLimit, hasEndpointRatePolicy } from './_shared/rate-limit';
|
||||
import { drainResponseHeaders } from './_shared/response-headers';
|
||||
import { checkEntitlement, getRequiredTier, getEntitlements } from './_shared/entitlement-check';
|
||||
import { resolveSessionUserId } from './_shared/auth-session';
|
||||
import { resolveClerkSession } from './_shared/auth-session';
|
||||
import { buildUsageIdentity, type UsageIdentityInput } from './_shared/usage-identity';
|
||||
import {
|
||||
deliverUsageEvents,
|
||||
buildRequestEvent,
|
||||
deriveRequestId,
|
||||
deriveExecutionRegion,
|
||||
deriveCountry,
|
||||
deriveReqBytes,
|
||||
deriveSentryTraceId,
|
||||
deriveOriginKind,
|
||||
deriveUaHash,
|
||||
maybeAttachDevHealthHeader,
|
||||
runWithUsageScope,
|
||||
type CacheTier as UsageCacheTier,
|
||||
type RequestReason,
|
||||
} from './_shared/usage';
|
||||
import type { ServerOptions } from '../src/generated/server/worldmonitor/seismology/v1/service_server';
|
||||
|
||||
export const serverOptions: ServerOptions = { onError: mapErrorToResponse };
|
||||
@@ -26,11 +42,16 @@ export const serverOptions: ServerOptions = { onError: mapErrorToResponse };
|
||||
// NOTE: This map is shared across all domain bundles (~3KB). Kept centralised for
|
||||
// single-source-of-truth maintainability; the size is negligible vs handler code.
|
||||
|
||||
type CacheTier = 'fast' | 'medium' | 'slow' | 'slow-browser' | 'static' | 'daily' | 'no-store';
|
||||
type CacheTier = 'fast' | 'medium' | 'slow' | 'slow-browser' | 'static' | 'daily' | 'no-store' | 'live';
|
||||
|
||||
// Three-tier caching: browser (max-age) → CF edge (s-maxage) → Vercel CDN (CDN-Cache-Control).
|
||||
// CF ignores Vary: Origin so it may pin a single ACAO value, but this is acceptable
|
||||
// since production traffic is same-origin and preview deployments hit Vercel CDN directly.
|
||||
//
|
||||
// 'live' tier (60s) is for endpoints with strict freshness contracts — the
|
||||
// energy-atlas live-tanker map layer requires position fixes to refresh on
|
||||
// the order of one minute. Every shorter-than-medium tier is custom; we keep
|
||||
// the existing tiers untouched so unrelated endpoints aren't impacted.
|
||||
const TIER_HEADERS: Record<CacheTier, string> = {
|
||||
fast: 'public, max-age=60, s-maxage=300, stale-while-revalidate=60, stale-if-error=600',
|
||||
medium: 'public, max-age=120, s-maxage=600, stale-while-revalidate=120, stale-if-error=900',
|
||||
@@ -39,6 +60,7 @@ const TIER_HEADERS: Record<CacheTier, string> = {
|
||||
static: 'public, max-age=600, s-maxage=3600, stale-while-revalidate=600, stale-if-error=14400',
|
||||
daily: 'public, max-age=3600, s-maxage=14400, stale-while-revalidate=7200, stale-if-error=172800',
|
||||
'no-store': 'no-store',
|
||||
live: 'public, max-age=30, s-maxage=60, stale-while-revalidate=60, stale-if-error=300',
|
||||
};
|
||||
|
||||
// Vercel CDN-specific cache TTLs — CDN-Cache-Control overrides Cache-Control for
|
||||
@@ -52,10 +74,14 @@ const TIER_CDN_CACHE: Record<CacheTier, string | null> = {
|
||||
static: 'public, s-maxage=14400, stale-while-revalidate=3600, stale-if-error=28800',
|
||||
daily: 'public, s-maxage=86400, stale-while-revalidate=14400, stale-if-error=172800',
|
||||
'no-store': null,
|
||||
live: 'public, s-maxage=60, stale-while-revalidate=60, stale-if-error=300',
|
||||
};
|
||||
|
||||
const RPC_CACHE_TIER: Record<string, CacheTier> = {
|
||||
'/api/maritime/v1/get-vessel-snapshot': 'no-store',
|
||||
// 'live' tier — bbox-quantized + tanker-aware caching upstream of the
|
||||
// 60s in-handler cache, absorbing identical-bbox requests at the CDN
|
||||
// before they hit this Vercel function. Energy Atlas live-tanker layer.
|
||||
'/api/maritime/v1/get-vessel-snapshot': 'live',
|
||||
|
||||
'/api/market/v1/list-market-quotes': 'medium',
|
||||
'/api/market/v1/list-crypto-quotes': 'medium',
|
||||
@@ -273,18 +299,86 @@ import { PREMIUM_RPC_PATHS } from '../src/shared/premium-paths';
|
||||
* Applies the full gateway pipeline: origin check → CORS → OPTIONS preflight →
|
||||
* API key → rate limit → route match (with POST→GET compat) → execute → cache headers.
|
||||
*/
|
||||
export type GatewayCtx = { waitUntil: (p: Promise<unknown>) => void };
|
||||
|
||||
export function createDomainGateway(
|
||||
routes: RouteDescriptor[],
|
||||
): (req: Request) => Promise<Response> {
|
||||
): (req: Request, ctx?: GatewayCtx) => Promise<Response> {
|
||||
const router = createRouter(routes);
|
||||
|
||||
return async function handler(originalRequest: Request): Promise<Response> {
|
||||
return async function handler(originalRequest: Request, ctx?: GatewayCtx): Promise<Response> {
|
||||
let request = originalRequest;
|
||||
const rawPathname = new URL(request.url).pathname;
|
||||
const pathname = rawPathname.length > 1 ? rawPathname.replace(/\/+$/, '') : rawPathname;
|
||||
const t0 = Date.now();
|
||||
|
||||
// Usage-telemetry identity inputs — accumulated as gateway auth resolution progresses.
|
||||
// Read at every return point; null/0 defaults are valid for early returns.
|
||||
//
|
||||
// x-widget-key is intentionally NOT trusted here: a header is attacker-
|
||||
// controllable, and emitting it as `customer_id` would let unauthenticated
|
||||
// callers poison per-customer dashboards (per koala #3403 review). We only
|
||||
// populate `widgetKey` after validating it against the configured
|
||||
// WIDGET_AGENT_KEY — same check used in api/widget-agent.ts.
|
||||
const rawWidgetKey = request.headers.get('x-widget-key') ?? null;
|
||||
const widgetAgentKey = process.env.WIDGET_AGENT_KEY ?? '';
|
||||
const validatedWidgetKey =
|
||||
rawWidgetKey && widgetAgentKey && rawWidgetKey === widgetAgentKey ? rawWidgetKey : null;
|
||||
const usage: UsageIdentityInput = {
|
||||
sessionUserId: null,
|
||||
isUserApiKey: false,
|
||||
enterpriseApiKey: null,
|
||||
widgetKey: validatedWidgetKey,
|
||||
clerkOrgId: null,
|
||||
userApiKeyCustomerRef: null,
|
||||
tier: null,
|
||||
};
|
||||
// Domain segment for telemetry. Path layouts:
|
||||
// /api/<domain>/v1/<rpc> → parts[2] = domain
|
||||
// /api/v2/<domain>/<rpc> → parts[2] = "v2", parts[3] = domain
|
||||
const _parts = pathname.split('/');
|
||||
const domain = (/^v\d+$/.test(_parts[2] ?? '') ? _parts[3] : _parts[2]) ?? '';
|
||||
const reqBytes = deriveReqBytes(request);
|
||||
|
||||
function emitRequest(status: number, reason: RequestReason, cacheTier: UsageCacheTier | null, resBytes = 0): void {
|
||||
if (!ctx?.waitUntil) return;
|
||||
const identity = buildUsageIdentity(usage);
|
||||
// Single ctx.waitUntil() registered synchronously in the request phase.
|
||||
// The IIFE awaits ua_hash (SHA-256) then awaits delivery directly via
|
||||
// deliverUsageEvents — no nested waitUntil call, which Edge runtimes
|
||||
// (Cloudflare/Vercel) may drop after the response phase ends.
|
||||
ctx.waitUntil((async () => {
|
||||
const uaHash = await deriveUaHash(originalRequest);
|
||||
await deliverUsageEvents([
|
||||
buildRequestEvent({
|
||||
requestId: deriveRequestId(originalRequest),
|
||||
domain,
|
||||
route: pathname,
|
||||
method: originalRequest.method,
|
||||
status,
|
||||
durationMs: Date.now() - t0,
|
||||
reqBytes,
|
||||
resBytes,
|
||||
customerId: identity.customer_id,
|
||||
principalId: identity.principal_id,
|
||||
authKind: identity.auth_kind,
|
||||
tier: identity.tier,
|
||||
country: deriveCountry(originalRequest),
|
||||
executionRegion: deriveExecutionRegion(originalRequest),
|
||||
executionPlane: 'vercel-edge',
|
||||
originKind: deriveOriginKind(originalRequest),
|
||||
cacheTier,
|
||||
uaHash,
|
||||
sentryTraceId: deriveSentryTraceId(originalRequest),
|
||||
reason,
|
||||
}),
|
||||
]);
|
||||
})());
|
||||
}
|
||||
|
||||
// Origin check — skip CORS headers for disallowed origins
|
||||
if (isDisallowedOrigin(request)) {
|
||||
emitRequest(403, 'origin_403', null);
|
||||
return new Response(JSON.stringify({ error: 'Origin not allowed' }), {
|
||||
status: 403,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
@@ -300,6 +394,7 @@ export function createDomainGateway(
|
||||
|
||||
// OPTIONS preflight
|
||||
if (request.method === 'OPTIONS') {
|
||||
emitRequest(204, 'preflight', null);
|
||||
return new Response(null, { status: 204, headers: corsHeaders });
|
||||
}
|
||||
|
||||
@@ -312,7 +407,10 @@ export function createDomainGateway(
|
||||
// Only runs for tier-gated endpoints to avoid JWKS lookup on every request.
|
||||
let sessionUserId: string | null = null;
|
||||
if (isTierGated) {
|
||||
sessionUserId = await resolveSessionUserId(request);
|
||||
const session = await resolveClerkSession(request);
|
||||
sessionUserId = session?.userId ?? null;
|
||||
usage.sessionUserId = sessionUserId;
|
||||
usage.clerkOrgId = session?.orgId ?? null;
|
||||
if (sessionUserId) {
|
||||
request = new Request(request.url, {
|
||||
method: request.method,
|
||||
@@ -344,10 +442,13 @@ export function createDomainGateway(
|
||||
const userKeyResult = await validateUserApiKey(wmKey);
|
||||
if (userKeyResult) {
|
||||
isUserApiKey = true;
|
||||
usage.isUserApiKey = true;
|
||||
usage.userApiKeyCustomerRef = userKeyResult.userId;
|
||||
keyCheck = { valid: true, required: true };
|
||||
// Inject x-user-id for downstream entitlement checks
|
||||
if (!sessionUserId) {
|
||||
sessionUserId = userKeyResult.userId;
|
||||
usage.sessionUserId = sessionUserId;
|
||||
request = new Request(request.url, {
|
||||
method: request.method,
|
||||
headers: (() => {
|
||||
@@ -361,11 +462,19 @@ export function createDomainGateway(
|
||||
}
|
||||
}
|
||||
|
||||
// Enterprise API key (WORLDMONITOR_VALID_KEYS): keyCheck.valid + wmKey present
|
||||
// and not a wm_-prefixed user key.
|
||||
if (keyCheck.valid && wmKey && !isUserApiKey && !wmKey.startsWith('wm_')) {
|
||||
usage.enterpriseApiKey = wmKey;
|
||||
}
|
||||
|
||||
// User API keys on PREMIUM_RPC_PATHS need verified pro-tier entitlement.
|
||||
// Admin keys (WORLDMONITOR_VALID_KEYS) bypass this since they are operator-issued.
|
||||
if (isUserApiKey && needsLegacyProBearerGate && sessionUserId) {
|
||||
const ent = await getEntitlements(sessionUserId);
|
||||
if (ent) usage.tier = typeof ent.features.tier === 'number' ? ent.features.tier : 0;
|
||||
if (!ent || !ent.features.apiAccess) {
|
||||
emitRequest(403, 'tier_403', null);
|
||||
return new Response(JSON.stringify({ error: 'API access subscription required' }), {
|
||||
status: 403,
|
||||
headers: { 'Content-Type': 'application/json', ...corsHeaders },
|
||||
@@ -380,11 +489,19 @@ export function createDomainGateway(
|
||||
const { validateBearerToken } = await import('./auth-session');
|
||||
const session = await validateBearerToken(authHeader.slice(7));
|
||||
if (!session.valid) {
|
||||
emitRequest(401, 'auth_401', null);
|
||||
return new Response(JSON.stringify({ error: 'Invalid or expired session' }), {
|
||||
status: 401,
|
||||
headers: { 'Content-Type': 'application/json', ...corsHeaders },
|
||||
});
|
||||
}
|
||||
// Capture identity for telemetry — legacy bearer auth bypasses the
|
||||
// earlier resolveClerkSession() block (only runs for tier-gated routes),
|
||||
// so without this premium bearer requests would emit as anonymous.
|
||||
if (session.userId) {
|
||||
sessionUserId = session.userId;
|
||||
usage.sessionUserId = session.userId;
|
||||
}
|
||||
// Accept EITHER a Clerk 'pro' role OR a Convex Dodo entitlement with
|
||||
// tier >= 1. The Dodo webhook pipeline writes Convex entitlements but
|
||||
// does NOT sync Clerk publicMetadata.role, so a paying subscriber's
|
||||
@@ -404,9 +521,11 @@ export function createDomainGateway(
|
||||
let allowed = session.role === 'pro';
|
||||
if (!allowed && session.userId) {
|
||||
const ent = await getEntitlements(session.userId);
|
||||
if (ent) usage.tier = typeof ent.features.tier === 'number' ? ent.features.tier : 0;
|
||||
allowed = !!ent && ent.features.tier >= 1 && ent.validUntil >= Date.now();
|
||||
}
|
||||
if (!allowed) {
|
||||
emitRequest(403, 'tier_403', null);
|
||||
return new Response(JSON.stringify({ error: 'Pro subscription required' }), {
|
||||
status: 403,
|
||||
headers: { 'Content-Type': 'application/json', ...corsHeaders },
|
||||
@@ -414,12 +533,14 @@ export function createDomainGateway(
|
||||
}
|
||||
// Valid pro session (Clerk role OR Dodo entitlement) — fall through to route handling.
|
||||
} else {
|
||||
emitRequest(401, 'auth_401', null);
|
||||
return new Response(JSON.stringify({ error: keyCheck.error, _debug: (keyCheck as any)._debug }), {
|
||||
status: 401,
|
||||
headers: { 'Content-Type': 'application/json', ...corsHeaders },
|
||||
});
|
||||
}
|
||||
} else {
|
||||
emitRequest(401, 'auth_401', null);
|
||||
return new Response(JSON.stringify({ error: keyCheck.error }), {
|
||||
status: 401,
|
||||
headers: { 'Content-Type': 'application/json', ...corsHeaders },
|
||||
@@ -432,16 +553,36 @@ export function createDomainGateway(
|
||||
// User API keys do NOT bypass — the key owner's tier is checked normally.
|
||||
if (!(keyCheck.valid && wmKey && !isUserApiKey)) {
|
||||
const entitlementResponse = await checkEntitlement(request, pathname, corsHeaders);
|
||||
if (entitlementResponse) return entitlementResponse;
|
||||
if (entitlementResponse) {
|
||||
const entReason: RequestReason =
|
||||
entitlementResponse.status === 401 ? 'auth_401'
|
||||
: entitlementResponse.status === 403 ? 'tier_403'
|
||||
: 'ok';
|
||||
emitRequest(entitlementResponse.status, entReason, null);
|
||||
return entitlementResponse;
|
||||
}
|
||||
// Allowed → record the resolved tier for telemetry. getEntitlements has
|
||||
// its own Redis cache + in-flight coalescing, so the second lookup here
|
||||
// does not double the cost when checkEntitlement already fetched.
|
||||
if (isTierGated && sessionUserId && usage.tier === null) {
|
||||
const ent = await getEntitlements(sessionUserId);
|
||||
if (ent) usage.tier = typeof ent.features.tier === 'number' ? ent.features.tier : 0;
|
||||
}
|
||||
}
|
||||
|
||||
// IP-based rate limiting — two-phase: endpoint-specific first, then global fallback
|
||||
const endpointRlResponse = await checkEndpointRateLimit(request, pathname, corsHeaders);
|
||||
if (endpointRlResponse) return endpointRlResponse;
|
||||
if (endpointRlResponse) {
|
||||
emitRequest(endpointRlResponse.status, 'rate_limit_429', null);
|
||||
return endpointRlResponse;
|
||||
}
|
||||
|
||||
if (!hasEndpointRatePolicy(pathname)) {
|
||||
const rateLimitResponse = await checkRateLimit(request, corsHeaders);
|
||||
if (rateLimitResponse) return rateLimitResponse;
|
||||
if (rateLimitResponse) {
|
||||
emitRequest(rateLimitResponse.status, 'rate_limit_429', null);
|
||||
return rateLimitResponse;
|
||||
}
|
||||
}
|
||||
|
||||
// Route matching — if POST doesn't match, convert to GET for stale clients
|
||||
@@ -467,21 +608,38 @@ export function createDomainGateway(
|
||||
if (!matchedHandler) {
|
||||
const allowed = router.allowedMethods(new URL(request.url).pathname);
|
||||
if (allowed.length > 0) {
|
||||
emitRequest(405, 'ok', null);
|
||||
return new Response(JSON.stringify({ error: 'Method not allowed' }), {
|
||||
status: 405,
|
||||
headers: { 'Content-Type': 'application/json', Allow: allowed.join(', '), ...corsHeaders },
|
||||
});
|
||||
}
|
||||
emitRequest(404, 'ok', null);
|
||||
return new Response(JSON.stringify({ error: 'Not found' }), {
|
||||
status: 404,
|
||||
headers: { 'Content-Type': 'application/json', ...corsHeaders },
|
||||
});
|
||||
}
|
||||
|
||||
// Execute handler with top-level error boundary
|
||||
// Execute handler with top-level error boundary.
|
||||
// Wrap in runWithUsageScope so deep fetch helpers (fetchJson,
|
||||
// cachedFetchJsonWithMeta) can attribute upstream calls to this customer
|
||||
// without leaf handlers having to thread a usage hook through every call.
|
||||
let response: Response;
|
||||
const identityForScope = buildUsageIdentity(usage);
|
||||
const handlerCall = matchedHandler;
|
||||
const requestForHandler = request;
|
||||
try {
|
||||
response = await matchedHandler(request);
|
||||
response = await runWithUsageScope(
|
||||
{
|
||||
ctx: ctx ?? { waitUntil: () => {} },
|
||||
requestId: deriveRequestId(originalRequest),
|
||||
customerId: identityForScope.customer_id,
|
||||
route: pathname,
|
||||
tier: identityForScope.tier,
|
||||
},
|
||||
() => handlerCall(requestForHandler),
|
||||
);
|
||||
} catch (err) {
|
||||
console.error('[gateway] Unhandled handler error:', err);
|
||||
response = new Response(JSON.stringify({ message: 'Internal server error' }), {
|
||||
@@ -503,6 +661,7 @@ export function createDomainGateway(
|
||||
}
|
||||
|
||||
// For GET 200 responses: read body once for cache-header decisions + ETag
|
||||
let resolvedCacheTier: CacheTier | null = null;
|
||||
if (response.status === 200 && request.method === 'GET' && response.body) {
|
||||
const bodyBytes = await response.arrayBuffer();
|
||||
|
||||
@@ -514,12 +673,14 @@ export function createDomainGateway(
|
||||
if (mergedHeaders.get('X-No-Cache') || isUpstreamUnavailable) {
|
||||
mergedHeaders.set('Cache-Control', 'no-store');
|
||||
mergedHeaders.set('X-Cache-Tier', 'no-store');
|
||||
resolvedCacheTier = 'no-store';
|
||||
} else {
|
||||
const rpcName = pathname.split('/').pop() ?? '';
|
||||
const envOverride = process.env[`CACHE_TIER_OVERRIDE_${rpcName.replace(/-/g, '_').toUpperCase()}`] as CacheTier | undefined;
|
||||
const isPremium = PREMIUM_RPC_PATHS.has(pathname) || getRequiredTier(pathname) !== null;
|
||||
const tier = isPremium ? 'slow-browser' as CacheTier
|
||||
: (envOverride && envOverride in TIER_HEADERS ? envOverride : null) ?? RPC_CACHE_TIER[pathname] ?? 'medium';
|
||||
resolvedCacheTier = tier;
|
||||
mergedHeaders.set('Cache-Control', TIER_HEADERS[tier]);
|
||||
// Only allow Vercel CDN caching for trusted origins (worldmonitor.app, Vercel previews,
|
||||
// Tauri). No-origin server-side requests (external scrapers) must always reach the edge
|
||||
@@ -553,9 +714,13 @@ export function createDomainGateway(
|
||||
|
||||
const ifNoneMatch = request.headers.get('If-None-Match');
|
||||
if (ifNoneMatch === etag) {
|
||||
emitRequest(304, 'ok', resolvedCacheTier, 0);
|
||||
maybeAttachDevHealthHeader(mergedHeaders);
|
||||
return new Response(null, { status: 304, headers: mergedHeaders });
|
||||
}
|
||||
|
||||
emitRequest(response.status, 'ok', resolvedCacheTier, view.length);
|
||||
maybeAttachDevHealthHeader(mergedHeaders);
|
||||
return new Response(bodyBytes, {
|
||||
status: response.status,
|
||||
statusText: response.statusText,
|
||||
@@ -570,6 +735,12 @@ export function createDomainGateway(
|
||||
mergedHeaders.delete('X-No-Cache');
|
||||
}
|
||||
|
||||
// Streaming/non-GET-200 responses: res_bytes is best-effort 0 (Content-Length
|
||||
// is often absent on chunked responses; teeing the stream would add latency).
|
||||
const finalContentLen = response.headers.get('content-length');
|
||||
const finalResBytes = finalContentLen ? Number(finalContentLen) || 0 : 0;
|
||||
emitRequest(response.status, 'ok', resolvedCacheTier, finalResBytes);
|
||||
maybeAttachDevHealthHeader(mergedHeaders);
|
||||
return new Response(response.body, {
|
||||
status: response.status,
|
||||
statusText: response.statusText,
|
||||
|
||||
@@ -27,10 +27,27 @@ const SEVERITY_MAP: Record<string, AisDisruptionSeverity> = {
|
||||
high: 'AIS_DISRUPTION_SEVERITY_HIGH',
|
||||
};
|
||||
|
||||
// Cache the two variants separately — candidate reports materially change
|
||||
// payload size, and clients with no position callbacks should not have to
|
||||
// wait on or pay for the heavier payload.
|
||||
const SNAPSHOT_CACHE_TTL_MS = 300_000; // 5 min -- matches client poll interval
|
||||
// In-process cache TTLs.
|
||||
//
|
||||
// The base snapshot (no candidates, no tankers, no bbox) is the high-traffic
|
||||
// path consumed by the AIS-density layer + military-detection consumers. It
|
||||
// re-uses the existing 5-minute cache because density / disruptions only
|
||||
// change once per relay cycle.
|
||||
//
|
||||
// Tanker (live-tanker map layer) and bbox-filtered responses MUST refresh
|
||||
// every 60s to honor the live-tanker freshness contract — anything longer
|
||||
// shows stale vessel positions and collapses distinct bboxes onto one
|
||||
// payload, defeating the bbox parameter entirely.
|
||||
const SNAPSHOT_CACHE_TTL_BASE_MS = 300_000; // 5 min for non-bbox / non-tanker reads
|
||||
const SNAPSHOT_CACHE_TTL_LIVE_MS = 60_000; // 60 s for live tanker / bbox reads
|
||||
|
||||
// 1° bbox quantization for cache-key reuse: a user panning a few decimal
|
||||
// degrees should hit the same cache slot as another user nearby. Done
|
||||
// server-side so the gateway 'live' tier sees identical query strings and
|
||||
// the CDN absorbs the request before it reaches this handler.
|
||||
function quantize(v: number): number {
|
||||
return Math.floor(v);
|
||||
}
|
||||
|
||||
interface SnapshotCacheSlot {
|
||||
snapshot: VesselSnapshot | undefined;
|
||||
@@ -38,28 +55,91 @@ interface SnapshotCacheSlot {
|
||||
inFlight: Promise<VesselSnapshot | undefined> | null;
|
||||
}
|
||||
|
||||
const cache: Record<'with' | 'without', SnapshotCacheSlot> = {
|
||||
with: { snapshot: undefined, timestamp: 0, inFlight: null },
|
||||
without: { snapshot: undefined, timestamp: 0, inFlight: null },
|
||||
};
|
||||
// Cache keyed by request shape: candidates, tankers, and quantized bbox.
|
||||
// Replaces the prior `with|without` keying which would silently serve
|
||||
// stale tanker data and collapse distinct bboxes.
|
||||
//
|
||||
// LRU-bounded: each distinct (includeCandidates, includeTankers, quantizedBbox)
|
||||
// triple creates a slot. With 1° quantization and a misbehaving client, the
|
||||
// keyspace is ~64,000 (180×360); without a cap the Map would grow unbounded
|
||||
// across the lifetime of the serverless instance. Realistic load is ~12 slots
|
||||
// (6 chokepoints × 2 flag combos), so a 128-slot cap leaves >10x headroom for
|
||||
// edge panning while making OOM impossible.
|
||||
const SNAPSHOT_CACHE_MAX_SLOTS = 128;
|
||||
const cache = new Map<string, SnapshotCacheSlot>();
|
||||
|
||||
async function fetchVesselSnapshot(includeCandidates: boolean): Promise<VesselSnapshot | undefined> {
|
||||
const slot = cache[includeCandidates ? 'with' : 'without'];
|
||||
function touchSlot(key: string, slot: SnapshotCacheSlot): void {
|
||||
// Move to end of insertion order so it's most-recently-used. Map iteration
|
||||
// order = insertion order, so the first entry is the LRU candidate.
|
||||
cache.delete(key);
|
||||
cache.set(key, slot);
|
||||
}
|
||||
|
||||
function evictIfNeeded(): void {
|
||||
if (cache.size < SNAPSHOT_CACHE_MAX_SLOTS) return;
|
||||
// Walk insertion order; evict the first slot that has no in-flight fetch.
|
||||
// An in-flight slot is still in use by an awaiting caller — evicting it
|
||||
// would orphan the promise.
|
||||
for (const [k, s] of cache) {
|
||||
if (s.inFlight === null) {
|
||||
cache.delete(k);
|
||||
return;
|
||||
}
|
||||
}
|
||||
// All slots in flight — nothing to evict. Caller still inserts; we
|
||||
// accept temporary growth past the cap until in-flight settles.
|
||||
}
|
||||
|
||||
function cacheKeyFor(
|
||||
includeCandidates: boolean,
|
||||
includeTankers: boolean,
|
||||
bbox: { swLat: number; swLon: number; neLat: number; neLon: number } | null,
|
||||
): string {
|
||||
const c = includeCandidates ? '1' : '0';
|
||||
const t = includeTankers ? '1' : '0';
|
||||
if (!bbox) return `${c}${t}|null`;
|
||||
const sl = quantize(bbox.swLat);
|
||||
const so = quantize(bbox.swLon);
|
||||
const nl = quantize(bbox.neLat);
|
||||
const no = quantize(bbox.neLon);
|
||||
return `${c}${t}|${sl},${so},${nl},${no}`;
|
||||
}
|
||||
|
||||
function ttlFor(includeTankers: boolean, bbox: unknown): number {
|
||||
return includeTankers || bbox ? SNAPSHOT_CACHE_TTL_LIVE_MS : SNAPSHOT_CACHE_TTL_BASE_MS;
|
||||
}
|
||||
|
||||
async function fetchVesselSnapshot(
|
||||
includeCandidates: boolean,
|
||||
includeTankers: boolean,
|
||||
bbox: { swLat: number; swLon: number; neLat: number; neLon: number } | null,
|
||||
): Promise<VesselSnapshot | undefined> {
|
||||
const key = cacheKeyFor(includeCandidates, includeTankers, bbox);
|
||||
let slot = cache.get(key);
|
||||
if (!slot) {
|
||||
evictIfNeeded();
|
||||
slot = { snapshot: undefined, timestamp: 0, inFlight: null };
|
||||
cache.set(key, slot);
|
||||
}
|
||||
const now = Date.now();
|
||||
if (slot.snapshot && (now - slot.timestamp) < SNAPSHOT_CACHE_TTL_MS) {
|
||||
const ttl = ttlFor(includeTankers, bbox);
|
||||
if (slot.snapshot && (now - slot.timestamp) < ttl) {
|
||||
touchSlot(key, slot);
|
||||
return slot.snapshot;
|
||||
}
|
||||
|
||||
if (slot.inFlight) {
|
||||
touchSlot(key, slot);
|
||||
return slot.inFlight;
|
||||
}
|
||||
|
||||
slot.inFlight = fetchVesselSnapshotFromRelay(includeCandidates);
|
||||
slot.inFlight = fetchVesselSnapshotFromRelay(includeCandidates, includeTankers, bbox);
|
||||
try {
|
||||
const result = await slot.inFlight;
|
||||
if (result) {
|
||||
slot.snapshot = result;
|
||||
slot.timestamp = Date.now();
|
||||
touchSlot(key, slot);
|
||||
}
|
||||
return result ?? slot.snapshot; // serve stale on relay failure
|
||||
} finally {
|
||||
@@ -87,13 +167,31 @@ function toCandidateReport(raw: any): SnapshotCandidateReport | null {
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchVesselSnapshotFromRelay(includeCandidates: boolean): Promise<VesselSnapshot | undefined> {
|
||||
async function fetchVesselSnapshotFromRelay(
|
||||
includeCandidates: boolean,
|
||||
includeTankers: boolean,
|
||||
bbox: { swLat: number; swLon: number; neLat: number; neLon: number } | null,
|
||||
): Promise<VesselSnapshot | undefined> {
|
||||
try {
|
||||
const relayBaseUrl = getRelayBaseUrl();
|
||||
if (!relayBaseUrl) return undefined;
|
||||
|
||||
const params = new URLSearchParams();
|
||||
params.set('candidates', includeCandidates ? 'true' : 'false');
|
||||
if (includeTankers) params.set('tankers', 'true');
|
||||
if (bbox) {
|
||||
// Quantized bbox: prevents the relay from caching one URL per
|
||||
// floating-point pixel as users pan. Same quantization as the
|
||||
// handler-side cache key so they stay consistent.
|
||||
const sl = quantize(bbox.swLat);
|
||||
const so = quantize(bbox.swLon);
|
||||
const nl = quantize(bbox.neLat);
|
||||
const no = quantize(bbox.neLon);
|
||||
params.set('bbox', `${sl},${so},${nl},${no}`);
|
||||
}
|
||||
|
||||
const response = await fetch(
|
||||
`${relayBaseUrl}/ais/snapshot?candidates=${includeCandidates ? 'true' : 'false'}`,
|
||||
`${relayBaseUrl}/ais/snapshot?${params.toString()}`,
|
||||
{
|
||||
headers: getRelayHeaders(),
|
||||
signal: AbortSignal.timeout(10000),
|
||||
@@ -141,6 +239,9 @@ async function fetchVesselSnapshotFromRelay(includeCandidates: boolean): Promise
|
||||
const candidateReports = (includeCandidates && Array.isArray(data.candidateReports))
|
||||
? data.candidateReports.map(toCandidateReport).filter((r: SnapshotCandidateReport | null): r is SnapshotCandidateReport => r !== null)
|
||||
: [];
|
||||
const tankerReports = (includeTankers && Array.isArray(data.tankerReports))
|
||||
? data.tankerReports.map(toCandidateReport).filter((r: SnapshotCandidateReport | null): r is SnapshotCandidateReport => r !== null)
|
||||
: [];
|
||||
|
||||
return {
|
||||
snapshotAt: Date.now(),
|
||||
@@ -153,6 +254,7 @@ async function fetchVesselSnapshotFromRelay(includeCandidates: boolean): Promise
|
||||
messages: Number.isFinite(Number(rawStatus.messages)) ? Number(rawStatus.messages) : 0,
|
||||
},
|
||||
candidateReports,
|
||||
tankerReports,
|
||||
};
|
||||
} catch {
|
||||
return undefined;
|
||||
@@ -163,14 +265,79 @@ async function fetchVesselSnapshotFromRelay(includeCandidates: boolean): Promise
|
||||
// RPC handler
|
||||
// ========================================================================
|
||||
|
||||
// Bbox-size guard: reject requests where either dimension exceeds 10°. This
|
||||
// prevents a malicious or buggy client from requesting a global box and
|
||||
// pulling every tanker through one query.
|
||||
const MAX_BBOX_DEGREES = 10;
|
||||
|
||||
/**
|
||||
* 400-class bbox validation error. Carries `statusCode = 400` so
|
||||
* server/error-mapper.ts surfaces it as HTTP 400 (the mapper branches
|
||||
* on `'statusCode' in error`; a plain Error would fall through to
|
||||
* "unhandled error" → 500). Used for both the size guard and the
|
||||
* lat/lon range guard.
|
||||
*
|
||||
* Range checks matter because the relay silently DROPS a malformed
|
||||
* bbox param and serves a global capped tanker subset — making the
|
||||
* layer appear to "work" with stale data instead of failing loudly.
|
||||
*/
|
||||
export class BboxValidationError extends Error {
|
||||
readonly statusCode = 400;
|
||||
constructor(reason: string) {
|
||||
super(`bbox invalid: ${reason}`);
|
||||
this.name = 'BboxValidationError';
|
||||
}
|
||||
}
|
||||
|
||||
// Backwards-compatible alias for tests that imported BboxTooLargeError.
|
||||
// Prefer BboxValidationError for new code.
|
||||
export const BboxTooLargeError = BboxValidationError;
|
||||
|
||||
function isValidLatLon(lat: number, lon: number): boolean {
|
||||
return (
|
||||
Number.isFinite(lat) && Number.isFinite(lon) &&
|
||||
lat >= -90 && lat <= 90 && lon >= -180 && lon <= 180
|
||||
);
|
||||
}
|
||||
|
||||
function extractAndValidateBbox(req: GetVesselSnapshotRequest): { swLat: number; swLon: number; neLat: number; neLon: number } | null {
|
||||
const sw = { lat: Number(req.swLat), lon: Number(req.swLon) };
|
||||
const ne = { lat: Number(req.neLat), lon: Number(req.neLon) };
|
||||
// All zeroes (the default for unset proto doubles) → no bbox.
|
||||
if (sw.lat === 0 && sw.lon === 0 && ne.lat === 0 && ne.lon === 0) {
|
||||
return null;
|
||||
}
|
||||
if (!isValidLatLon(sw.lat, sw.lon)) {
|
||||
throw new BboxValidationError('sw corner outside lat/lon domain (-90..90 / -180..180)');
|
||||
}
|
||||
if (!isValidLatLon(ne.lat, ne.lon)) {
|
||||
throw new BboxValidationError('ne corner outside lat/lon domain (-90..90 / -180..180)');
|
||||
}
|
||||
if (sw.lat > ne.lat || sw.lon > ne.lon) {
|
||||
throw new BboxValidationError('sw corner must be south-west of ne corner');
|
||||
}
|
||||
if (ne.lat - sw.lat > MAX_BBOX_DEGREES || ne.lon - sw.lon > MAX_BBOX_DEGREES) {
|
||||
throw new BboxValidationError(`each dimension must be ≤ ${MAX_BBOX_DEGREES} degrees`);
|
||||
}
|
||||
return { swLat: sw.lat, swLon: sw.lon, neLat: ne.lat, neLon: ne.lon };
|
||||
}
|
||||
|
||||
export async function getVesselSnapshot(
|
||||
_ctx: ServerContext,
|
||||
req: GetVesselSnapshotRequest,
|
||||
): Promise<GetVesselSnapshotResponse> {
|
||||
try {
|
||||
const snapshot = await fetchVesselSnapshot(Boolean(req.includeCandidates));
|
||||
const bbox = extractAndValidateBbox(req);
|
||||
const snapshot = await fetchVesselSnapshot(
|
||||
Boolean(req.includeCandidates),
|
||||
Boolean(req.includeTankers),
|
||||
bbox,
|
||||
);
|
||||
return { snapshot };
|
||||
} catch {
|
||||
} catch (err) {
|
||||
// BboxValidationError carries statusCode=400; rethrowing lets the
|
||||
// gateway error-mapper surface it as HTTP 400 with the reason string.
|
||||
if (err instanceof BboxValidationError) throw err;
|
||||
return { snapshot: undefined };
|
||||
}
|
||||
}
|
||||
|
||||
@@ -257,7 +257,14 @@ async function fetchChokepointData(): Promise<ChokepointFetchResult> {
|
||||
|
||||
const [navResult, vesselResult, transitSummariesData, flowsData] = await Promise.all([
|
||||
listNavigationalWarnings(ctx, { area: '', pageSize: 0, cursor: '' }).catch((): ListNavigationalWarningsResponse => { navFailed = true; return { warnings: [], pagination: undefined }; }),
|
||||
getVesselSnapshot(ctx, { neLat: 90, neLon: 180, swLat: -90, swLon: -180, includeCandidates: false }).catch((): GetVesselSnapshotResponse => { vesselFailed = true; return { snapshot: undefined }; }),
|
||||
// All-zero bbox = "no filter, full snapshot" per the new bbox extractor
|
||||
// in get-vessel-snapshot.ts. Previously this passed (-90, -180, 90, 180)
|
||||
// because the handler ignored bbox entirely; the new 10° max-bbox guard
|
||||
// (added for the live-tanker contract) would reject that range. This
|
||||
// call doesn't need bbox filtering — it wants the global density +
|
||||
// disruption surface — so pass zeros and skip both candidate and tanker
|
||||
// payload tiers.
|
||||
getVesselSnapshot(ctx, { neLat: 0, neLon: 0, swLat: 0, swLon: 0, includeCandidates: false, includeTankers: false }).catch((): GetVesselSnapshotResponse => { vesselFailed = true; return { snapshot: undefined }; }),
|
||||
getCachedJson(TRANSIT_SUMMARIES_KEY, true).catch(() => null) as Promise<TransitSummariesPayload | null>,
|
||||
getCachedJson(FLOWS_KEY, true).catch(() => null) as Promise<Record<string, FlowEstimateEntry> | null>,
|
||||
]);
|
||||
|
||||
32
shared/brief-envelope.d.ts
vendored
@@ -19,7 +19,7 @@
|
||||
// stripped at compose time. See PR #3143 for the notify-endpoint fix
|
||||
// that established this rule.
|
||||
|
||||
export const BRIEF_ENVELOPE_VERSION: 2;
|
||||
export const BRIEF_ENVELOPE_VERSION: 3;
|
||||
|
||||
/**
|
||||
* Versions the renderer accepts from Redis on READ. Always contains
|
||||
@@ -69,6 +69,36 @@ export interface BriefDigest {
|
||||
threads: BriefThread[];
|
||||
/** Signals-to-watch. The "04 · Signals" page is omitted when empty. */
|
||||
signals: string[];
|
||||
/**
|
||||
* Non-personalised lead for the share-URL surface (v3+). Generated
|
||||
* by `generateDigestProsePublic` with profile/greeting stripped.
|
||||
* The renderer's public-mode lead block reads this when present
|
||||
* and OMITS the pull-quote when absent — never falls back to the
|
||||
* personalised `lead` (which would leak watched-asset/region
|
||||
* context). Optional for v2-envelope back-compat through the
|
||||
* 7-day TTL window.
|
||||
*/
|
||||
publicLead?: string;
|
||||
/**
|
||||
* Non-personalised "signals to watch" array for the share-URL
|
||||
* surface (v3+). The personalised `signals` array is generated
|
||||
* with `ctx.profile` set, so its phrasing can echo a user's
|
||||
* watched assets / regions ("Watch for OPEC headlines on your
|
||||
* Saudi exposure"). The public-share renderer MUST substitute
|
||||
* `publicSignals` (or omit the signals page entirely when absent)
|
||||
* — never serve the personalised `signals` to anonymous readers.
|
||||
*/
|
||||
publicSignals?: string[];
|
||||
/**
|
||||
* Non-personalised threads array for the share-URL surface (v3+).
|
||||
* Threads are mostly content-derived but the prompt instructs the
|
||||
* model to surface clusters that align with user interests; in
|
||||
* personalised mode that bias can leak. The public-share renderer
|
||||
* substitutes `publicThreads` when present, falls back to a
|
||||
* category-derived stub otherwise — never serves the personalised
|
||||
* `threads` to anonymous readers.
|
||||
*/
|
||||
publicThreads?: BriefThread[];
|
||||
}
|
||||
|
||||
export interface BriefStory {
|
||||
|
||||
@@ -29,9 +29,20 @@
|
||||
* the renderer deploys. Once that window passes,
|
||||
* SUPPORTED_ENVELOPE_VERSIONS can shrink to [2] in a follow-up.
|
||||
*
|
||||
* @type {2}
|
||||
* v3 (2026-04-25): BriefDigest.publicLead added — plus
|
||||
* publicSignals + publicThreads sibling fields. All three hold
|
||||
* non-personalised content generated by generateDigestProsePublic
|
||||
* (which also returns threads + signals stripped of profile context)
|
||||
* so api/brief/public/* NEVER serves any personalised digest field.
|
||||
* The renderer's redactForPublic substitutes lead/signals/threads
|
||||
* with their public siblings; absent siblings cause the affected
|
||||
* page to omit cleanly rather than fall back to personalised
|
||||
* content. v2 envelopes already in TTL stay readable through
|
||||
* SUPPORTED_ENVELOPE_VERSIONS.
|
||||
*
|
||||
* @type {3}
|
||||
*/
|
||||
export const BRIEF_ENVELOPE_VERSION = 2;
|
||||
export const BRIEF_ENVELOPE_VERSION = 3;
|
||||
|
||||
/**
|
||||
* Versions the renderer still accepts from Redis on READ. Must always
|
||||
@@ -42,4 +53,4 @@ export const BRIEF_ENVELOPE_VERSION = 2;
|
||||
*
|
||||
* @type {ReadonlySet<number>}
|
||||
*/
|
||||
export const SUPPORTED_ENVELOPE_VERSIONS = new Set([1, 2]);
|
||||
export const SUPPORTED_ENVELOPE_VERSIONS = new Set([1, 2, 3]);
|
||||
|
||||
16
shared/brief-filter.d.ts
vendored
@@ -48,12 +48,20 @@ export type DropMetricsFn = (event: {
|
||||
* callback runs before the `continue` that skips the story — callers
|
||||
* can use it to aggregate per-user drop counters without altering
|
||||
* filter behaviour.
|
||||
*
|
||||
* When `rankedStoryHashes` is provided, stories are re-ordered BEFORE
|
||||
* the cap is applied: stories whose `hash` matches a ranking entry
|
||||
* (by short-hash prefix, ≥4 chars) come first in ranking order;
|
||||
* stories not in the ranking come after in their original relative
|
||||
* order. Lets the canonical synthesis brain's editorial judgment of
|
||||
* importance survive the `maxStories` cut.
|
||||
*/
|
||||
export function filterTopStories(input: {
|
||||
stories: UpstreamTopStory[];
|
||||
sensitivity: AlertSensitivity;
|
||||
maxStories?: number;
|
||||
onDrop?: DropMetricsFn;
|
||||
rankedStoryHashes?: string[];
|
||||
}): BriefStory[];
|
||||
|
||||
/**
|
||||
@@ -108,4 +116,12 @@ export interface UpstreamTopStory {
|
||||
category?: unknown;
|
||||
countryCode?: unknown;
|
||||
importanceScore?: unknown;
|
||||
/**
|
||||
* Stable digest-story hash carried through from the cron's pool
|
||||
* (digestStoryToUpstreamTopStory at scripts/lib/brief-compose.mjs).
|
||||
* Used by `filterTopStories` when `rankedStoryHashes` is supplied
|
||||
* to re-order stories before the cap. Falls back to titleHash when
|
||||
* the upstream digest path didn't materialise a primary `hash`.
|
||||
*/
|
||||
hash?: unknown;
|
||||
}
|
||||
|
||||
@@ -98,10 +98,60 @@ function clip(v, cap) {
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param {{ stories: UpstreamTopStory[]; sensitivity: AlertSensitivity; maxStories?: number; onDrop?: DropMetricsFn }} input
|
||||
* Re-order `stories` so entries whose `hash` matches an entry in
|
||||
* `rankedStoryHashes` come first, in ranking order. Entries not in
|
||||
* the ranking keep their original relative order and come after.
|
||||
* Match is by short-hash prefix: a ranking entry of "abc12345"
|
||||
* matches a story whose `hash` starts with "abc12345" (≥4 chars).
|
||||
* The canonical synthesis prompt emits 8-char prefixes; stories
|
||||
* carry the full hash. Defensive check: when ranking is missing /
|
||||
* empty / not an array, returns the original array unchanged.
|
||||
*
|
||||
* Pure helper — does not mutate the input. Stable for stories that
|
||||
* share rank slots (preserves original order within a slot).
|
||||
*
|
||||
* @param {Array<{ hash?: unknown }>} stories
|
||||
* @param {unknown} rankedStoryHashes
|
||||
* @returns {Array<{ hash?: unknown }>}
|
||||
*/
|
||||
function applyRankedOrder(stories, rankedStoryHashes) {
|
||||
if (!Array.isArray(rankedStoryHashes) || rankedStoryHashes.length === 0) {
|
||||
return stories;
|
||||
}
|
||||
const ranking = rankedStoryHashes
|
||||
.filter((x) => typeof x === 'string' && x.length >= 4)
|
||||
.map((x) => x);
|
||||
if (ranking.length === 0) return stories;
|
||||
|
||||
// For each story, compute its rank index — the smallest index of a
|
||||
// ranking entry that is a PREFIX of the story's hash. Stories with
|
||||
// no match get Infinity so they sort last while preserving their
|
||||
// original order via the secondary index.
|
||||
const annotated = stories.map((story, originalIndex) => {
|
||||
const storyHash = typeof story?.hash === 'string' ? story.hash : '';
|
||||
let rank = Infinity;
|
||||
if (storyHash.length > 0) {
|
||||
for (let i = 0; i < ranking.length; i++) {
|
||||
if (storyHash.startsWith(ranking[i])) {
|
||||
rank = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
return { story, originalIndex, rank };
|
||||
});
|
||||
annotated.sort((a, b) => {
|
||||
if (a.rank !== b.rank) return a.rank - b.rank;
|
||||
return a.originalIndex - b.originalIndex;
|
||||
});
|
||||
return annotated.map((a) => a.story);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {{ stories: UpstreamTopStory[]; sensitivity: AlertSensitivity; maxStories?: number; onDrop?: DropMetricsFn; rankedStoryHashes?: string[] }} input
|
||||
* @returns {BriefStory[]}
|
||||
*/
|
||||
export function filterTopStories({ stories, sensitivity, maxStories = 12, onDrop }) {
|
||||
export function filterTopStories({ stories, sensitivity, maxStories = 12, onDrop, rankedStoryHashes }) {
|
||||
if (!Array.isArray(stories)) return [];
|
||||
const allowed = ALLOWED_LEVELS_BY_SENSITIVITY[sensitivity];
|
||||
if (!allowed) return [];
|
||||
@@ -112,10 +162,20 @@ export function filterTopStories({ stories, sensitivity, maxStories = 12, onDrop
|
||||
// and synchronous — any throw is the caller's problem (tested above).
|
||||
const emit = typeof onDrop === 'function' ? onDrop : null;
|
||||
|
||||
// Optional editorial ranking — when supplied, stories are sorted by
|
||||
// the position of `story.hash` in `rankedStoryHashes` BEFORE the
|
||||
// cap is applied, so the canonical synthesis brain's judgment of
|
||||
// editorial importance survives the MAX_STORIES_PER_USER cut.
|
||||
// Stories not in the ranking go after, in their original order.
|
||||
// Match is by short-hash prefix (≥4 chars) to tolerate the
|
||||
// ranker's emit format (the prompt uses 8-char prefixes; the
|
||||
// story carries the full hash). Empty/missing array = no-op.
|
||||
const orderedStories = applyRankedOrder(stories, rankedStoryHashes);
|
||||
|
||||
/** @type {BriefStory[]} */
|
||||
const out = [];
|
||||
for (let i = 0; i < stories.length; i++) {
|
||||
const raw = stories[i];
|
||||
for (let i = 0; i < orderedStories.length; i++) {
|
||||
const raw = orderedStories[i];
|
||||
if (out.length >= maxStories) {
|
||||
// Cap-truncation: remaining stories are not evaluated. Emit one
|
||||
// event per skipped story so operators can reconcile in vs out
|
||||
@@ -125,7 +185,7 @@ export function filterTopStories({ stories, sensitivity, maxStories = 12, onDrop
|
||||
// undercounted by up to (DIGEST_MAX_ITEMS - MAX_STORIES_PER_USER)
|
||||
// per user per tick.
|
||||
if (emit) {
|
||||
for (let j = i; j < stories.length; j++) emit({ reason: 'cap' });
|
||||
for (let j = i; j < orderedStories.length; j++) emit({ reason: 'cap' });
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@
|
||||
}
|
||||
],
|
||||
"security": {
|
||||
"csp": "default-src 'self'; connect-src 'self' https: http://localhost:5173 http://127.0.0.1:* ws: wss: blob: data: https://abacus.worldmonitor.app; img-src 'self' data: blob: https:; style-src 'self' 'unsafe-inline'; script-src 'self' 'wasm-unsafe-eval' https://www.youtube.com https://abacus.worldmonitor.app https://*.clerk.accounts.dev; worker-src 'self' blob:; font-src 'self' data: https:; media-src 'self' data: blob: https: http://127.0.0.1:* http://localhost:*; frame-src 'self' http://127.0.0.1:* http://localhost:* https://worldmonitor.app https://tech.worldmonitor.app https://finance.worldmonitor.app https://commodity.worldmonitor.app https://happy.worldmonitor.app https://www.youtube.com https://www.youtube-nocookie.com https://webcams.windy.com https://*.clerk.accounts.dev;"
|
||||
"csp": "default-src 'self'; connect-src 'self' https: http://localhost:5173 http://127.0.0.1:* ws: wss: blob: data: https://abacus.worldmonitor.app; img-src 'self' data: blob: https:; style-src 'self' 'unsafe-inline'; script-src 'self' 'wasm-unsafe-eval' https://www.youtube.com https://abacus.worldmonitor.app https://*.clerk.accounts.dev; worker-src 'self' blob:; font-src 'self' data: https:; media-src 'self' data: blob: https: http://127.0.0.1:* http://localhost:*; frame-src 'self' http://127.0.0.1:* http://localhost:* https://worldmonitor.app https://tech.worldmonitor.app https://finance.worldmonitor.app https://commodity.worldmonitor.app https://happy.worldmonitor.app https://energy.worldmonitor.app https://www.youtube.com https://www.youtube-nocookie.com https://webcams.windy.com https://*.clerk.accounts.dev;"
|
||||
}
|
||||
},
|
||||
"bundle": {
|
||||
|
||||
@@ -44,6 +44,7 @@ import type { PipelineStatusPanel } from '@/components/PipelineStatusPanel';
|
||||
import type { StorageFacilityMapPanel } from '@/components/StorageFacilityMapPanel';
|
||||
import type { FuelShortagePanel } from '@/components/FuelShortagePanel';
|
||||
import type { EnergyDisruptionsPanel } from '@/components/EnergyDisruptionsPanel';
|
||||
import type { EnergyRiskOverviewPanel } from '@/components/EnergyRiskOverviewPanel';
|
||||
import type { ClimateNewsPanel } from '@/components/ClimateNewsPanel';
|
||||
import type { ConsumerPricesPanel } from '@/components/ConsumerPricesPanel';
|
||||
import type { DefensePatentsPanel } from '@/components/DefensePatentsPanel';
|
||||
@@ -351,6 +352,10 @@ export class App {
|
||||
const panel = this.state.panels['energy-disruptions'] as EnergyDisruptionsPanel | undefined;
|
||||
if (panel) primeTask('energy-disruptions', () => panel.fetchData());
|
||||
}
|
||||
if (shouldPrime('energy-risk-overview')) {
|
||||
const panel = this.state.panels['energy-risk-overview'] as EnergyRiskOverviewPanel | undefined;
|
||||
if (panel) primeTask('energy-risk-overview', () => panel.fetchData());
|
||||
}
|
||||
if (shouldPrime('climate-news')) {
|
||||
const panel = this.state.panels['climate-news'] as ClimateNewsPanel | undefined;
|
||||
if (panel) primeTask('climate-news', () => panel.fetchData());
|
||||
|
||||
@@ -70,6 +70,7 @@ import {
|
||||
StorageFacilityMapPanel,
|
||||
FuelShortagePanel,
|
||||
EnergyDisruptionsPanel,
|
||||
EnergyRiskOverviewPanel,
|
||||
MacroTilesPanel,
|
||||
FSIPanel,
|
||||
YieldCurvePanel,
|
||||
@@ -891,6 +892,7 @@ export class PanelLayoutManager implements AppModule {
|
||||
this.createPanel('storage-facility-map', () => new StorageFacilityMapPanel());
|
||||
this.createPanel('fuel-shortages', () => new FuelShortagePanel());
|
||||
this.createPanel('energy-disruptions', () => new EnergyDisruptionsPanel());
|
||||
this.createPanel('energy-risk-overview', () => new EnergyRiskOverviewPanel());
|
||||
this.createPanel('polymarket', () => new PredictionPanel());
|
||||
|
||||
this.createNewsPanel('gov', 'panels.gov');
|
||||
|
||||
@@ -433,6 +433,9 @@ export class DeckGLMap {
|
||||
private iranEvents: IranEvent[] = [];
|
||||
private aisDisruptions: AisDisruptionEvent[] = [];
|
||||
private aisDensity: AisDensityZone[] = [];
|
||||
private liveTankers: Array<{ mmsi: string; lat: number; lon: number; speed: number; shipType: number; name: string }> = [];
|
||||
private liveTankersAbort: AbortController | null = null;
|
||||
private liveTankersTimer: ReturnType<typeof setInterval> | null = null;
|
||||
private cableAdvisories: CableAdvisory[] = [];
|
||||
private repairShips: RepairShip[] = [];
|
||||
private healthByCableId: Record<string, CableHealthRecord> = {};
|
||||
@@ -1542,6 +1545,28 @@ export class DeckGLMap {
|
||||
this.layerCache.delete('fuel-shortages-layer');
|
||||
}
|
||||
|
||||
// Live tanker positions inside chokepoint bounding boxes. AIS ship type
|
||||
// 80-89 (tanker class). Refreshed every 60s; one Map<chokepointId, ...>
|
||||
// fetch per layer-tick. deckGLOnly per src/config/map-layer-definitions.ts.
|
||||
// Powered by the relay's tankerReports field (added in PR 3 U7 alongside
|
||||
// the existing military-only candidateReports). Energy Atlas parity-push.
|
||||
if (mapLayers.liveTankers) {
|
||||
// Start (or keep) the refresh loop while the layer is on. The
|
||||
// ensure helper handles the "first time on" kick + the 60s
|
||||
// setInterval; idempotent so calling it on every layers update is
|
||||
// safe. Render immediately if we already have data; the interval
|
||||
// re-renders when fresh data arrives.
|
||||
this.ensureLiveTankersLoop();
|
||||
if (this.liveTankers.length > 0) {
|
||||
layers.push(this.createLiveTankersLayer());
|
||||
}
|
||||
} else {
|
||||
// Layer toggled off → tear down the timer so we stop hitting the
|
||||
// relay even when the map is still on screen.
|
||||
this.stopLiveTankersLoop();
|
||||
this.layerCache.delete('live-tankers-layer');
|
||||
}
|
||||
|
||||
// Conflict zones layer
|
||||
if (mapLayers.conflicts) {
|
||||
layers.push(this.createConflictZonesLayer());
|
||||
@@ -2954,6 +2979,105 @@ export class DeckGLMap {
|
||||
});
|
||||
}
|
||||
|
||||
private createLiveTankersLayer(): ScatterplotLayer {
|
||||
return new ScatterplotLayer({
|
||||
id: 'live-tankers-layer',
|
||||
data: this.liveTankers,
|
||||
getPosition: (d) => [d.lon, d.lat],
|
||||
// Radius scales loosely with deadweight class: VLCC > Aframax > Handysize.
|
||||
// AIS ship type 80-89 covers all tanker subtypes; we have no DWT field
|
||||
// in the AIS message itself, so this is a constant fallback. Future
|
||||
// enhancement: enrich via a vessel-registry lookup.
|
||||
getRadius: 2500,
|
||||
getFillColor: (d) => {
|
||||
// Anchored (speed < 0.5 kn) — orange, signals waiting / loading /
|
||||
// potential congestion. Underway (speed >= 0.5 kn) — cyan, normal
|
||||
// transit. Unknown / missing speed — gray.
|
||||
if (!Number.isFinite(d.speed)) return [127, 140, 141, 200] as [number, number, number, number];
|
||||
if (d.speed < 0.5) return [255, 183, 3, 220] as [number, number, number, number]; // amber
|
||||
return [0, 209, 255, 220] as [number, number, number, number]; // cyan
|
||||
},
|
||||
radiusMinPixels: 3,
|
||||
radiusMaxPixels: 8,
|
||||
pickable: true,
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Idempotent: ensures the 60s tanker-refresh loop is running. Called
|
||||
* each time the layer is observed enabled in the layers update. First
|
||||
* call kicks an immediate load; subsequent calls no-op. Pairs with
|
||||
* stopLiveTankersLoop() in destroy() and on layer-disable.
|
||||
*/
|
||||
private ensureLiveTankersLoop(): void {
|
||||
if (this.liveTankersTimer !== null) return; // already running
|
||||
void this.loadLiveTankers();
|
||||
this.liveTankersTimer = setInterval(() => {
|
||||
void this.loadLiveTankers();
|
||||
}, 60_000);
|
||||
}
|
||||
|
||||
/**
|
||||
* Stop the refresh loop and abort any in-flight fetch. Called when the
|
||||
* layer is toggled off (and from destroy()) to keep the relay traffic
|
||||
* scoped to active viewers.
|
||||
*/
|
||||
private stopLiveTankersLoop(): void {
|
||||
if (this.liveTankersTimer !== null) {
|
||||
clearInterval(this.liveTankersTimer);
|
||||
this.liveTankersTimer = null;
|
||||
}
|
||||
if (this.liveTankersAbort) {
|
||||
this.liveTankersAbort.abort();
|
||||
this.liveTankersAbort = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tanker loader — called externally (or on a 60s tick) to refresh
|
||||
* `this.liveTankers`. Imports lazily so the service module isn't pulled
|
||||
* into the bundle for variants where the layer is disabled.
|
||||
*/
|
||||
public async loadLiveTankers(): Promise<void> {
|
||||
// Cancel any in-flight tick before starting another. Per skill
|
||||
// closure-scoped-state-teardown-order: don't null out the abort
|
||||
// controller before calling abort.
|
||||
if (this.liveTankersAbort) {
|
||||
this.liveTankersAbort.abort();
|
||||
}
|
||||
const controller = new AbortController();
|
||||
this.liveTankersAbort = controller;
|
||||
try {
|
||||
const { fetchLiveTankers } = await import('@/services/live-tankers');
|
||||
// Thread the signal so the in-flight RPC actually cancels when a
|
||||
// newer tick starts (or the layer toggles off). Without this, a
|
||||
// slow older refresh can race-write stale data after a newer one
|
||||
// already populated this.liveTankers.
|
||||
const zones = await fetchLiveTankers(undefined, { signal: controller.signal });
|
||||
// Drop the result if this controller was aborted mid-flight or if
|
||||
// a newer load has already replaced us. Without this guard, an
|
||||
// older fetch that completed despite signal.aborted (e.g. the
|
||||
// service returned cached data without checking the signal) would
|
||||
// overwrite the newer one's data.
|
||||
if (controller.signal.aborted || this.liveTankersAbort !== controller) {
|
||||
return;
|
||||
}
|
||||
const flat = zones.flatMap((z) => z.tankers).map((t) => ({
|
||||
mmsi: t.mmsi,
|
||||
lat: t.lat,
|
||||
lon: t.lon,
|
||||
speed: t.speed,
|
||||
shipType: t.shipType,
|
||||
name: t.name,
|
||||
}));
|
||||
this.liveTankers = flat;
|
||||
this.updateLayers();
|
||||
} catch {
|
||||
// Graceful: leave existing tankers in place; layer will continue
|
||||
// rendering last-known data until the next successful tick.
|
||||
}
|
||||
}
|
||||
|
||||
private createGpsJammingLayer(): H3HexagonLayer {
|
||||
return new H3HexagonLayer({
|
||||
id: 'gps-jamming-layer',
|
||||
@@ -7003,6 +7127,7 @@ export class DeckGLMap {
|
||||
clearInterval(this.aircraftFetchTimer);
|
||||
this.aircraftFetchTimer = null;
|
||||
}
|
||||
this.stopLiveTankersLoop();
|
||||
|
||||
|
||||
this.layerCache.clear();
|
||||
|
||||
284
src/components/EnergyRiskOverviewPanel.ts
Normal file
@@ -0,0 +1,284 @@
|
||||
// Energy Risk Overview Panel
|
||||
//
|
||||
// One consolidated executive surface composing five existing data sources:
|
||||
// 1. Hormuz status (vessels/day + status from src/services/hormuz-tracker.ts)
|
||||
// 2. EU Gas storage fill % (bootstrap-cached `euGasStorage` + RPC fallback)
|
||||
// 3. Brent crude price + 1-day delta (BZ=F via fetchCommodityQuotes)
|
||||
// 4. Active disruptions count (listEnergyDisruptions filtered to endAt === null)
|
||||
// 5. Data freshness (now - youngest fetchedAt across the four upstream signals)
|
||||
//
|
||||
// Plus a "Day N of crisis" counter computed at render time from a configurable
|
||||
// pinned start date. NOT an editorial issue counter — we don't ship weekly
|
||||
// briefings yet — but the same surface area at the top of the energy variant
|
||||
// grid that peer reference sites use as their first-fold consolidator.
|
||||
//
|
||||
// Degraded-mode contract: every tile renders independently. If one of the five
|
||||
// fetches rejects, that tile shows "—" and a `data-degraded="true"` attribute
|
||||
// for QA inspection; the others render normally. Promise.allSettled — never
|
||||
// Promise.all. This is the single most important behavior of the panel: a
|
||||
// stuck Hormuz tracker must not freeze the whole executive overview.
|
||||
|
||||
import { Panel } from './Panel';
|
||||
import { escapeHtml } from '@/utils/sanitize';
|
||||
import { getRpcBaseUrl } from '@/services/rpc-client';
|
||||
import { fetchHormuzTracker, type HormuzTrackerData } from '@/services/hormuz-tracker';
|
||||
import { getEuGasStorageData } from '@/services/economic';
|
||||
import { fetchCommodityQuotes } from '@/services/market';
|
||||
import { SupplyChainServiceClient } from '@/generated/client/worldmonitor/supply_chain/v1/service_client';
|
||||
import { buildOverviewState, type OverviewState } from './_energy-risk-overview-state';
|
||||
|
||||
const supplyChain = new SupplyChainServiceClient(getRpcBaseUrl(), {
|
||||
fetch: (...args: Parameters<typeof fetch>) => globalThis.fetch(...args),
|
||||
});
|
||||
|
||||
const BRENT_SYMBOL = 'BZ=F';
|
||||
const BRENT_META = [{ symbol: BRENT_SYMBOL, name: 'Brent Crude', display: 'BRENT' }];
|
||||
|
||||
// Default pinned crisis-start date for the running Hormuz situation. Overridable
|
||||
// via VITE_HORMUZ_CRISIS_START_DATE so the date can be re-pinned without a
|
||||
// redeploy when the editorial framing shifts.
|
||||
const DEFAULT_CRISIS_START_DATE = '2026-02-23';
|
||||
const CRISIS_START_DATE: string =
|
||||
(import.meta.env?.VITE_HORMUZ_CRISIS_START_DATE as string | undefined) ||
|
||||
DEFAULT_CRISIS_START_DATE;
|
||||
const CRISIS_START_MS = Date.parse(`${CRISIS_START_DATE}T00:00:00Z`);
|
||||
|
||||
// Map Hormuz status enum → severity color. Values come from
|
||||
// src/services/hormuz-tracker.ts:20: 'closed' | 'disrupted' | 'restricted' | 'open'.
|
||||
// NOT 'normal'/'reduced'/'critical' — that triplet was a misread in earlier
|
||||
// drafts and would silently render as undefined.
|
||||
const HORMUZ_STATUS_COLOR: Record<HormuzTrackerData['status'], string> = {
|
||||
closed: '#e74c3c', // red — passage closed
|
||||
disrupted: '#e74c3c', // red — significant disruption
|
||||
restricted: '#f39c12', // amber — partial constraints
|
||||
open: '#27ae60', // green — flowing normally
|
||||
};
|
||||
const HORMUZ_STATUS_LABEL: Record<HormuzTrackerData['status'], string> = {
|
||||
closed: 'Closed',
|
||||
disrupted: 'Disrupted',
|
||||
restricted: 'Restricted',
|
||||
open: 'Open',
|
||||
};
|
||||
|
||||
// State shape lives in _energy-risk-overview-state.ts so it can be tested
|
||||
// under node:test without pulling in Vite-only modules. The panel's
|
||||
// `state` field is typed loosely (just OverviewState) — the per-tile
|
||||
// renderers cast `value` based on the tile they're rendering. The only
|
||||
// downside is the Hormuz tile loses its enum literal type from
|
||||
// HormuzTrackerData['status']; renderers narrow it again at use site.
|
||||
|
||||
const EMPTY_STATE: OverviewState = {
|
||||
hormuz: { status: 'pending' },
|
||||
euGas: { status: 'pending' },
|
||||
brent: { status: 'pending' },
|
||||
activeDisruptions: { status: 'pending' },
|
||||
};
|
||||
|
||||
export class EnergyRiskOverviewPanel extends Panel {
|
||||
private state: OverviewState = EMPTY_STATE;
|
||||
private freshnessTickHandle: ReturnType<typeof setInterval> | null = null;
|
||||
|
||||
constructor() {
|
||||
super({
|
||||
id: 'energy-risk-overview',
|
||||
title: 'Global Energy Risk Overview',
|
||||
defaultRowSpan: 1,
|
||||
infoTooltip:
|
||||
'Consolidated executive view: Strait of Hormuz vessel status, EU gas ' +
|
||||
'storage fill, Brent crude price + 1-day change, active disruption ' +
|
||||
'count, data freshness, and a configurable crisis-day counter. Each ' +
|
||||
'tile renders independently; one source failing does not block the ' +
|
||||
'others.',
|
||||
});
|
||||
}
|
||||
|
||||
public destroy(): void {
|
||||
if (this.freshnessTickHandle !== null) {
|
||||
clearInterval(this.freshnessTickHandle);
|
||||
this.freshnessTickHandle = null;
|
||||
}
|
||||
super.destroy?.();
|
||||
}
|
||||
|
||||
public async fetchData(): Promise<void> {
|
||||
const [hormuz, euGas, brent, disruptions] = await Promise.allSettled([
|
||||
fetchHormuzTracker(),
|
||||
getEuGasStorageData(),
|
||||
fetchCommodityQuotes(BRENT_META),
|
||||
// ongoingOnly=true: the panel only ever shows the count of active
|
||||
// disruptions, so let the server filter rather than ship the full
|
||||
// historical 52-event payload to be filtered client-side. This was
|
||||
// a Greptile P2 finding (over-fetch); buildOverviewState's count
|
||||
// calculation handles either response (the redundant client-side
|
||||
// filter remains as defense-in-depth in the state builder).
|
||||
supplyChain.listEnergyDisruptions({ assetId: '', assetType: '', ongoingOnly: true }),
|
||||
]);
|
||||
this.state = buildOverviewState(hormuz, euGas, brent, disruptions, Date.now());
|
||||
|
||||
if (!this.element?.isConnected) return;
|
||||
this.render();
|
||||
|
||||
// Once we have data, kick a 60s freshness re-render so the "X minutes ago"
|
||||
// string ticks live. No new RPCs — this only updates the freshness label.
|
||||
if (this.freshnessTickHandle === null) {
|
||||
this.freshnessTickHandle = setInterval(() => {
|
||||
if (this.element?.isConnected) this.render();
|
||||
}, 60_000);
|
||||
}
|
||||
}
|
||||
|
||||
private render(): void {
|
||||
injectRiskOverviewStylesOnce();
|
||||
const html = `
|
||||
<div class="ero-grid">
|
||||
${this.renderHormuzTile()}
|
||||
${this.renderEuGasTile()}
|
||||
${this.renderBrentTile()}
|
||||
${this.renderActiveDisruptionsTile()}
|
||||
${this.renderFreshnessTile()}
|
||||
${this.renderCrisisDayTile()}
|
||||
</div>
|
||||
`;
|
||||
this.setContent(html);
|
||||
}
|
||||
|
||||
private renderHormuzTile(): string {
|
||||
const t = this.state.hormuz;
|
||||
if (t.status !== 'fulfilled' || !t.value) {
|
||||
return tileHtml('Hormuz', '—', '#7f8c8d', 'data-degraded="true"');
|
||||
}
|
||||
// After extracting state-builder into a Vite-free module, the Hormuz
|
||||
// tile's value.status is typed as plain string (not the enum literal
|
||||
// union). Cast at use site so the lookup tables index correctly.
|
||||
const status = t.value.status as HormuzTrackerData['status'];
|
||||
const color = HORMUZ_STATUS_COLOR[status] ?? '#7f8c8d';
|
||||
const label = HORMUZ_STATUS_LABEL[status] ?? t.value.status;
|
||||
return tileHtml('Hormuz', label, color);
|
||||
}
|
||||
|
||||
private renderEuGasTile(): string {
|
||||
const t = this.state.euGas;
|
||||
if (t.status !== 'fulfilled' || !t.value) {
|
||||
return tileHtml('EU Gas', '—', '#7f8c8d', 'data-degraded="true"');
|
||||
}
|
||||
const fill = t.value.fillPct.toFixed(0);
|
||||
// Below 30% during refill season is critical; below 50% is amber.
|
||||
const color = t.value.fillPct < 30 ? '#e74c3c' : t.value.fillPct < 50 ? '#f39c12' : '#27ae60';
|
||||
return tileHtml('EU Gas', `${fill}%`, color);
|
||||
}
|
||||
|
||||
private renderBrentTile(): string {
|
||||
const t = this.state.brent;
|
||||
if (t.status !== 'fulfilled' || !t.value) {
|
||||
return tileHtml('Brent', '—', '#7f8c8d', 'data-degraded="true"');
|
||||
}
|
||||
const price = `$${t.value.price.toFixed(2)}`;
|
||||
const change = t.value.change;
|
||||
const sign = change >= 0 ? '+' : '';
|
||||
const deltaText = `${sign}${change.toFixed(2)}%`;
|
||||
// Oil price up = bad for energy importers (the dominant Atlas reader).
|
||||
// Up = red. Down = green. Inverted from a usual market panel.
|
||||
const color = change >= 0 ? '#e74c3c' : '#27ae60';
|
||||
return tileHtml('Brent', price, color, '', deltaText);
|
||||
}
|
||||
|
||||
private renderActiveDisruptionsTile(): string {
|
||||
const t = this.state.activeDisruptions;
|
||||
if (t.status !== 'fulfilled' || !t.value) {
|
||||
return tileHtml('Active disruptions', '—', '#7f8c8d', 'data-degraded="true"');
|
||||
}
|
||||
const n = t.value.count;
|
||||
const color = n === 0 ? '#27ae60' : n < 5 ? '#f39c12' : '#e74c3c';
|
||||
return tileHtml('Active disruptions', String(n), color);
|
||||
}
|
||||
|
||||
private renderFreshnessTile(): string {
|
||||
// Youngest fetchedAt across all 4 upstream signals.
|
||||
const tiles = [this.state.hormuz, this.state.euGas, this.state.brent, this.state.activeDisruptions];
|
||||
const fetchedAts = tiles
|
||||
.map(t => t.fetchedAt)
|
||||
.filter((v): v is number => typeof v === 'number');
|
||||
if (fetchedAts.length === 0) {
|
||||
return tileHtml('Updated', '—', '#7f8c8d', 'data-degraded="true"');
|
||||
}
|
||||
const youngest = Math.max(...fetchedAts);
|
||||
const ageMin = Math.floor((Date.now() - youngest) / 60_000);
|
||||
const label = ageMin <= 0 ? 'just now' : ageMin === 1 ? '1 min ago' : `${ageMin} min ago`;
|
||||
return tileHtml('Updated', label, '#7f8c8d');
|
||||
}
|
||||
|
||||
private renderCrisisDayTile(): string {
|
||||
if (!Number.isFinite(CRISIS_START_MS)) {
|
||||
// Mis-configured env (Date.parse returned NaN). Fail loudly via "—"
|
||||
// rather than rendering "Day NaN" or "Day -50".
|
||||
return tileHtml('Hormuz crisis', '—', '#7f8c8d', 'data-degraded="true"');
|
||||
}
|
||||
const days = Math.floor((Date.now() - CRISIS_START_MS) / 86_400_000);
|
||||
if (days < 0) {
|
||||
// Future-dated start: still render but with a sentinel value.
|
||||
return tileHtml('Hormuz crisis', 'pending', '#7f8c8d');
|
||||
}
|
||||
return tileHtml('Hormuz crisis', `Day ${days}`, '#7f8c8d');
|
||||
}
|
||||
}
|
||||
|
||||
function tileHtml(label: string, value: string, color: string, attrs = '', sub = ''): string {
|
||||
const subHtml = sub ? `<div class="ero-tile__sub" style="color:${color}">${escapeHtml(sub)}</div>` : '';
|
||||
return `
|
||||
<div class="ero-tile" ${attrs}>
|
||||
<div class="ero-tile__label">${escapeHtml(label)}</div>
|
||||
<div class="ero-tile__value" style="color:${color}">${escapeHtml(value)}</div>
|
||||
${subHtml}
|
||||
</div>
|
||||
`;
|
||||
}
|
||||
|
||||
// CSS is injected once into <head> rather than emitted into the panel body.
|
||||
// Pre-fix, the freshness setInterval re-rendered every 60s and called
|
||||
// setContent(html + <style>...) — the style tag was torn out and re-inserted
|
||||
// on every tick. Now the panel HTML is style-free; the rules live in head.
|
||||
let _riskOverviewStylesInjected = false;
|
||||
function injectRiskOverviewStylesOnce(): void {
|
||||
if (_riskOverviewStylesInjected) return;
|
||||
if (typeof document === 'undefined') return;
|
||||
const style = document.createElement('style');
|
||||
style.setAttribute('data-ero-styles', '');
|
||||
style.textContent = RISK_OVERVIEW_CSS;
|
||||
document.head.appendChild(style);
|
||||
_riskOverviewStylesInjected = true;
|
||||
}
|
||||
|
||||
const RISK_OVERVIEW_CSS = `
|
||||
.ero-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(110px, 1fr));
|
||||
gap: 8px;
|
||||
padding: 8px;
|
||||
}
|
||||
.ero-tile {
|
||||
background: rgba(255, 255, 255, 0.04);
|
||||
border: 1px solid rgba(255, 255, 255, 0.08);
|
||||
border-radius: 6px;
|
||||
padding: 10px 12px;
|
||||
min-height: 64px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
}
|
||||
.ero-tile__label {
|
||||
font-size: 10px;
|
||||
text-transform: uppercase;
|
||||
letter-spacing: 0.04em;
|
||||
color: rgba(255, 255, 255, 0.55);
|
||||
margin-bottom: 4px;
|
||||
}
|
||||
.ero-tile__value {
|
||||
font-size: 18px;
|
||||
font-weight: 600;
|
||||
line-height: 1.1;
|
||||
}
|
||||
.ero-tile__sub {
|
||||
font-size: 12px;
|
||||
margin-top: 2px;
|
||||
}
|
||||
`;
|
||||
84
src/components/_energy-risk-overview-state.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
// Pure state-building logic for EnergyRiskOverviewPanel. Extracted from the
|
||||
// panel class so it can be imported under node:test without pulling in the
|
||||
// Vite-only modules the panel transitively depends on (i18n's import.meta.glob,
|
||||
// etc). Keep this file dep-free apart from generated types.
|
||||
|
||||
export interface TileState<T> {
|
||||
status: 'fulfilled' | 'rejected' | 'pending';
|
||||
value?: T;
|
||||
fetchedAt?: number;
|
||||
}
|
||||
|
||||
export interface OverviewState {
|
||||
hormuz: TileState<{ status: string }>;
|
||||
euGas: TileState<{ fillPct: number; fillPctChange1d: number }>;
|
||||
brent: TileState<{ price: number; change: number }>;
|
||||
activeDisruptions: TileState<{ count: number }>;
|
||||
}
|
||||
|
||||
// Minimal shapes — only the fields the state builder reads. Loose enough that
|
||||
// tests can pass plain objects without importing the full generated types.
|
||||
interface HormuzMin { status?: string }
|
||||
interface EuGasMin { unavailable?: boolean; fillPct?: number; fillPctChange1d?: number }
|
||||
interface BrentResultMin { data?: Array<{ price: number | null; change?: number | null }> }
|
||||
interface DisruptionsMin {
|
||||
upstreamUnavailable?: boolean;
|
||||
events?: Array<{ endAt?: string | null }>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an OverviewState from the four allSettled results. Pure: no I/O,
|
||||
* no Date.now() unless the caller passes a clock. Each tile resolves to
|
||||
* 'fulfilled' or 'rejected' independently — one source failing CANNOT
|
||||
* cascade into the others. This is the core degraded-mode contract the
|
||||
* panel guarantees.
|
||||
*/
|
||||
export function buildOverviewState(
|
||||
hormuz: PromiseSettledResult<HormuzMin | null | undefined>,
|
||||
euGas: PromiseSettledResult<EuGasMin | null | undefined>,
|
||||
brent: PromiseSettledResult<BrentResultMin | null | undefined>,
|
||||
disruptions: PromiseSettledResult<DisruptionsMin | null | undefined>,
|
||||
now: number,
|
||||
): OverviewState {
|
||||
return {
|
||||
hormuz: hormuz.status === 'fulfilled' && hormuz.value && hormuz.value.status
|
||||
? { status: 'fulfilled', value: { status: hormuz.value.status }, fetchedAt: now }
|
||||
: { status: 'rejected' },
|
||||
euGas: euGas.status === 'fulfilled' && euGas.value && !euGas.value.unavailable && (euGas.value.fillPct ?? 0) > 0
|
||||
? {
|
||||
status: 'fulfilled',
|
||||
value: {
|
||||
fillPct: euGas.value.fillPct as number,
|
||||
fillPctChange1d: euGas.value.fillPctChange1d ?? 0,
|
||||
},
|
||||
fetchedAt: now,
|
||||
}
|
||||
: { status: 'rejected' },
|
||||
brent: (() => {
|
||||
if (brent.status !== 'fulfilled' || !brent.value || !brent.value.data || brent.value.data.length === 0) {
|
||||
return { status: 'rejected' as const };
|
||||
}
|
||||
const q = brent.value.data[0];
|
||||
if (!q || q.price === null) return { status: 'rejected' as const };
|
||||
return {
|
||||
status: 'fulfilled' as const,
|
||||
value: { price: q.price, change: q.change ?? 0 },
|
||||
fetchedAt: now,
|
||||
};
|
||||
})(),
|
||||
activeDisruptions: disruptions.status === 'fulfilled' && disruptions.value && !disruptions.value.upstreamUnavailable
|
||||
? {
|
||||
status: 'fulfilled',
|
||||
value: { count: (disruptions.value.events ?? []).filter((e) => !e.endAt).length },
|
||||
fetchedAt: now,
|
||||
}
|
||||
: { status: 'rejected' },
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience for tests: count tiles that are in degraded ('rejected') state.
|
||||
*/
|
||||
export function countDegradedTiles(state: OverviewState): number {
|
||||
return Object.values(state).filter((t) => t.status === 'rejected').length;
|
||||
}
|
||||
@@ -92,6 +92,7 @@ export { PipelineStatusPanel } from './PipelineStatusPanel';
|
||||
export { StorageFacilityMapPanel } from './StorageFacilityMapPanel';
|
||||
export { FuelShortagePanel } from './FuelShortagePanel';
|
||||
export { EnergyDisruptionsPanel } from './EnergyDisruptionsPanel';
|
||||
export { EnergyRiskOverviewPanel } from './EnergyRiskOverviewPanel';
|
||||
export * from './ClimateNewsPanel';
|
||||
export * from './DiseaseOutbreaksPanel';
|
||||
export * from './SocialVelocityPanel';
|
||||
|
||||
@@ -110,6 +110,7 @@ export const COMMANDS: Command[] = [
|
||||
{ id: 'panel:storage-facility-map', keywords: ['storage', 'storage facilities', 'strategic storage', 'spr', 'lng', 'lng terminals', 'ugs', 'tank farms', 'storage atlas'], label: 'Panel: Strategic Storage Atlas', icon: '\u{1F6E2}\uFE0F', category: 'panels' },
|
||||
{ id: 'panel:fuel-shortages', keywords: ['fuel shortages', 'shortage', 'petrol shortage', 'diesel shortage', 'jet fuel shortage', 'rationing', 'stations closed'], label: 'Panel: Global Fuel Shortage Registry', icon: '⛽', category: 'panels' },
|
||||
{ id: 'panel:energy-disruptions', keywords: ['energy disruptions', 'disruption log', 'disruption timeline', 'energy events', 'sanctions events', 'pipeline sabotage', 'nord stream sabotage', 'drone strike', 'force majeure', 'mechanical failure'], label: 'Panel: Energy Disruptions Log', icon: '\u{1F4A5}', category: 'panels' },
|
||||
{ id: 'panel:energy-risk-overview', keywords: ['risk overview', 'energy risk', 'executive overview', 'energy dashboard', 'hormuz status', 'eu gas fill', 'brent price', 'crisis day', 'energy executive'], label: 'Panel: Global Energy Risk Overview', icon: '\u{1F4CA}', category: 'panels' },
|
||||
{ id: 'panel:gov', keywords: ['government', 'gov'], label: 'Panel: Government', icon: '\u{1F3DB}\uFE0F', category: 'panels' },
|
||||
{ id: 'panel:policy', keywords: ['policy', 'ai policy', 'regulation', 'tech policy'], label: 'Panel: AI Policy & Regulation', icon: '\u{1F4DC}', category: 'panels' },
|
||||
{ id: 'panel:thinktanks', keywords: ['think tanks', 'thinktanks', 'analysis'], label: 'Panel: Think Tanks', icon: '\u{1F9E0}', category: 'panels' },
|
||||
|
||||
@@ -103,6 +103,7 @@ export const LAYER_REGISTRY: Record<keyof MapLayers, LayerDefinition> = {
|
||||
// without `deckGLOnly` once both renderers gain real support.
|
||||
storageFacilities: def('storageFacilities', '🏗', 'storageFacilities', 'Storage Facilities', ['flat'], undefined, true),
|
||||
fuelShortages: def('fuelShortages', '⚙', 'fuelShortages', 'Fuel Shortages', ['flat'], undefined, true),
|
||||
liveTankers: def('liveTankers', '🚢', 'liveTankers', 'Live Tanker Positions', ['flat'], undefined, true),
|
||||
};
|
||||
|
||||
const VARIANT_LAYER_ORDER: Record<MapVariant, Array<keyof MapLayers>> = {
|
||||
@@ -141,7 +142,7 @@ const VARIANT_LAYER_ORDER: Record<MapVariant, Array<keyof MapLayers>> = {
|
||||
energy: [
|
||||
// Core energy infrastructure — mirror of ENERGY_MAP_LAYERS in panels.ts
|
||||
'pipelines', 'storageFacilities', 'fuelShortages', 'waterways', 'commodityPorts', 'commodityHubs',
|
||||
'ais', 'tradeRoutes', 'minerals',
|
||||
'ais', 'liveTankers', 'tradeRoutes', 'minerals',
|
||||
// Energy-adjacent context
|
||||
'sanctions', 'fires', 'climate', 'weather', 'outages', 'natural',
|
||||
'resilienceScore', 'dayNight',
|
||||
|
||||
@@ -81,6 +81,7 @@ const FULL_PANELS: Record<string, PanelConfig> = {
|
||||
'storage-facility-map': { name: 'Strategic Storage Atlas', enabled: true, priority: 2 },
|
||||
'fuel-shortages': { name: 'Global Fuel Shortage Registry', enabled: true, priority: 2 },
|
||||
'energy-disruptions': { name: 'Energy Disruptions Log', enabled: true, priority: 2 },
|
||||
'energy-risk-overview': { name: 'Global Energy Risk Overview', enabled: false, priority: 2 },
|
||||
'gulf-economies': { name: 'Gulf Economies', enabled: false, priority: 2 },
|
||||
'consumer-prices': { name: 'Consumer Prices', enabled: false, priority: 2 },
|
||||
'grocery-basket': { name: 'Grocery Index', enabled: false, priority: 2 },
|
||||
@@ -929,6 +930,7 @@ const COMMODITY_MOBILE_MAP_LAYERS: MapLayers = {
|
||||
// ============================================
|
||||
const ENERGY_PANELS: Record<string, PanelConfig> = {
|
||||
map: { name: 'Energy Atlas Map', enabled: true, priority: 1 },
|
||||
'energy-risk-overview': { name: 'Global Energy Risk Overview', enabled: true, priority: 1 },
|
||||
'chokepoint-strip': { name: 'Chokepoint Status', enabled: true, priority: 1 },
|
||||
'pipeline-status': { name: 'Oil & Gas Pipeline Status', enabled: true, priority: 1 },
|
||||
'storage-facility-map': { name: 'Strategic Storage Atlas', enabled: true, priority: 1 },
|
||||
@@ -1021,6 +1023,7 @@ const ENERGY_MAP_LAYERS: MapLayers = {
|
||||
diseaseOutbreaks: false,
|
||||
storageFacilities: true, // UGS / SPR / LNG / crude hubs (Day 9-10 registry)
|
||||
fuelShortages: true, // Global fuel shortage alerts (Day 11-12 registry)
|
||||
liveTankers: true, // AIS ship type 80-89 inside chokepoint bboxes (parity-push PR 3)
|
||||
};
|
||||
|
||||
const ENERGY_MOBILE_MAP_LAYERS: MapLayers = {
|
||||
@@ -1078,6 +1081,7 @@ const ENERGY_MOBILE_MAP_LAYERS: MapLayers = {
|
||||
diseaseOutbreaks: false,
|
||||
storageFacilities: true,
|
||||
fuelShortages: true,
|
||||
liveTankers: true,
|
||||
};
|
||||
|
||||
// ============================================
|
||||
@@ -1242,7 +1246,7 @@ export const PANEL_CATEGORY_MAP: Record<string, { labelKey: string; panelKeys: s
|
||||
},
|
||||
marketsFinance: {
|
||||
labelKey: 'header.panelCatMarketsFinance',
|
||||
panelKeys: ['commodities', 'energy-complex', 'pipeline-status', 'storage-facility-map', 'fuel-shortages', 'energy-disruptions', 'hormuz-tracker', 'energy-crisis', 'markets', 'economic', 'trade-policy', 'sanctions-pressure', 'supply-chain', 'finance', 'polymarket', 'macro-signals', 'gulf-economies', 'etf-flows', 'stablecoins', 'crypto', 'heatmap'],
|
||||
panelKeys: ['commodities', 'energy-complex', 'energy-risk-overview', 'pipeline-status', 'storage-facility-map', 'fuel-shortages', 'energy-disruptions', 'hormuz-tracker', 'energy-crisis', 'markets', 'economic', 'trade-policy', 'sanctions-pressure', 'supply-chain', 'finance', 'polymarket', 'macro-signals', 'gulf-economies', 'etf-flows', 'stablecoins', 'crypto', 'heatmap'],
|
||||
},
|
||||
topical: {
|
||||
labelKey: 'header.panelCatTopical',
|
||||
|
||||
@@ -7,6 +7,7 @@ export interface GetVesselSnapshotRequest {
|
||||
swLat: number;
|
||||
swLon: number;
|
||||
includeCandidates: boolean;
|
||||
includeTankers: boolean;
|
||||
}
|
||||
|
||||
export interface GetVesselSnapshotResponse {
|
||||
@@ -20,6 +21,7 @@ export interface VesselSnapshot {
|
||||
sequence: number;
|
||||
status?: AisSnapshotStatus;
|
||||
candidateReports: SnapshotCandidateReport[];
|
||||
tankerReports: SnapshotCandidateReport[];
|
||||
}
|
||||
|
||||
export interface AisDensityZone {
|
||||
@@ -156,6 +158,7 @@ export class MaritimeServiceClient {
|
||||
if (req.swLat != null && req.swLat !== 0) params.set("sw_lat", String(req.swLat));
|
||||
if (req.swLon != null && req.swLon !== 0) params.set("sw_lon", String(req.swLon));
|
||||
if (req.includeCandidates) params.set("include_candidates", String(req.includeCandidates));
|
||||
if (req.includeTankers) params.set("include_tankers", String(req.includeTankers));
|
||||
const url = this.baseURL + path + (params.toString() ? "?" + params.toString() : "");
|
||||
|
||||
const headers: Record<string, string> = {
|
||||
|
||||
@@ -7,6 +7,7 @@ export interface GetVesselSnapshotRequest {
|
||||
swLat: number;
|
||||
swLon: number;
|
||||
includeCandidates: boolean;
|
||||
includeTankers: boolean;
|
||||
}
|
||||
|
||||
export interface GetVesselSnapshotResponse {
|
||||
@@ -20,6 +21,7 @@ export interface VesselSnapshot {
|
||||
sequence: number;
|
||||
status?: AisSnapshotStatus;
|
||||
candidateReports: SnapshotCandidateReport[];
|
||||
tankerReports: SnapshotCandidateReport[];
|
||||
}
|
||||
|
||||
export interface AisDensityZone {
|
||||
@@ -168,6 +170,7 @@ export function createMaritimeServiceRoutes(
|
||||
swLat: Number(params.get("sw_lat") ?? "0"),
|
||||
swLon: Number(params.get("sw_lon") ?? "0"),
|
||||
includeCandidates: params.get("include_candidates") === "true",
|
||||
includeTankers: params.get("include_tankers") === "true",
|
||||
};
|
||||
if (options?.validateRequest) {
|
||||
const bodyViolations = options.validateRequest("getVesselSnapshot", body);
|
||||
|
||||
@@ -265,6 +265,7 @@ Sentry.init({
|
||||
/Cannot read properties of \w+ \(reading '[^']*[^\x00-\x7F][^']*'\)/, // Non-ASCII property name in message = mojibake/corrupted identifier from injected extension; our bundle emits ASCII-only identifiers (WORLDMONITOR-NS)
|
||||
/Octal literals are not allowed in strict mode/, // Runtime SyntaxError from injected extension script; our TS bundle never emits octal literals and doesn't eval (WORLDMONITOR-NV)
|
||||
/Unexpected identifier 'm'/, // Foreign script injection on Opera; pre-compiled bundle can't parse-fail at runtime (WORLDMONITOR-NT)
|
||||
/PlayerControlsInterface\.\w+ is not a function/, // Android Chrome WebView native bridge injection (Bilibili/UC/QQ-style host) — never emitted by our code (WORLDMONITOR-P2)
|
||||
],
|
||||
beforeSend(event) {
|
||||
const msg = event.exception?.values?.[0]?.value ?? '';
|
||||
|
||||
158
src/services/live-tankers.ts
Normal file
@@ -0,0 +1,158 @@
|
||||
// Live Tankers service — fetches per-vessel position reports for AIS ship
|
||||
// type 80-89 (tanker class) inside chokepoint bounding boxes. Powers the
|
||||
// LiveTankersLayer on the Energy Atlas map.
|
||||
//
|
||||
// Per the parity-push plan U8 (docs/plans/2026-04-25-003-feat-energy-parity-pushup-plan.md):
|
||||
// - Sources bbox centroids from `src/config/chokepoint-registry.ts`
|
||||
// (NOT `server/.../_chokepoint-ids.ts` — that file strips lat/lon).
|
||||
// - One getVesselSnapshot call per chokepoint, ±2° box around centroid.
|
||||
// - In-memory cache, 60s TTL per chokepoint key.
|
||||
// - On per-zone failure, returns last successful response (graceful
|
||||
// degradation; one outage doesn't blank the whole layer).
|
||||
//
|
||||
// The handler-side cache (server/worldmonitor/maritime/v1/get-vessel-snapshot.ts)
|
||||
// also caches by quantized bbox + tankers flag at 60s TTL, and the gateway
|
||||
// 'live' tier (server/gateway.ts) sets s-maxage=60 so concurrent identical
|
||||
// requests across users get absorbed at the CDN. This three-layer cache
|
||||
// (CDN → handler → service) means the per-tab 6-call/min worst case scales
|
||||
// sub-linearly with the user count.
|
||||
|
||||
import { CHOKEPOINT_REGISTRY, type ChokepointRegistryEntry } from '@/config/chokepoint-registry';
|
||||
import { getRpcBaseUrl } from '@/services/rpc-client';
|
||||
import { MaritimeServiceClient } from '@/generated/client/worldmonitor/maritime/v1/service_client';
|
||||
import type { SnapshotCandidateReport } from '@/generated/client/worldmonitor/maritime/v1/service_client';
|
||||
|
||||
const client = new MaritimeServiceClient(getRpcBaseUrl(), {
|
||||
fetch: (...args: Parameters<typeof fetch>) => globalThis.fetch(...args),
|
||||
});
|
||||
|
||||
// ±2° box around each chokepoint centroid. Tuned in the implementation
|
||||
// section of plan U8 — Hormuz traffic at peak transit is ~50-150 vessels
|
||||
// in this box, well below the server-side 200/zone cap. Implementer should
|
||||
// adjust if a specific zone (e.g. Malacca, much busier) consistently fills
|
||||
// the cap.
|
||||
const BBOX_HALF_DEGREES = 2;
|
||||
|
||||
// Cache TTL must match the gateway 'live' tier's s-maxage (60s). Going
|
||||
// shorter wastes CDN cache hits; going longer breaks the freshness contract.
|
||||
const CACHE_TTL_MS = 60_000;
|
||||
|
||||
// Default chokepoints whose live tankers we render. Energy-relevant subset
|
||||
// of the full chokepoint registry — global trade hubs that aren't oil/gas
|
||||
// chokepoints (e.g. Strait of Dover, English Channel) are skipped.
|
||||
const DEFAULT_CHOKEPOINT_IDS = new Set<string>([
|
||||
'hormuz_strait',
|
||||
'suez',
|
||||
'bab_el_mandeb',
|
||||
'malacca_strait',
|
||||
'panama',
|
||||
'bosphorus', // Turkish Straits per CHOKEPOINT_REGISTRY canonical id
|
||||
]);
|
||||
|
||||
interface CacheSlot {
|
||||
data: SnapshotCandidateReport[];
|
||||
fetchedAt: number;
|
||||
}
|
||||
|
||||
const cache = new Map<string, CacheSlot>();
|
||||
|
||||
export interface ChokepointTankers {
|
||||
chokepoint: ChokepointRegistryEntry;
|
||||
tankers: SnapshotCandidateReport[];
|
||||
/** True when this zone's last fetch failed and we're serving stale data. */
|
||||
stale: boolean;
|
||||
}
|
||||
|
||||
function getDefaultChokepoints(): ChokepointRegistryEntry[] {
|
||||
return CHOKEPOINT_REGISTRY.filter((c) => DEFAULT_CHOKEPOINT_IDS.has(c.id));
|
||||
}
|
||||
|
||||
function bboxFor(c: ChokepointRegistryEntry): {
|
||||
swLat: number; swLon: number; neLat: number; neLon: number;
|
||||
} {
|
||||
return {
|
||||
swLat: c.lat - BBOX_HALF_DEGREES,
|
||||
swLon: c.lon - BBOX_HALF_DEGREES,
|
||||
neLat: c.lat + BBOX_HALF_DEGREES,
|
||||
neLon: c.lon + BBOX_HALF_DEGREES,
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchOne(c: ChokepointRegistryEntry, signal?: AbortSignal): Promise<SnapshotCandidateReport[]> {
|
||||
const bbox = bboxFor(c);
|
||||
const resp = await client.getVesselSnapshot(
|
||||
{
|
||||
...bbox,
|
||||
includeCandidates: false,
|
||||
includeTankers: true,
|
||||
},
|
||||
{ signal },
|
||||
);
|
||||
return resp.snapshot?.tankerReports ?? [];
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch tanker positions for a set of chokepoints, returning per-zone
|
||||
* results. Failed zones return their last successful data with `stale: true`;
|
||||
* if a zone has never succeeded, it's omitted from the return value.
|
||||
*
|
||||
* @param chokepoints - chokepoints to query. Defaults to the energy-relevant
|
||||
* subset (Hormuz, Suez, Bab el-Mandeb, Malacca, Panama,
|
||||
* Turkish Straits) when omitted.
|
||||
* @param options.signal - AbortSignal to cancel in-flight RPC calls when
|
||||
* the caller's context tears down (layer toggled off,
|
||||
* map destroyed, newer refresh started). Without this,
|
||||
* a slow older refresh can race-write stale data after
|
||||
* a newer one already populated the layer state.
|
||||
*/
|
||||
export async function fetchLiveTankers(
|
||||
chokepoints?: ChokepointRegistryEntry[],
|
||||
options: { signal?: AbortSignal } = {},
|
||||
): Promise<ChokepointTankers[]> {
|
||||
const targets = chokepoints ?? getDefaultChokepoints();
|
||||
const now = Date.now();
|
||||
const { signal } = options;
|
||||
|
||||
const results = await Promise.allSettled(
|
||||
targets.map(async (c) => {
|
||||
const slot = cache.get(c.id);
|
||||
if (slot && now - slot.fetchedAt < CACHE_TTL_MS) {
|
||||
return { chokepoint: c, tankers: slot.data, stale: false };
|
||||
}
|
||||
// Bail early if already aborted before the per-zone fetch starts —
|
||||
// saves a wasted RPC + cache write when the caller has moved on.
|
||||
if (signal?.aborted) {
|
||||
if (slot) return { chokepoint: c, tankers: slot.data, stale: true };
|
||||
throw new DOMException('aborted before fetch', 'AbortError');
|
||||
}
|
||||
try {
|
||||
const tankers = await fetchOne(c, signal);
|
||||
// Re-check abort after the fetch resolves: prevents a slow
|
||||
// resolver from clobbering cache after the caller cancelled.
|
||||
if (signal?.aborted) {
|
||||
if (slot) return { chokepoint: c, tankers: slot.data, stale: true };
|
||||
throw new DOMException('aborted after fetch', 'AbortError');
|
||||
}
|
||||
cache.set(c.id, { data: tankers, fetchedAt: now });
|
||||
return { chokepoint: c, tankers, stale: false };
|
||||
} catch (err) {
|
||||
// Per-zone failure: serve last-known data if any. The layer
|
||||
// continues rendering even if one chokepoint's relay is flaky.
|
||||
if (slot) return { chokepoint: c, tankers: slot.data, stale: true };
|
||||
throw err; // no last-known data → drop this zone
|
||||
}
|
||||
}),
|
||||
);
|
||||
|
||||
return results
|
||||
.filter((r): r is PromiseFulfilledResult<ChokepointTankers> => r.status === 'fulfilled')
|
||||
.map((r) => r.value);
|
||||
}
|
||||
|
||||
// Internal exports for test coverage; not part of the public surface.
|
||||
export const _internal = {
|
||||
bboxFor,
|
||||
getDefaultChokepoints,
|
||||
CACHE_TTL_MS,
|
||||
BBOX_HALF_DEGREES,
|
||||
};
|
||||
@@ -161,7 +161,7 @@ interface ParsedSnapshot {
|
||||
async function fetchSnapshotPayload(includeCandidates: boolean, signal?: AbortSignal): Promise<ParsedSnapshot | null> {
|
||||
const response = await snapshotBreaker.execute(
|
||||
async () => client.getVesselSnapshot(
|
||||
{ neLat: 0, neLon: 0, swLat: 0, swLon: 0, includeCandidates },
|
||||
{ neLat: 0, neLon: 0, swLat: 0, swLon: 0, includeCandidates, includeTankers: false },
|
||||
{ signal },
|
||||
),
|
||||
emptySnapshotFallback,
|
||||
|
||||
@@ -24,7 +24,7 @@ export type PipelinePublicBadge = 'flowing' | 'reduced' | 'offline' | 'disputed'
|
||||
|
||||
export interface PipelineEvidenceInput {
|
||||
physicalState?: string; // 'flowing'|'reduced'|'offline'|'unknown'
|
||||
physicalStateSource?: string; // 'operator'|'regulator'|'press'|'satellite'|'ais-relay'
|
||||
physicalStateSource?: string; // 'operator'|'regulator'|'press'|'satellite'|'ais-relay'|'gem'
|
||||
operatorStatement?: { text?: string; url?: string; date?: string } | null;
|
||||
commercialState?: string; // 'under_contract'|'expired'|'suspended'|'unknown'
|
||||
sanctionRefs?: ReadonlyArray<{ authority?: string; listId?: string; date?: string; url?: string }>;
|
||||
@@ -49,7 +49,7 @@ const EVIDENCE_STALENESS_DAYS = 14;
|
||||
* → "offline" (high-confidence offline with paperwork)
|
||||
* 2. physical_state = "offline" AND operatorStatement != null
|
||||
* → "offline" (operator-disclosed outage)
|
||||
* 3. physical_state = "offline" AND physicalStateSource ∈ {press, ais-relay, satellite}
|
||||
* 3. physical_state = "offline" AND physicalStateSource ∈ {press, ais-relay, satellite, gem}
|
||||
* → "disputed" (external-signal offline without operator/sanction confirmation)
|
||||
* 4. physical_state = "reduced"
|
||||
* → "reduced"
|
||||
@@ -77,7 +77,7 @@ export function derivePipelinePublicBadge(
|
||||
evidence.commercialState === 'expired' || evidence.commercialState === 'suspended';
|
||||
const hasOperatorStatement = evidence.operatorStatement != null &&
|
||||
((evidence.operatorStatement.text?.length ?? 0) > 0);
|
||||
const hasExternalSignal = ['press', 'ais-relay', 'satellite'].includes(
|
||||
const hasExternalSignal = ['press', 'ais-relay', 'satellite', 'gem'].includes(
|
||||
evidence.physicalStateSource ?? '',
|
||||
);
|
||||
|
||||
|
||||
@@ -680,6 +680,9 @@ export interface MapLayers {
|
||||
// across all other variants remain valid without touching them).
|
||||
storageFacilities?: boolean;
|
||||
fuelShortages?: boolean;
|
||||
/** Live tanker positions (AIS ship type 80-89) inside chokepoint bboxes.
|
||||
* Refreshed every 60s via getVesselSnapshot. Energy Atlas parity-push. */
|
||||
liveTankers?: boolean;
|
||||
}
|
||||
|
||||
export interface AIDataCenter {
|
||||
|
||||
@@ -192,7 +192,14 @@ describe('composeBriefFromDigestStories — continued', () => {
|
||||
assert.deepEqual(env.data.stories.map((s) => s.headline), ['A', 'B']);
|
||||
});
|
||||
|
||||
it('caps at 12 stories per brief', () => {
|
||||
it('caps at 12 stories per brief by default (env-tunable via DIGEST_MAX_STORIES_PER_USER)', () => {
|
||||
// Default kept at 12. Offline sweep harness against 2026-04-24
|
||||
// production replay showed cap=16 dropped visible_quality from
|
||||
// 0.916 → 0.716 at the active 0.45 threshold (positions 13-16
|
||||
// are mostly singletons or "should-separate" members at this
|
||||
// threshold, so they dilute without helping adjacency). The
|
||||
// constant is env-tunable so a Railway flip can experiment with
|
||||
// cap values once new sweep evidence justifies them.
|
||||
const many = Array.from({ length: 30 }, (_, i) =>
|
||||
digestStory({ hash: `h${i}`, title: `Story ${i}` }),
|
||||
);
|
||||
@@ -373,3 +380,138 @@ describe('composeBriefFromDigestStories — continued', () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// ── synthesis splice (Codex Round-3 plan, Step 3) ─────────────────────────
|
||||
|
||||
describe('composeBriefFromDigestStories — synthesis splice', () => {
|
||||
it('substitutes envelope.digest.lead/threads/signals/publicLead from synthesis', () => {
|
||||
const env = composeBriefFromDigestStories(
|
||||
rule(),
|
||||
[digestStory({ hash: 'h1', title: 'Story 1' }), digestStory({ hash: 'h2', title: 'Story 2' })],
|
||||
{ clusters: 12, multiSource: 3 },
|
||||
{
|
||||
nowMs: NOW,
|
||||
synthesis: {
|
||||
lead: 'A canonical executive lead from the orchestration layer that exceeds the 40-char floor.',
|
||||
threads: [{ tag: 'Energy', teaser: 'Hormuz tensions resurface today.' }],
|
||||
signals: ['Watch for naval redeployment in the Gulf.'],
|
||||
publicLead: 'A non-personalised lead suitable for the share-URL surface.',
|
||||
},
|
||||
},
|
||||
);
|
||||
assert.ok(env);
|
||||
assert.match(env.data.digest.lead, /A canonical executive lead/);
|
||||
assert.equal(env.data.digest.threads.length, 1);
|
||||
assert.equal(env.data.digest.threads[0].tag, 'Energy');
|
||||
assert.deepEqual(env.data.digest.signals, ['Watch for naval redeployment in the Gulf.']);
|
||||
assert.match(env.data.digest.publicLead, /share-URL surface/);
|
||||
});
|
||||
|
||||
it('falls back to stub lead when synthesis is omitted (legacy callers)', () => {
|
||||
const env = composeBriefFromDigestStories(
|
||||
rule(),
|
||||
[digestStory({ hash: 'h1' })],
|
||||
{ clusters: 0, multiSource: 0 },
|
||||
{ nowMs: NOW }, // no synthesis arg
|
||||
);
|
||||
assert.ok(env);
|
||||
// Stub lead from assembleStubbedBriefEnvelope: "Today's brief surfaces N threads…"
|
||||
assert.match(env.data.digest.lead, /Today's brief surfaces/);
|
||||
// publicLead absent on the stub path — the renderer's public-mode
|
||||
// fail-safe omits the pull-quote rather than leaking personalised lead.
|
||||
assert.equal(env.data.digest.publicLead, undefined);
|
||||
});
|
||||
|
||||
it('partial synthesis (only lead) does not clobber threads/signals stubs', () => {
|
||||
const env = composeBriefFromDigestStories(
|
||||
rule(),
|
||||
[digestStory({ hash: 'h1', title: 'X', sources: ['Reuters'] })],
|
||||
{ clusters: 0, multiSource: 0 },
|
||||
{
|
||||
nowMs: NOW,
|
||||
synthesis: {
|
||||
lead: 'Custom lead at least forty characters long for validator pass-through.',
|
||||
// threads + signals omitted — must keep the stub defaults.
|
||||
},
|
||||
},
|
||||
);
|
||||
assert.ok(env);
|
||||
assert.match(env.data.digest.lead, /Custom lead/);
|
||||
// Threads default from deriveThreadsFromStories (stub path).
|
||||
assert.ok(env.data.digest.threads.length >= 1);
|
||||
});
|
||||
|
||||
it('rankedStoryHashes re-orders the surfaced pool BEFORE the cap is applied', () => {
|
||||
const stories = [
|
||||
digestStory({ hash: 'aaaa1111', title: 'First by digest order' }),
|
||||
digestStory({ hash: 'bbbb2222', title: 'Second by digest order' }),
|
||||
digestStory({ hash: 'cccc3333', title: 'Third by digest order' }),
|
||||
];
|
||||
const env = composeBriefFromDigestStories(
|
||||
rule(),
|
||||
stories,
|
||||
{ clusters: 0, multiSource: 0 },
|
||||
{
|
||||
nowMs: NOW,
|
||||
synthesis: {
|
||||
lead: 'Editorial lead at least forty characters long for validator pass-through.',
|
||||
// Re-rank: third story should lead, then first, then second.
|
||||
rankedStoryHashes: ['cccc3333', 'aaaa1111', 'bbbb2222'],
|
||||
},
|
||||
},
|
||||
);
|
||||
assert.ok(env);
|
||||
assert.equal(env.data.stories[0].headline, 'Third by digest order');
|
||||
assert.equal(env.data.stories[1].headline, 'First by digest order');
|
||||
assert.equal(env.data.stories[2].headline, 'Second by digest order');
|
||||
});
|
||||
|
||||
it('rankedStoryHashes matches by short-hash prefix (model emits 8-char prefixes)', () => {
|
||||
const stories = [
|
||||
digestStory({ hash: 'longhash1234567890abc', title: 'First' }),
|
||||
digestStory({ hash: 'otherhashfullsuffix', title: 'Second' }),
|
||||
];
|
||||
const env = composeBriefFromDigestStories(
|
||||
rule(),
|
||||
stories,
|
||||
{ clusters: 0, multiSource: 0 },
|
||||
{
|
||||
nowMs: NOW,
|
||||
synthesis: {
|
||||
lead: 'Editorial lead at least forty characters long for validator pass-through.',
|
||||
// Model emits 8-char prefixes; helper must prefix-match the
|
||||
// story's full hash.
|
||||
rankedStoryHashes: ['otherhash', 'longhash'],
|
||||
},
|
||||
},
|
||||
);
|
||||
assert.ok(env);
|
||||
assert.equal(env.data.stories[0].headline, 'Second');
|
||||
assert.equal(env.data.stories[1].headline, 'First');
|
||||
});
|
||||
|
||||
it('stories not present in rankedStoryHashes go after, in original order', () => {
|
||||
const stories = [
|
||||
digestStory({ hash: 'unranked-A', title: 'Unranked A' }),
|
||||
digestStory({ hash: 'ranked-B', title: 'Ranked B' }),
|
||||
digestStory({ hash: 'unranked-C', title: 'Unranked C' }),
|
||||
];
|
||||
const env = composeBriefFromDigestStories(
|
||||
rule(),
|
||||
stories,
|
||||
{ clusters: 0, multiSource: 0 },
|
||||
{
|
||||
nowMs: NOW,
|
||||
synthesis: {
|
||||
lead: 'Editorial lead at least forty characters long for validator pass-through.',
|
||||
rankedStoryHashes: ['ranked-B'],
|
||||
},
|
||||
},
|
||||
);
|
||||
assert.ok(env);
|
||||
assert.equal(env.data.stories[0].headline, 'Ranked B');
|
||||
// A and C keep their original relative order (A then C).
|
||||
assert.equal(env.data.stories[1].headline, 'Unranked A');
|
||||
assert.equal(env.data.stories[2].headline, 'Unranked C');
|
||||
});
|
||||
});
|
||||
|
||||
@@ -20,6 +20,7 @@ import {
|
||||
parseDigestProse,
|
||||
validateDigestProseShape,
|
||||
generateDigestProse,
|
||||
generateDigestProsePublic,
|
||||
enrichBriefEnvelopeWithLLM,
|
||||
buildStoryDescriptionPrompt,
|
||||
parseStoryDescription,
|
||||
@@ -47,7 +48,7 @@ function story(overrides = {}) {
|
||||
|
||||
function envelope(overrides = {}) {
|
||||
return {
|
||||
version: 2,
|
||||
version: 3,
|
||||
issuedAt: 1_745_000_000_000,
|
||||
data: {
|
||||
user: { name: 'Reader', tz: 'UTC' },
|
||||
@@ -249,8 +250,12 @@ describe('buildDigestPrompt', () => {
|
||||
const { system, user } = buildDigestPrompt([story(), story({ headline: 'Second', country: 'PS' })], 'critical');
|
||||
assert.match(system, /chief editor of WorldMonitor Brief/);
|
||||
assert.match(user, /Reader sensitivity level: critical/);
|
||||
assert.match(user, /01\. \[critical\] Iran threatens/);
|
||||
assert.match(user, /02\. \[critical\] Second/);
|
||||
// v3 prompt format: "01. [h:XXXX] [SEVERITY] Headline" — includes
|
||||
// a short hash prefix for ranking and uppercases severity to
|
||||
// emphasise editorial importance to the model. Hash falls back
|
||||
// to "p<NN>" position when story.hash is absent (test fixtures).
|
||||
assert.match(user, /01\. \[h:p?[a-z0-9]+\] \[CRITICAL\] Iran threatens/);
|
||||
assert.match(user, /02\. \[h:p?[a-z0-9]+\] \[CRITICAL\] Second/);
|
||||
});
|
||||
|
||||
it('caps at 12 stories', () => {
|
||||
@@ -259,6 +264,42 @@ describe('buildDigestPrompt', () => {
|
||||
const lines = user.split('\n').filter((l) => /^\d{2}\. /.test(l));
|
||||
assert.equal(lines.length, 12);
|
||||
});
|
||||
|
||||
it('opens lead with greeting when ctx.greeting set and not public', () => {
|
||||
const { user } = buildDigestPrompt([story()], 'critical', { greeting: 'Good morning', isPublic: false });
|
||||
assert.match(user, /Open the lead with: "Good morning\."/);
|
||||
});
|
||||
|
||||
it('omits greeting and profile when ctx.isPublic=true', () => {
|
||||
const { user } = buildDigestPrompt([story()], 'critical', {
|
||||
profile: 'Watching: oil futures, Strait of Hormuz',
|
||||
greeting: 'Good morning',
|
||||
isPublic: true,
|
||||
});
|
||||
assert.doesNotMatch(user, /Good morning/);
|
||||
assert.doesNotMatch(user, /Watching:/);
|
||||
});
|
||||
|
||||
it('includes profile lines when ctx.profile set and not public', () => {
|
||||
const { user } = buildDigestPrompt([story()], 'critical', {
|
||||
profile: 'Watching: oil futures',
|
||||
isPublic: false,
|
||||
});
|
||||
assert.match(user, /Reader profile/);
|
||||
assert.match(user, /Watching: oil futures/);
|
||||
});
|
||||
|
||||
it('emits stable [h:XXXX] short-hash prefix derived from story.hash', () => {
|
||||
const s = story({ hash: 'abc12345xyz9876' });
|
||||
const { user } = buildDigestPrompt([s], 'critical');
|
||||
// Short hash is first 8 chars of the digest story hash.
|
||||
assert.match(user, /\[h:abc12345\]/);
|
||||
});
|
||||
|
||||
it('asks model to emit rankedStoryHashes in JSON output (system prompt)', () => {
|
||||
const { system } = buildDigestPrompt([story()], 'critical');
|
||||
assert.match(system, /rankedStoryHashes/);
|
||||
});
|
||||
});
|
||||
|
||||
// ── parseDigestProse ───────────────────────────────────────────────────────
|
||||
@@ -426,8 +467,11 @@ describe('generateDigestProse', () => {
|
||||
// `threads`, which the renderer's assertBriefEnvelope requires.
|
||||
const llm1 = makeLLM(validJson);
|
||||
await generateDigestProse('user_a', stories, 'all', { ...cache, callLLM: llm1.callLLM });
|
||||
// Corrupt the stored row in place
|
||||
const badKey = [...cache.store.keys()].find((k) => k.startsWith('brief:llm:digest:v2:'));
|
||||
// Corrupt the stored row in place. Cache key prefix bumped to v3
|
||||
// (2026-04-25) when the digest hash gained ctx (profile, greeting,
|
||||
// isPublic) and per-story `hash` fields. v2 rows are ignored on
|
||||
// rollout; v3 is the active prefix.
|
||||
const badKey = [...cache.store.keys()].find((k) => k.startsWith('brief:llm:digest:v3:'));
|
||||
assert.ok(badKey, 'expected a digest prose cache entry');
|
||||
cache.store.set(badKey, { lead: 'short', /* missing threads + signals */ });
|
||||
const llm2 = makeLLM(validJson);
|
||||
@@ -452,6 +496,10 @@ describe('validateDigestProseShape', () => {
|
||||
assert.ok(out);
|
||||
assert.notEqual(out, good, 'must not return the caller object by reference');
|
||||
assert.equal(out.threads.length, 1);
|
||||
// v3: rankedStoryHashes is always present in the normalised
|
||||
// output (defaults to [] when source lacks the field — keeps the
|
||||
// shape stable for downstream consumers).
|
||||
assert.ok(Array.isArray(out.rankedStoryHashes));
|
||||
});
|
||||
|
||||
it('rejects missing threads', () => {
|
||||
@@ -469,6 +517,128 @@ describe('validateDigestProseShape', () => {
|
||||
assert.equal(validateDigestProseShape([good]), null);
|
||||
assert.equal(validateDigestProseShape('string'), null);
|
||||
});
|
||||
|
||||
it('preserves rankedStoryHashes when present (v3 path)', () => {
|
||||
const out = validateDigestProseShape({
|
||||
...good,
|
||||
rankedStoryHashes: ['abc12345', 'def67890', 'short', 'ok'],
|
||||
});
|
||||
assert.ok(out);
|
||||
// 'short' (5 chars) keeps; 'ok' (2 chars) drops below the ≥4-char floor.
|
||||
assert.deepEqual(out.rankedStoryHashes, ['abc12345', 'def67890', 'short']);
|
||||
});
|
||||
|
||||
it('drops malformed rankedStoryHashes entries without rejecting the payload', () => {
|
||||
const out = validateDigestProseShape({
|
||||
...good,
|
||||
rankedStoryHashes: ['valid_hash', null, 42, '', ' ', 'bb'],
|
||||
});
|
||||
assert.ok(out, 'malformed ranking entries do not invalidate the whole object');
|
||||
assert.deepEqual(out.rankedStoryHashes, ['valid_hash']);
|
||||
});
|
||||
|
||||
it('returns empty rankedStoryHashes when field absent (v2-shaped row passes)', () => {
|
||||
const out = validateDigestProseShape(good);
|
||||
assert.deepEqual(out.rankedStoryHashes, []);
|
||||
});
|
||||
});
|
||||
|
||||
// ── generateDigestProsePublic + cache-key independence (Codex Round-2 #4) ──
|
||||
|
||||
describe('generateDigestProsePublic — public cache shared across users', () => {
|
||||
const stories = [story(), story({ headline: 'Second', country: 'PS' })];
|
||||
const validJson = JSON.stringify({
|
||||
lead: 'A non-personalised editorial lead generated for the share-URL surface, free of profile context.',
|
||||
threads: [{ tag: 'Energy', teaser: 'Hormuz tensions resurface today.' }],
|
||||
signals: ['Watch for naval redeployment in the Gulf.'],
|
||||
});
|
||||
|
||||
it('two distinct callers with identical (sensitivity, story-pool) hit the SAME cache row', async () => {
|
||||
// The whole point of generateDigestProsePublic: when the share
|
||||
// URL is opened by 1000 different anonymous readers, only the
|
||||
// first call hits the LLM. Every subsequent call serves the
|
||||
// same cached output. (Internally: hashDigestInput substitutes
|
||||
// 'public' for userId when ctx.isPublic === true.)
|
||||
const cache = makeCache();
|
||||
const llm1 = makeLLM(validJson);
|
||||
await generateDigestProsePublic(stories, 'critical', { ...cache, callLLM: llm1.callLLM });
|
||||
assert.equal(llm1.calls.length, 1);
|
||||
|
||||
// Second call — different "user" context (the wrapper takes no
|
||||
// userId, so this is just a second invocation), same pool.
|
||||
// Should hit cache, NOT re-LLM.
|
||||
const llm2 = makeLLM(() => { throw new Error('would not be called'); });
|
||||
const out = await generateDigestProsePublic(stories, 'critical', { ...cache, callLLM: llm2.callLLM });
|
||||
assert.ok(out);
|
||||
assert.equal(llm2.calls.length, 0, 'public cache shared across calls — no per-user inflation');
|
||||
});
|
||||
|
||||
it('does NOT collide with the personalised cache for the same story pool', async () => {
|
||||
// Defensive: a private call (with profile/greeting/userId) and a
|
||||
// public call must produce DIFFERENT cache keys. Otherwise a
|
||||
// private call could poison the public cache row (or vice versa).
|
||||
const cache = makeCache();
|
||||
const llm = makeLLM(validJson);
|
||||
|
||||
await generateDigestProsePublic(stories, 'critical', { ...cache, callLLM: llm.callLLM });
|
||||
const publicKeys = [...cache.store.keys()];
|
||||
|
||||
await generateDigestProse('user_xyz', stories, 'critical',
|
||||
{ ...cache, callLLM: llm.callLLM },
|
||||
{ profile: 'Watching: oil', greeting: 'Good morning', isPublic: false },
|
||||
);
|
||||
const privateKeys = [...cache.store.keys()].filter((k) => !publicKeys.includes(k));
|
||||
|
||||
assert.equal(publicKeys.length, 1, 'one public cache row');
|
||||
assert.equal(privateKeys.length, 1, 'private call writes its own row');
|
||||
assert.notEqual(publicKeys[0], privateKeys[0], 'public + private rows must use distinct keys');
|
||||
// Public key contains literal "public:" segment — userId substitution
|
||||
assert.match(publicKeys[0], /:public:/);
|
||||
// Private key contains the userId
|
||||
assert.match(privateKeys[0], /:user_xyz:/);
|
||||
});
|
||||
|
||||
it('greeting changes invalidate the personalised cache (per Brain B parity)', async () => {
|
||||
// Brain B's old cache (digest:ai-summary:v1) included greeting in
|
||||
// the key — morning prose differed from afternoon prose. The
|
||||
// canonical synthesis preserves that semantic via greetingBucket.
|
||||
const cache = makeCache();
|
||||
const llm1 = makeLLM(validJson);
|
||||
await generateDigestProse('user_a', stories, 'all',
|
||||
{ ...cache, callLLM: llm1.callLLM },
|
||||
{ greeting: 'Good morning', isPublic: false },
|
||||
);
|
||||
const llm2 = makeLLM(validJson);
|
||||
await generateDigestProse('user_a', stories, 'all',
|
||||
{ ...cache, callLLM: llm2.callLLM },
|
||||
{ greeting: 'Good evening', isPublic: false },
|
||||
);
|
||||
assert.equal(llm2.calls.length, 1, 'greeting bucket change re-keys the cache');
|
||||
});
|
||||
|
||||
it('profile changes invalidate the personalised cache', async () => {
|
||||
const cache = makeCache();
|
||||
const llm1 = makeLLM(validJson);
|
||||
await generateDigestProse('user_a', stories, 'all',
|
||||
{ ...cache, callLLM: llm1.callLLM },
|
||||
{ profile: 'Watching: oil', isPublic: false },
|
||||
);
|
||||
const llm2 = makeLLM(validJson);
|
||||
await generateDigestProse('user_a', stories, 'all',
|
||||
{ ...cache, callLLM: llm2.callLLM },
|
||||
{ profile: 'Watching: gas', isPublic: false },
|
||||
);
|
||||
assert.equal(llm2.calls.length, 1, 'profile change re-keys the cache');
|
||||
});
|
||||
|
||||
it('writes to cache under brief:llm:digest:v3 prefix (not v2)', async () => {
|
||||
const cache = makeCache();
|
||||
const llm = makeLLM(validJson);
|
||||
await generateDigestProse('user_a', stories, 'all', { ...cache, callLLM: llm.callLLM });
|
||||
const keys = [...cache.store.keys()];
|
||||
assert.ok(keys.some((k) => k.startsWith('brief:llm:digest:v3:')), 'v3 prefix used');
|
||||
assert.ok(!keys.some((k) => k.startsWith('brief:llm:digest:v2:')), 'no v2 writes');
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildStoryDescriptionPrompt', () => {
|
||||
|
||||
@@ -414,8 +414,56 @@ describe('renderBriefMagazine — envelope validation', () => {
|
||||
});
|
||||
|
||||
describe('BRIEF_ENVELOPE_VERSION', () => {
|
||||
it('is the literal 2 (bump requires cross-producer coordination)', () => {
|
||||
assert.equal(BRIEF_ENVELOPE_VERSION, 2);
|
||||
it('is the literal 3 (bump requires cross-producer coordination)', () => {
|
||||
// Bumped 2 → 3 (2026-04-25) when BriefDigest gained the optional
|
||||
// `publicLead` field for the share-URL surface. v2 envelopes still
|
||||
// in the 7-day TTL window remain readable — see
|
||||
// SUPPORTED_ENVELOPE_VERSIONS = [1, 2, 3]. Test below covers v1
|
||||
// back-compat; v2 back-compat is exercised by the missing-publicLead
|
||||
// path in the BriefDigest validator (publicLead === undefined is OK).
|
||||
assert.equal(BRIEF_ENVELOPE_VERSION, 3);
|
||||
});
|
||||
});
|
||||
|
||||
describe('renderBriefMagazine — v3 publicLead field (Codex Round-3 Medium #2)', () => {
|
||||
it('accepts a v3 envelope with publicLead', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.publicLead = 'A non-personalised editorial lead for share-URL surface readers.';
|
||||
// Should NOT throw — publicLead is now an allowed digest key.
|
||||
const html = renderBriefMagazine(env);
|
||||
assert.ok(typeof html === 'string' && html.length > 0);
|
||||
});
|
||||
|
||||
it('rejects a publicLead that is not a non-empty string', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.publicLead = 42;
|
||||
assert.throws(
|
||||
() => renderBriefMagazine(env),
|
||||
/envelope\.data\.digest\.publicLead, when present, must be a non-empty string/,
|
||||
);
|
||||
});
|
||||
|
||||
it('accepts a v2 envelope still in TTL window without publicLead (back-compat)', () => {
|
||||
// v2 envelopes already in Redis at v3 rollout MUST keep rendering
|
||||
// — SUPPORTED_ENVELOPE_VERSIONS = [1, 2, 3]. publicLead is
|
||||
// optional; absence is the v2 shape.
|
||||
const env = envelope();
|
||||
env.version = 2;
|
||||
delete env.data.digest.publicLead;
|
||||
const html = renderBriefMagazine(env);
|
||||
assert.ok(typeof html === 'string' && html.length > 0);
|
||||
});
|
||||
|
||||
it('rejects an envelope with an unknown digest key (closed-key-set still enforced)', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.synthesisLevel = 1; // would-be ad-hoc metadata
|
||||
assert.throws(
|
||||
() => renderBriefMagazine(env),
|
||||
/envelope\.data\.digest has unexpected key "synthesisLevel"/,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -699,6 +747,175 @@ describe('renderBriefMagazine — publicMode', () => {
|
||||
const b = renderBriefMagazine(env, {});
|
||||
assert.equal(a, b);
|
||||
});
|
||||
|
||||
// ── Public-share lead fail-safe (Codex Round-2 High security) ──────
|
||||
//
|
||||
// Personalised `digest.lead` carries profile context (watched assets,
|
||||
// saved regions, etc.). On the public-share surface we MUST render
|
||||
// `publicLead` (a non-personalised parallel synthesis) instead, OR
|
||||
// omit the pull-quote entirely. NEVER fall back to the personalised
|
||||
// lead.
|
||||
|
||||
it('renders publicLead in the pull-quote when v3 envelope carries it', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.lead = 'Personal lead with watched-asset details that must NOT leak.';
|
||||
env.data.digest.publicLead = 'A non-personalised editorial lead suitable for share readers.';
|
||||
const html = renderBriefMagazine(env, { publicMode: true });
|
||||
assert.ok(
|
||||
html.includes('non-personalised editorial lead'),
|
||||
'pull-quote must render the publicLead text',
|
||||
);
|
||||
assert.ok(
|
||||
!html.includes('watched-asset details'),
|
||||
'personalised lead text must NEVER appear on the public surface',
|
||||
);
|
||||
});
|
||||
|
||||
it('OMITS the pull-quote when publicLead is absent (v2 envelope back-compat)', () => {
|
||||
// v2 envelopes still in TTL window have no publicLead. Public-mode
|
||||
// render MUST omit the blockquote rather than render the
|
||||
// personalised lead.
|
||||
const env = envelope();
|
||||
env.version = 2;
|
||||
env.data.digest.lead = 'Personal lead with watched-asset details that must NOT leak.';
|
||||
delete env.data.digest.publicLead;
|
||||
const html = renderBriefMagazine(env, { publicMode: true });
|
||||
assert.ok(
|
||||
!html.includes('watched-asset details'),
|
||||
'personalised lead text must NEVER appear on the public surface',
|
||||
);
|
||||
// Sanity: the rest of the page (greeting + greeting block) is
|
||||
// still rendered — only the blockquote is omitted.
|
||||
assert.ok(html.includes('At The Top Of The Hour'));
|
||||
});
|
||||
|
||||
it('OMITS the pull-quote when publicLead is empty string (defensive)', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.lead = 'Personal lead that must NOT leak.';
|
||||
// Defensive: publicLead set to empty string by a buggy producer.
|
||||
// The render path treats empty as absent, omitting the pull-quote.
|
||||
// (assertBriefEnvelope rejects publicLead='' as a non-empty-string
|
||||
// violation, so this only matters if a future code path bypasses
|
||||
// validation — belt-and-braces.)
|
||||
env.data.digest.publicLead = '';
|
||||
// Validator rejects empty publicLead first, so render throws —
|
||||
// proves the contract is enforced before redactForPublic runs.
|
||||
assert.throws(
|
||||
() => renderBriefMagazine(env, { publicMode: true }),
|
||||
/publicLead, when present, must be a non-empty string/,
|
||||
);
|
||||
});
|
||||
|
||||
it('private (non-public) render still uses the personalised lead', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.lead = 'Personal lead for the authenticated reader.';
|
||||
env.data.digest.publicLead = 'Generic public lead.';
|
||||
const html = renderBriefMagazine(env); // private path
|
||||
assert.ok(html.includes('Personal lead for the authenticated reader'));
|
||||
assert.ok(!html.includes('Generic public lead'), 'publicLead is share-only');
|
||||
});
|
||||
|
||||
// ── Public signals + threads fail-safe (extends Codex Round-2 High security) ──
|
||||
|
||||
it('substitutes publicSignals when present — personalised signals never reach the public surface', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.lead = 'Personal lead.';
|
||||
env.data.digest.publicLead = 'Generic public lead.';
|
||||
// Personalised signals can echo a user's watched assets ("your
|
||||
// Saudi exposure"). Anonymous public readers must never see this.
|
||||
env.data.digest.signals = ['Watch Saudi crude exposure on your watchlist for OPEC moves'];
|
||||
env.data.digest.publicSignals = ['Watch OPEC for production-quota signals'];
|
||||
const html = renderBriefMagazine(env, { publicMode: true });
|
||||
assert.ok(html.includes('OPEC for production-quota'), 'publicSignals must render');
|
||||
assert.ok(!html.includes('your watchlist'), 'personalised signals must NEVER appear on public');
|
||||
assert.ok(!html.includes('Saudi crude exposure'), 'personalised signal phrase must NEVER appear on public');
|
||||
});
|
||||
|
||||
it('OMITS the signals page when publicSignals is absent (fail-safe — never serves personalised signals)', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.lead = 'Personal lead.';
|
||||
env.data.digest.publicLead = 'Generic public lead.';
|
||||
env.data.digest.signals = ['Watch your private watchlist for OPEC moves'];
|
||||
delete env.data.digest.publicSignals;
|
||||
const html = renderBriefMagazine(env, { publicMode: true });
|
||||
// Renderer's hasSignals gate hides the signals page when the
|
||||
// array is empty. Personalised signal phrase must NOT appear.
|
||||
assert.ok(!html.includes('your private watchlist'), 'personalised signals must NEVER appear on public');
|
||||
assert.ok(!html.includes('Digest / 04'), 'signals page section must be omitted');
|
||||
});
|
||||
|
||||
it('substitutes publicThreads when present — personalised thread teasers never reach public', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.lead = 'Personal lead.';
|
||||
env.data.digest.publicLead = 'Generic public lead.';
|
||||
env.data.digest.threads = [
|
||||
{ tag: 'Energy', teaser: 'Saudi exposure on your portfolio is at risk this week' },
|
||||
];
|
||||
env.data.digest.publicThreads = [
|
||||
{ tag: 'Energy', teaser: 'OPEC production quota debate intensifies' },
|
||||
];
|
||||
const html = renderBriefMagazine(env, { publicMode: true });
|
||||
assert.ok(html.includes('OPEC production quota'), 'publicThreads must render');
|
||||
assert.ok(!html.includes('your portfolio'), 'personalised thread teaser must NEVER appear on public');
|
||||
});
|
||||
|
||||
it('falls back to category-derived threads stub when publicThreads absent', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.lead = 'Personal lead.';
|
||||
env.data.digest.publicLead = 'Generic public lead.';
|
||||
env.data.digest.threads = [
|
||||
{ tag: 'Energy', teaser: 'Saudi exposure on your portfolio is at risk this week' },
|
||||
];
|
||||
delete env.data.digest.publicThreads;
|
||||
const html = renderBriefMagazine(env, { publicMode: true });
|
||||
assert.ok(!html.includes('your portfolio'), 'personalised thread must NEVER appear on public');
|
||||
// Stub teaser pattern — generic phrasing derived from story
|
||||
// categories. Renderer still produces a threads page.
|
||||
assert.ok(
|
||||
html.includes('thread on the desk today') || html.includes('threads on the desk today'),
|
||||
'category-derived threads stub renders',
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects malformed publicSignals (validator contract)', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.publicSignals = ['ok signal', 42]; // 42 is not a string
|
||||
assert.throws(
|
||||
() => renderBriefMagazine(env, { publicMode: true }),
|
||||
/publicSignals\[1\] must be a non-empty string/,
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects malformed publicThreads (validator contract)', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.publicThreads = [{ tag: 'Energy' }]; // missing teaser
|
||||
assert.throws(
|
||||
() => renderBriefMagazine(env, { publicMode: true }),
|
||||
/publicThreads\[0\]\.teaser must be a non-empty string/,
|
||||
);
|
||||
});
|
||||
|
||||
it('private render ignores publicSignals + publicThreads — uses personalised', () => {
|
||||
const env = envelope();
|
||||
env.version = 3;
|
||||
env.data.digest.signals = ['Personalised signal for authenticated reader'];
|
||||
env.data.digest.publicSignals = ['Generic public signal'];
|
||||
env.data.digest.threads = [{ tag: 'Energy', teaser: 'Personalised teaser' }];
|
||||
env.data.digest.publicThreads = [{ tag: 'Energy', teaser: 'Generic public teaser' }];
|
||||
const html = renderBriefMagazine(env);
|
||||
assert.ok(html.includes('Personalised signal'), 'private render uses personalised signals');
|
||||
assert.ok(!html.includes('Generic public signal'), 'public siblings ignored on private path');
|
||||
assert.ok(html.includes('Personalised teaser'), 'private render uses personalised threads');
|
||||
});
|
||||
});
|
||||
|
||||
// ── Regression: cover greeting follows envelope.data.digest.greeting ─────────
|
||||
|
||||
@@ -32,13 +32,16 @@ const src = readFileSync(
|
||||
);
|
||||
|
||||
describe('digestFor cache key includes sensitivity', () => {
|
||||
it('memoization key interpolates candidate.sensitivity', () => {
|
||||
it('memoization key interpolates cand.rule.sensitivity', () => {
|
||||
// The key must include sensitivity alongside variant+lang+windowStart
|
||||
// so stricter users do not inherit a looser populator's pool.
|
||||
// Post-canonical-window-fix: digestFor receives the annotated candidate
|
||||
// (`cand`) instead of just the rule, and reaches sensitivity via
|
||||
// cand.rule.sensitivity.
|
||||
assert.match(
|
||||
src,
|
||||
/const\s+key\s*=\s*`\$\{candidate\.variant[^`]*?\$\{candidate\.sensitivity[^`]*?\$\{windowStart\}`/,
|
||||
'digestFor cache key must interpolate candidate.sensitivity',
|
||||
/const\s+key\s*=\s*`\$\{cand\.rule\.variant[^`]*?\$\{cand\.rule\.sensitivity[^`]*?\$\{windowStart\}`/,
|
||||
'digestFor cache key must interpolate cand.rule.sensitivity',
|
||||
);
|
||||
});
|
||||
|
||||
@@ -50,11 +53,11 @@ describe('digestFor cache key includes sensitivity', () => {
|
||||
// something else).
|
||||
//
|
||||
// Anchor the match to the cache-key template-literal context so it
|
||||
// cannot be satisfied by an unrelated `chosenCandidate.sensitivity
|
||||
// ?? 'high'` elsewhere in the file (e.g. the new operator log line).
|
||||
// cannot be satisfied by an unrelated `cand.rule.sensitivity ?? 'high'`
|
||||
// elsewhere in the file (e.g. the new operator log line).
|
||||
assert.match(
|
||||
src,
|
||||
/\$\{candidate\.sensitivity\s*\?\?\s*'high'\}\s*:\s*\$\{windowStart\}/,
|
||||
/\$\{cand\.rule\.sensitivity\s*\?\?\s*'high'\}\s*:\s*\$\{windowStart\}/,
|
||||
'cache key default for sensitivity must be "high" to align with buildDigest default, anchored inside the cache-key template literal',
|
||||
);
|
||||
});
|
||||
@@ -63,12 +66,12 @@ describe('digestFor cache key includes sensitivity', () => {
|
||||
// Sanity: ensure the key construction is not pulled out into a
|
||||
// separate helper whose shape this test can no longer see.
|
||||
const digestForBlock = src.match(
|
||||
/async\s+function\s+digestFor\s*\(candidate\)\s*\{[\s\S]*?\n\s*\}/,
|
||||
/async\s+function\s+digestFor\s*\(cand\)\s*\{[\s\S]*?\n\s*\}/,
|
||||
);
|
||||
assert.ok(digestForBlock, 'digestFor function block should exist');
|
||||
assert.match(
|
||||
digestForBlock[0],
|
||||
/candidate\.sensitivity/,
|
||||
/cand\.rule\.sensitivity/,
|
||||
'sensitivity must be referenced inside digestFor',
|
||||
);
|
||||
});
|
||||
|
||||
466
tests/digest-orchestration-helpers.test.mjs
Normal file
@@ -0,0 +1,466 @@
|
||||
// Pure-function unit tests for the canonical-synthesis orchestration
|
||||
// helpers extracted from scripts/seed-digest-notifications.mjs.
|
||||
//
|
||||
// Covers plan acceptance criteria:
|
||||
// A6.h — three-level synthesis fallback chain
|
||||
// A6.i — subject-line correctness ("Intelligence Brief" vs "Digest")
|
||||
// A6.l — compose-only tick still works for weekly user (sortedAll fallback)
|
||||
// A6.m — winner walks past empty-pool top-priority candidate
|
||||
//
|
||||
// Acceptance criteria A6.a-d (multi-rule, twice_daily, weekly window
|
||||
// parity, all-channel reads) require a full mock of the cron's main()
|
||||
// loop with Upstash + Convex stubs — out of scope for this PR's
|
||||
// pure-function coverage. They are exercised via the parity log line
|
||||
// (A5) in production observability instead.
|
||||
|
||||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import {
|
||||
digestWindowStartMs,
|
||||
pickWinningCandidateWithPool,
|
||||
runSynthesisWithFallback,
|
||||
subjectForBrief,
|
||||
} from '../scripts/lib/digest-orchestration-helpers.mjs';
|
||||
|
||||
// ── subjectForBrief — A6.i ────────────────────────────────────────────────
|
||||
|
||||
describe('subjectForBrief — synthesis-level → email subject', () => {
|
||||
it('synthesis level 1 + non-empty briefLead → Intelligence Brief', () => {
|
||||
assert.equal(
|
||||
subjectForBrief({ briefLead: 'A real lead', synthesisLevel: 1, shortDate: 'Apr 25' }),
|
||||
'WorldMonitor Intelligence Brief — Apr 25',
|
||||
);
|
||||
});
|
||||
|
||||
it('synthesis level 2 + non-empty briefLead → Intelligence Brief (L2 still editorial)', () => {
|
||||
assert.equal(
|
||||
subjectForBrief({ briefLead: 'A degraded lead', synthesisLevel: 2, shortDate: 'Apr 25' }),
|
||||
'WorldMonitor Intelligence Brief — Apr 25',
|
||||
);
|
||||
});
|
||||
|
||||
it('synthesis level 3 → Digest (stub fallback ships less editorial subject)', () => {
|
||||
assert.equal(
|
||||
subjectForBrief({ briefLead: 'a stub', synthesisLevel: 3, shortDate: 'Apr 25' }),
|
||||
'WorldMonitor Digest — Apr 25',
|
||||
);
|
||||
});
|
||||
|
||||
it('null briefLead → Digest regardless of level (no signal for editorial subject)', () => {
|
||||
assert.equal(
|
||||
subjectForBrief({ briefLead: null, synthesisLevel: 1, shortDate: 'Apr 25' }),
|
||||
'WorldMonitor Digest — Apr 25',
|
||||
);
|
||||
});
|
||||
|
||||
it('empty-string briefLead → Digest', () => {
|
||||
assert.equal(
|
||||
subjectForBrief({ briefLead: '', synthesisLevel: 1, shortDate: 'Apr 25' }),
|
||||
'WorldMonitor Digest — Apr 25',
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// ── pickWinningCandidateWithPool — A6.l + A6.m ────────────────────────────
|
||||
|
||||
function rule(overrides) {
|
||||
return {
|
||||
userId: 'u1',
|
||||
variant: 'full',
|
||||
sensitivity: 'all',
|
||||
aiDigestEnabled: true,
|
||||
updatedAt: 1,
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function annotated(rule, due, lastSentAt = null) {
|
||||
return { rule, lastSentAt, due };
|
||||
}
|
||||
|
||||
describe('pickWinningCandidateWithPool — winner walk', () => {
|
||||
it('A6.l — picks ANY eligible rule when none are due (compose-only tick)', async () => {
|
||||
// Weekly user on a non-due tick: no rules due, but the dashboard
|
||||
// contract says we still compose a brief from the user's
|
||||
// preferred rule. sortedAll fallback covers this.
|
||||
const weeklyRule = rule({ variant: 'full', digestMode: 'weekly' });
|
||||
const annotatedList = [annotated(weeklyRule, false)];
|
||||
const digestFor = async () => [{ hash: 'h1', title: 'A story' }];
|
||||
const lines = [];
|
||||
const result = await pickWinningCandidateWithPool(
|
||||
annotatedList,
|
||||
digestFor,
|
||||
(l) => lines.push(l),
|
||||
'u1',
|
||||
);
|
||||
assert.ok(result, 'compose-only tick must still pick a winner');
|
||||
assert.equal(result.winner.rule, weeklyRule);
|
||||
assert.equal(result.winner.due, false);
|
||||
assert.equal(result.stories.length, 1);
|
||||
});
|
||||
|
||||
it('A6.m — walks past empty-pool top-priority due rule to lower-priority due rule with stories', async () => {
|
||||
// A user with two due rules: full:critical (top priority by
|
||||
// compareRules) has empty pool; regional:high (lower priority)
|
||||
// has stories. Winner must be regional:high — not null.
|
||||
const fullCritical = rule({ variant: 'full', sensitivity: 'critical', updatedAt: 100 });
|
||||
const regionalHigh = rule({ variant: 'regional', sensitivity: 'high', updatedAt: 50 });
|
||||
const annotatedList = [annotated(fullCritical, true), annotated(regionalHigh, true)];
|
||||
|
||||
const digestFor = async (c) => {
|
||||
if (c.rule === fullCritical) return []; // empty pool
|
||||
if (c.rule === regionalHigh) return [{ hash: 'h2', title: 'Story from regional' }];
|
||||
return [];
|
||||
};
|
||||
const lines = [];
|
||||
const result = await pickWinningCandidateWithPool(
|
||||
annotatedList,
|
||||
digestFor,
|
||||
(l) => lines.push(l),
|
||||
'u1',
|
||||
);
|
||||
assert.ok(result, 'lower-priority candidate with stories must still win');
|
||||
assert.equal(result.winner.rule, regionalHigh);
|
||||
// Empty-pool log emitted for the skipped top-priority candidate
|
||||
assert.ok(
|
||||
lines.some((l) => l.includes('outcome=empty-pool') && l.includes('variant=full')),
|
||||
'empty-pool line must be logged for the skipped candidate',
|
||||
);
|
||||
});
|
||||
|
||||
it('prefers DUE rules over not-due rules even when not-due is higher priority', async () => {
|
||||
// Higher-priority rule isn't due; lower-priority rule IS due.
|
||||
// Plan rule: pick from due candidates first. Codex Round-3 High #1.
|
||||
const higherPriorityNotDue = rule({ variant: 'full', sensitivity: 'critical', updatedAt: 100 });
|
||||
const lowerPriorityDue = rule({ variant: 'regional', sensitivity: 'high', updatedAt: 50 });
|
||||
const annotatedList = [
|
||||
annotated(higherPriorityNotDue, false), // higher priority, NOT due
|
||||
annotated(lowerPriorityDue, true), // lower priority, DUE
|
||||
];
|
||||
const digestFor = async () => [{ hash: 'h', title: 'X' }];
|
||||
const result = await pickWinningCandidateWithPool(
|
||||
annotatedList,
|
||||
digestFor,
|
||||
() => {},
|
||||
'u1',
|
||||
);
|
||||
assert.ok(result);
|
||||
assert.equal(result.winner.rule, lowerPriorityDue, 'due rule wins over higher-priority not-due');
|
||||
});
|
||||
|
||||
it('returns null when EVERY candidate has an empty pool', async () => {
|
||||
const annotatedList = [annotated(rule({ variant: 'a' }), true), annotated(rule({ variant: 'b' }), false)];
|
||||
const digestFor = async () => [];
|
||||
const result = await pickWinningCandidateWithPool(
|
||||
annotatedList,
|
||||
digestFor,
|
||||
() => {},
|
||||
'u1',
|
||||
);
|
||||
assert.equal(result, null);
|
||||
});
|
||||
|
||||
it('returns null on empty annotated list (no rules for user)', async () => {
|
||||
const result = await pickWinningCandidateWithPool([], async () => [{ hash: 'h' }], () => {}, 'u1');
|
||||
assert.equal(result, null);
|
||||
});
|
||||
|
||||
it('does not call digestFor twice for the same rule (dedup across passes)', async () => {
|
||||
// A rule that's due appears in BOTH sortedDue and sortedAll —
|
||||
// walk must dedupe so digestFor (Upstash GET) only fires once.
|
||||
const dueRule = rule({ variant: 'full' });
|
||||
const annotatedList = [annotated(dueRule, true)];
|
||||
let calls = 0;
|
||||
const digestFor = async () => { calls++; return [{ hash: 'h' }]; };
|
||||
await pickWinningCandidateWithPool(annotatedList, digestFor, () => {}, 'u1');
|
||||
assert.equal(calls, 1, 'same rule must not be tried twice');
|
||||
});
|
||||
|
||||
it('passes the FULL annotated candidate to digestFor (not just the rule) so callers can derive a per-candidate window from cand.lastSentAt', async () => {
|
||||
// Regression guard for the canonical-vs-send window divergence.
|
||||
// digestFor needs lastSentAt to compute its windowStart via
|
||||
// digestWindowStartMs; passing only the rule strips that signal
|
||||
// and forces a fixed-24h fallback that the email/Slack body
|
||||
// doesn't honour.
|
||||
const dueRule = rule({ variant: 'full' });
|
||||
const passedArgs = [];
|
||||
const digestFor = async (cand) => { passedArgs.push(cand); return [{ hash: 'h' }]; };
|
||||
await pickWinningCandidateWithPool(
|
||||
[annotated(dueRule, true, 1_700_000_000_000)],
|
||||
digestFor,
|
||||
() => {},
|
||||
'u1',
|
||||
);
|
||||
assert.equal(passedArgs.length, 1);
|
||||
assert.equal(passedArgs[0].rule, dueRule);
|
||||
assert.equal(passedArgs[0].lastSentAt, 1_700_000_000_000);
|
||||
assert.equal(passedArgs[0].due, true);
|
||||
});
|
||||
|
||||
it('walks past a filter-rejected top-priority candidate to a lower-priority candidate that composes successfully (Risk 2 regression guard)', async () => {
|
||||
// Pre-fix behaviour: helper returned the first NON-EMPTY pool as
|
||||
// winner. If composer then dropped every story (URL/headline/shape
|
||||
// filters), the caller bailed without trying lower-priority rules.
|
||||
// Fix: tryCompose callback lets the helper continue walking when
|
||||
// a candidate's pool survives buildDigest but compose returns null.
|
||||
const fullCritical = rule({ variant: 'full', sensitivity: 'critical', updatedAt: 100 });
|
||||
const regionalHigh = rule({ variant: 'regional', sensitivity: 'high', updatedAt: 50 });
|
||||
const annotatedList = [annotated(fullCritical, true), annotated(regionalHigh, true)];
|
||||
const digestFor = async () => [{ hash: 'h', title: 'pool member' }];
|
||||
// tryCompose: top candidate gets filtered to nothing (returns null);
|
||||
// lower-priority survives.
|
||||
const tryCompose = (cand) => {
|
||||
if (cand.rule === fullCritical) return null; // simulate URL/headline filter dropping all
|
||||
if (cand.rule === regionalHigh) return { envelope: 'ok' };
|
||||
return null;
|
||||
};
|
||||
const lines = [];
|
||||
const result = await pickWinningCandidateWithPool(
|
||||
annotatedList,
|
||||
digestFor,
|
||||
(l) => lines.push(l),
|
||||
'u1',
|
||||
tryCompose,
|
||||
);
|
||||
assert.ok(result, 'lower-priority candidate must still win after top-priority filter-rejection');
|
||||
assert.equal(result.winner.rule, regionalHigh);
|
||||
assert.deepEqual(result.composeResult, { envelope: 'ok' });
|
||||
assert.ok(
|
||||
lines.some((l) => l.includes('outcome=filter-rejected') && l.includes('variant=full')),
|
||||
'filter-rejected line must be logged for the skipped top candidate',
|
||||
);
|
||||
});
|
||||
|
||||
it('returns null when EVERY candidate is rejected by tryCompose (no fallthrough has a survivor)', async () => {
|
||||
const a = rule({ variant: 'a' });
|
||||
const b = rule({ variant: 'b' });
|
||||
const annotatedList = [annotated(a, true), annotated(b, true)];
|
||||
const digestFor = async () => [{ hash: 'h' }];
|
||||
const tryCompose = () => null; // nothing ever composes
|
||||
const result = await pickWinningCandidateWithPool(
|
||||
annotatedList,
|
||||
digestFor,
|
||||
() => {},
|
||||
'u1',
|
||||
tryCompose,
|
||||
);
|
||||
assert.equal(result, null);
|
||||
});
|
||||
|
||||
it('forwards tryCompose return value as composeResult on success (lets caller skip a redundant compose call)', async () => {
|
||||
const r = rule({ variant: 'full' });
|
||||
const composedEnvelope = { data: { stories: [{ hash: 'h' }] } };
|
||||
const result = await pickWinningCandidateWithPool(
|
||||
[annotated(r, true)],
|
||||
async () => [{ hash: 'h' }],
|
||||
() => {},
|
||||
'u1',
|
||||
() => composedEnvelope,
|
||||
);
|
||||
assert.ok(result);
|
||||
assert.equal(result.composeResult, composedEnvelope);
|
||||
});
|
||||
|
||||
it('without tryCompose, preserves legacy "first non-empty pool wins" semantics (existing callers/tests unaffected)', async () => {
|
||||
const r = rule({ variant: 'full' });
|
||||
const result = await pickWinningCandidateWithPool(
|
||||
[annotated(r, true)],
|
||||
async () => [{ hash: 'h' }],
|
||||
() => {},
|
||||
'u1',
|
||||
// no tryCompose
|
||||
);
|
||||
assert.ok(result);
|
||||
assert.equal(result.winner.rule, r);
|
||||
assert.equal(result.composeResult, undefined);
|
||||
});
|
||||
});
|
||||
|
||||
// ── digestWindowStartMs — Risk 1 (canonical vs send window parity) ────────
|
||||
|
||||
describe('digestWindowStartMs — single source of truth for compose + send window', () => {
|
||||
it('returns lastSentAt verbatim when present (rule has shipped before)', () => {
|
||||
const lastSentAt = 1_700_000_000_000;
|
||||
assert.equal(digestWindowStartMs(lastSentAt, 1_700_086_400_000, 24 * 60 * 60 * 1000), lastSentAt);
|
||||
});
|
||||
|
||||
it('falls back to nowMs - defaultLookbackMs when lastSentAt is null (first send)', () => {
|
||||
const nowMs = 1_700_086_400_000;
|
||||
const lookback = 24 * 60 * 60 * 1000;
|
||||
assert.equal(digestWindowStartMs(null, nowMs, lookback), nowMs - lookback);
|
||||
});
|
||||
|
||||
it('falls back when lastSentAt is undefined', () => {
|
||||
const nowMs = 1_700_086_400_000;
|
||||
const lookback = 24 * 60 * 60 * 1000;
|
||||
assert.equal(digestWindowStartMs(undefined, nowMs, lookback), nowMs - lookback);
|
||||
});
|
||||
|
||||
it('weekly user (lastSentAt = 7d ago) → window covers exactly the prior 7d', () => {
|
||||
const sevenDaysMs = 7 * 24 * 60 * 60 * 1000;
|
||||
const nowMs = 2_000_000_000_000;
|
||||
const lastSentAt = nowMs - sevenDaysMs;
|
||||
const windowStart = digestWindowStartMs(lastSentAt, nowMs, 24 * 60 * 60 * 1000);
|
||||
// The compose-path brief lead and the send-loop email body both
|
||||
// call buildDigest(rule, windowStart) with this same value, so a
|
||||
// weekly user's lead now summarizes the same 7-day pool that
|
||||
// ships in the email body. Pre-fix, the lead came from a 24h pool
|
||||
// while the email shipped 7d.
|
||||
assert.equal(windowStart, lastSentAt);
|
||||
assert.equal(nowMs - windowStart, sevenDaysMs);
|
||||
});
|
||||
|
||||
it('twice-daily user (lastSentAt = 12h ago) → 12h window matches what ships', () => {
|
||||
const twelveHoursMs = 12 * 60 * 60 * 1000;
|
||||
const nowMs = 2_000_000_000_000;
|
||||
const lastSentAt = nowMs - twelveHoursMs;
|
||||
const windowStart = digestWindowStartMs(lastSentAt, nowMs, 24 * 60 * 60 * 1000);
|
||||
assert.equal(windowStart, lastSentAt);
|
||||
assert.equal(nowMs - windowStart, twelveHoursMs);
|
||||
});
|
||||
|
||||
it('zero is a valid lastSentAt (epoch — exotic but legal); does not fall through to default', () => {
|
||||
// ?? operator is explicit about this; guards against regressions
|
||||
// toward `||` which would treat 0 as missing.
|
||||
const nowMs = 1_700_000_000_000;
|
||||
assert.equal(digestWindowStartMs(0, nowMs, 24 * 60 * 60 * 1000), 0);
|
||||
});
|
||||
});
|
||||
|
||||
// ── runSynthesisWithFallback — A6.h ───────────────────────────────────────
|
||||
|
||||
const validProse = {
|
||||
lead: 'A long-enough executive lead about Hormuz and the Gaza humanitarian crisis, written in editorial tone.',
|
||||
threads: [{ tag: 'Energy', teaser: 'Hormuz tensions resurface today.' }],
|
||||
signals: ['Watch for naval redeployment.'],
|
||||
};
|
||||
|
||||
function makeDeps(callLLM) {
|
||||
const cache = new Map();
|
||||
return {
|
||||
callLLM,
|
||||
cacheGet: async (k) => cache.has(k) ? cache.get(k) : null,
|
||||
cacheSet: async (k, v) => { cache.set(k, v); },
|
||||
};
|
||||
}
|
||||
|
||||
describe('runSynthesisWithFallback — three-level chain', () => {
  // Fresh story list per call so no test can observe mutation from another.
  const stories = () => [{ hash: 'h1', headline: 'Story 1', threatLevel: 'critical' }];

  it('L1 success — canonical synthesis returned, level=1', async () => {
    const deps = makeDeps(async () => JSON.stringify(validProse));
    const events = [];
    const result = await runSynthesisWithFallback(
      'u1',
      stories(),
      'all',
      { profile: 'Watching: oil', greeting: 'Good morning' },
      deps,
      (level, kind) => events.push({ level, kind }),
    );
    assert.ok(result.synthesis);
    assert.equal(result.level, 1);
    assert.match(result.synthesis.lead, /editorial tone/);
    assert.deepEqual(events, [{ level: 1, kind: 'success' }]);
  });

  it('L1 LLM down → L2 succeeds, level=2', async () => {
    // generateDigestProse absorbs callLLM throws and returns null (its
    // return-null-on-failure contract), so runSynthesisWithFallback sees
    // the L1 attempt as a 'fall' event rather than a 'throw'. We pin the
    // behavior (L2 wins) and the observed trace.
    let threwOnce = false;
    const deps = makeDeps(async () => {
      if (!threwOnce) {
        threwOnce = true;
        throw new Error('L1 LLM down');
      }
      return JSON.stringify(validProse);
    });
    const events = [];
    const result = await runSynthesisWithFallback(
      'u1',
      stories(),
      'all',
      { profile: 'Watching: oil', greeting: 'Good morning' },
      deps,
      (level, kind) => events.push({ level, kind }),
    );
    assert.ok(result.synthesis);
    assert.equal(result.level, 2);
    // Trace: L1 fell (throw absorbed into null), then L2 succeeded.
    assert.equal(events[0].level, 1);
    assert.equal(events[0].kind, 'fall');
    assert.equal(events[1].level, 2);
    assert.equal(events[1].kind, 'success');
  });

  it('L1 returns null + L2 returns null → L3 stub, level=3', async () => {
    const deps = makeDeps(async () => null); // every call yields null
    const events = [];
    const result = await runSynthesisWithFallback(
      'u1',
      stories(),
      'all',
      { profile: null, greeting: null },
      deps,
      (level, kind) => events.push({ level, kind }),
    );
    assert.equal(result.synthesis, null);
    assert.equal(result.level, 3);
    // synthesis=null at level 3 is the stub path's documented contract.
    assert.deepEqual(events.map((e) => `${e.level}:${e.kind}`), [
      '1:fall',
      '2:fall',
      '3:success',
    ]);
  });

  it('cache.cacheGet throws — generateDigestProse swallows it, L1 still succeeds via LLM call', async () => {
    // The cache is best-effort: generateDigestProse's try/catch absorbs a
    // cache-layer outage and falls through to a fresh LLM call. Pinned so
    // a future refactor that narrows the catch shows up here.
    const deps = {
      callLLM: async () => JSON.stringify(validProse),
      cacheGet: async () => { throw new Error('cache outage'); },
      cacheSet: async () => {},
    };
    const result = await runSynthesisWithFallback(
      'u1',
      stories(),
      'all',
      { profile: null, greeting: null },
      deps,
    );
    assert.ok(result.synthesis);
    assert.equal(result.level, 1);
  });

  it('callLLM down on every call → L3 stub, no exception escapes', async () => {
    const deps = makeDeps(async () => { throw new Error('LLM totally down'); });
    const result = await runSynthesisWithFallback(
      'u1',
      stories(),
      'all',
      { profile: null, greeting: null },
      deps,
    );
    // Each throw is absorbed upstream into null, the chain bottoms out at
    // the L3 stub, and the brief still ships.
    assert.equal(result.synthesis, null);
    assert.equal(result.level, 3);
  });

  it('omits trace callback safely (defensive — production callers may not pass one)', async () => {
    const deps = makeDeps(async () => JSON.stringify(validProse));
    // Deliberately no trace argument.
    const result = await runSynthesisWithFallback(
      'u1',
      stories(),
      'all',
      { profile: null, greeting: null },
      deps,
    );
    assert.equal(result.level, 1);
    assert.ok(result.synthesis);
  });
});
|
||||
418
tests/energy-risk-overview-panel.test.mts
Normal file
@@ -0,0 +1,418 @@
|
||||
// @ts-check
|
||||
//
|
||||
// Tests for src/components/EnergyRiskOverviewPanel.ts — the executive
|
||||
// overview panel composing 5 existing data sources with degraded-mode
|
||||
// fallback. The single most important behavior is that one slow/failing
|
||||
// source does NOT freeze the others (Promise.allSettled, never .all).
|
||||
//
|
||||
// Test strategy:
|
||||
//
|
||||
// 1. Color/threshold/label helpers are PINNED inline — they encode product
|
||||
// decisions (importer-leaning Brent inversion, Hormuz status enum
|
||||
// rejection of the wrong-cased triplet) and shouldn't drift via a
|
||||
// copy-paste edit in the panel file.
|
||||
//
|
||||
// 2. The state-building logic is extracted into
|
||||
// `src/components/_energy-risk-overview-state.ts` so we can import
|
||||
// and exercise it end-to-end without pulling in the panel's Vite-only
|
||||
// transitive deps (i18n's `import.meta.glob`, etc). This is the
|
||||
// "real component test" Codex review #3398 P2 asked for: it imports
|
||||
// the production state builder the panel actually uses.
|
||||
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { test, describe } from 'node:test';
|
||||
import { buildOverviewState, countDegradedTiles } from '../src/components/_energy-risk-overview-state.ts';
|
||||
|
||||
// Pure helpers extracted from the panel for unit testing. The actual panel
|
||||
// uses these inline; this file pins their contract so future edits can't
|
||||
// silently change semantics (e.g. flipping the Brent up=red convention).
|
||||
|
||||
function hormuzColor(status: string): string {
|
||||
const map: Record<string, string> = {
|
||||
closed: '#e74c3c',
|
||||
disrupted: '#e74c3c',
|
||||
restricted: '#f39c12',
|
||||
open: '#27ae60',
|
||||
};
|
||||
return map[status] ?? '#7f8c8d';
|
||||
}
|
||||
|
||||
function euGasColor(fillPct: number): string {
|
||||
if (fillPct < 30) return '#e74c3c';
|
||||
if (fillPct < 50) return '#f39c12';
|
||||
return '#27ae60';
|
||||
}
|
||||
|
||||
function brentColor(change: number): string {
|
||||
// Atlas reader is energy-importer-leaning: oil price UP = red (bad);
|
||||
// DOWN = green (relief). Inverted from a default market panel.
|
||||
return change >= 0 ? '#e74c3c' : '#27ae60';
|
||||
}
|
||||
|
||||
function activeDisruptionsColor(n: number): string {
|
||||
if (n === 0) return '#27ae60';
|
||||
if (n < 5) return '#f39c12';
|
||||
return '#e74c3c';
|
||||
}
|
||||
|
||||
function freshnessLabel(youngestMs: number, nowMs: number): string {
|
||||
const ageMin = Math.floor((nowMs - youngestMs) / 60_000);
|
||||
if (ageMin <= 0) return 'just now';
|
||||
if (ageMin === 1) return '1 min ago';
|
||||
return `${ageMin} min ago`;
|
||||
}
|
||||
|
||||
function crisisDayLabel(crisisStartMs: number, nowMs: number): string {
|
||||
if (!Number.isFinite(crisisStartMs)) return '—';
|
||||
const days = Math.floor((nowMs - crisisStartMs) / 86_400_000);
|
||||
if (days < 0) return 'pending';
|
||||
return `Day ${days}`;
|
||||
}
|
||||
|
||||
describe('EnergyRiskOverviewPanel — Hormuz status color', () => {
|
||||
test("'closed' and 'disrupted' both render red (severity equivalent)", () => {
|
||||
assert.equal(hormuzColor('closed'), '#e74c3c');
|
||||
assert.equal(hormuzColor('disrupted'), '#e74c3c');
|
||||
});
|
||||
|
||||
test("'restricted' renders amber", () => {
|
||||
assert.equal(hormuzColor('restricted'), '#f39c12');
|
||||
});
|
||||
|
||||
test("'open' renders green", () => {
|
||||
assert.equal(hormuzColor('open'), '#27ae60');
|
||||
});
|
||||
|
||||
test('unknown status falls back to neutral gray (degraded sentinel)', () => {
|
||||
// If the upstream enum ever drifts (e.g. someone adds 'minor-incident'),
|
||||
// the panel must not throw — gray sentinel is the fallback.
|
||||
assert.equal(hormuzColor('weird-new-state'), '#7f8c8d');
|
||||
});
|
||||
|
||||
test('rejects the wrong-cased triplet from earlier drafts', () => {
|
||||
// 'normal'|'reduced'|'critical' was the WRONG enum. None of those values
|
||||
// are valid; all should fall to gray sentinel.
|
||||
assert.equal(hormuzColor('normal'), '#7f8c8d');
|
||||
assert.equal(hormuzColor('reduced'), '#7f8c8d');
|
||||
assert.equal(hormuzColor('critical'), '#7f8c8d');
|
||||
});
|
||||
});
|
||||
|
||||
describe('EnergyRiskOverviewPanel — EU Gas color thresholds', () => {
|
||||
test('< 30% fill → red', () => {
|
||||
assert.equal(euGasColor(28), '#e74c3c');
|
||||
assert.equal(euGasColor(0), '#e74c3c');
|
||||
assert.equal(euGasColor(29.9), '#e74c3c');
|
||||
});
|
||||
|
||||
test('30%–49% fill → amber', () => {
|
||||
assert.equal(euGasColor(30), '#f39c12');
|
||||
assert.equal(euGasColor(42), '#f39c12');
|
||||
assert.equal(euGasColor(49.9), '#f39c12');
|
||||
});
|
||||
|
||||
test('≥ 50% fill → green', () => {
|
||||
assert.equal(euGasColor(50), '#27ae60');
|
||||
assert.equal(euGasColor(90), '#27ae60');
|
||||
assert.equal(euGasColor(100), '#27ae60');
|
||||
});
|
||||
});
|
||||
|
||||
describe('EnergyRiskOverviewPanel — Brent color (importer-leaning inversion)', () => {
|
||||
test('positive change → red (oil up = bad for importers)', () => {
|
||||
assert.equal(brentColor(0.5), '#e74c3c');
|
||||
assert.equal(brentColor(10), '#e74c3c');
|
||||
assert.equal(brentColor(0), '#e74c3c'); // exact zero → red (no-change is neutral-bearish)
|
||||
});
|
||||
|
||||
test('negative change → green', () => {
|
||||
assert.equal(brentColor(-0.5), '#27ae60');
|
||||
assert.equal(brentColor(-12), '#27ae60');
|
||||
});
|
||||
});
|
||||
|
||||
describe('EnergyRiskOverviewPanel — active disruptions color', () => {
|
||||
test('0 active → green', () => {
|
||||
assert.equal(activeDisruptionsColor(0), '#27ae60');
|
||||
});
|
||||
|
||||
test('1-4 active → amber', () => {
|
||||
assert.equal(activeDisruptionsColor(1), '#f39c12');
|
||||
assert.equal(activeDisruptionsColor(4), '#f39c12');
|
||||
});
|
||||
|
||||
test('5+ active → red', () => {
|
||||
assert.equal(activeDisruptionsColor(5), '#e74c3c');
|
||||
assert.equal(activeDisruptionsColor(50), '#e74c3c');
|
||||
});
|
||||
});
|
||||
|
||||
describe('EnergyRiskOverviewPanel — freshness label', () => {
|
||||
test('age 0 → "just now"', () => {
|
||||
const now = Date.now();
|
||||
assert.equal(freshnessLabel(now, now), 'just now');
|
||||
});
|
||||
|
||||
test('age 1 minute → "1 min ago"', () => {
|
||||
const now = Date.now();
|
||||
assert.equal(freshnessLabel(now - 60_000, now), '1 min ago');
|
||||
});
|
||||
|
||||
test('age 5 minutes → "5 min ago"', () => {
|
||||
const now = Date.now();
|
||||
assert.equal(freshnessLabel(now - 5 * 60_000, now), '5 min ago');
|
||||
});
|
||||
|
||||
test('age slightly under 1 min still shows "just now"', () => {
|
||||
const now = Date.now();
|
||||
assert.equal(freshnessLabel(now - 30_000, now), 'just now');
|
||||
});
|
||||
});
|
||||
|
||||
describe('EnergyRiskOverviewPanel — crisis-day counter', () => {
|
||||
test('today exactly 0 days from start → "Day 0"', () => {
|
||||
const start = Date.UTC(2026, 3, 25); // 2026-04-25
|
||||
const now = Date.UTC(2026, 3, 25, 12, 0, 0); // same day, noon
|
||||
assert.equal(crisisDayLabel(start, now), 'Day 0');
|
||||
});
|
||||
|
||||
test('5 days after start → "Day 5"', () => {
|
||||
const start = Date.UTC(2026, 3, 25);
|
||||
const now = Date.UTC(2026, 3, 30);
|
||||
assert.equal(crisisDayLabel(start, now), 'Day 5');
|
||||
});
|
||||
|
||||
test('default 2026-02-23 start gives a positive day count today', () => {
|
||||
const start = Date.parse('2026-02-23T00:00:00Z');
|
||||
const now = Date.parse('2026-04-25T12:00:00Z');
|
||||
assert.equal(crisisDayLabel(start, now), 'Day 61');
|
||||
});
|
||||
|
||||
test('NaN start (mis-configured env) → "—" sentinel', () => {
|
||||
assert.equal(crisisDayLabel(NaN, Date.now()), '—');
|
||||
});
|
||||
|
||||
test('future-dated start → "pending" sentinel', () => {
|
||||
const start = Date.now() + 86_400_000; // tomorrow
|
||||
assert.equal(crisisDayLabel(start, Date.now()), 'pending');
|
||||
});
|
||||
});
|
||||
|
||||
describe('EnergyRiskOverviewPanel — degraded-mode contract', () => {
|
||||
// The real panel uses Promise.allSettled and renders each tile
|
||||
// independently. We pin the contract here as a state-shape guarantee:
|
||||
// if all four upstream signals fail, the panel must still produce
|
||||
// 6 tiles (4 data + freshness + crisis-day), with the 4 data tiles
|
||||
// each marked data-degraded. We assert this against a stub state.
|
||||
|
||||
function renderTileShape(state: 'fulfilled' | 'rejected'): { degraded: boolean; visible: boolean } {
|
||||
return {
|
||||
visible: true, // every tile renders regardless
|
||||
degraded: state === 'rejected', // failed tiles get the data-degraded marker
|
||||
};
|
||||
}
|
||||
|
||||
test('all-fail state still produces 6 visible tiles', () => {
|
||||
const tiles = [
|
||||
renderTileShape('rejected'), // hormuz
|
||||
renderTileShape('rejected'), // euGas
|
||||
renderTileShape('rejected'), // brent
|
||||
renderTileShape('rejected'), // active disruptions
|
||||
// freshness + crisis day always visible (computed locally)
|
||||
renderTileShape('fulfilled'),
|
||||
renderTileShape('fulfilled'),
|
||||
];
|
||||
assert.equal(tiles.filter(t => t.visible).length, 6);
|
||||
assert.equal(tiles.filter(t => t.degraded).length, 4);
|
||||
});
|
||||
|
||||
test('one-fail state shows 1 degraded tile and 5 normal', () => {
|
||||
const tiles = [
|
||||
renderTileShape('fulfilled'),
|
||||
renderTileShape('rejected'), // EU gas down
|
||||
renderTileShape('fulfilled'),
|
||||
renderTileShape('fulfilled'),
|
||||
renderTileShape('fulfilled'),
|
||||
renderTileShape('fulfilled'),
|
||||
];
|
||||
assert.equal(tiles.filter(t => t.degraded).length, 1);
|
||||
});
|
||||
});
|
||||
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
// Real state-builder tests — import the SAME helper the panel uses (per
|
||||
// review #3398 P2). Exercises the Promise.allSettled → OverviewState
|
||||
// translation that the panel's fetchData() relies on.
|
||||
// ─────────────────────────────────────────────────────────────────────────
|
||||
|
||||
const NOW = 1735000000000; // fixed clock so fetchedAt assertions are deterministic
|
||||
|
||||
function fulfilled<T>(value: T): PromiseFulfilledResult<T> {
|
||||
return { status: 'fulfilled', value };
|
||||
}
|
||||
function rejected(reason = new Error('test')): PromiseRejectedResult {
|
||||
return { status: 'rejected', reason };
|
||||
}
|
||||
|
||||
describe('EnergyRiskOverviewPanel — buildOverviewState (real component logic)', () => {
|
||||
test('all four sources fulfilled → 0 degraded tiles', () => {
|
||||
const state = buildOverviewState(
|
||||
fulfilled({ status: 'open' }),
|
||||
fulfilled({ unavailable: false, fillPct: 75, fillPctChange1d: 0.5 }),
|
||||
fulfilled({ data: [{ price: 88.5, change: -0.3 }] }),
|
||||
fulfilled({ upstreamUnavailable: false, events: [{ endAt: null }, { endAt: '2026-01-01' }, { endAt: null }] }),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(countDegradedTiles(state), 0);
|
||||
assert.equal(state.hormuz.status, 'fulfilled');
|
||||
assert.equal(state.hormuz.value?.status, 'open');
|
||||
assert.equal(state.euGas.value?.fillPct, 75);
|
||||
assert.equal(state.brent.value?.price, 88.5);
|
||||
assert.equal(state.activeDisruptions.value?.count, 2, 'only events with endAt === null are active');
|
||||
assert.equal(state.hormuz.fetchedAt, NOW);
|
||||
});
|
||||
|
||||
test('all four sources rejected → 4 degraded tiles, no throw, no cascade', () => {
|
||||
// The single most important behavior: Promise.allSettled never throws,
|
||||
// every tile resolves to a state independently. This is the core
|
||||
// degraded-mode contract — one source failing CANNOT cascade.
|
||||
const state = buildOverviewState(
|
||||
rejected(),
|
||||
rejected(),
|
||||
rejected(),
|
||||
rejected(),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(countDegradedTiles(state), 4);
|
||||
for (const t of Object.values(state)) {
|
||||
assert.equal(t.status, 'rejected');
|
||||
assert.equal(t.fetchedAt, undefined, 'rejected tiles must not carry a fetchedAt');
|
||||
}
|
||||
});
|
||||
|
||||
test('mixed: hormuz fulfilled, others rejected → only hormuz tile populated', () => {
|
||||
const state = buildOverviewState(
|
||||
fulfilled({ status: 'disrupted' }),
|
||||
rejected(),
|
||||
rejected(),
|
||||
rejected(),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(countDegradedTiles(state), 3);
|
||||
assert.equal(state.hormuz.status, 'fulfilled');
|
||||
assert.equal(state.hormuz.value?.status, 'disrupted');
|
||||
});
|
||||
|
||||
test('euGas with unavailable: true → degraded (treats sentinel as failure)', () => {
|
||||
// The euGas service returns a sentinel `{ unavailable: true, ... }`
|
||||
// shape on relay outage. The panel must NOT show those zeros as a
|
||||
// valid 0% fill — that would be a false alarm.
|
||||
const state = buildOverviewState(
|
||||
fulfilled({ status: 'open' }),
|
||||
fulfilled({ unavailable: true, fillPct: 0, fillPctChange1d: 0 }),
|
||||
fulfilled({ data: [{ price: 88, change: 0 }] }),
|
||||
fulfilled({ upstreamUnavailable: false, events: [] }),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(state.euGas.status, 'rejected');
|
||||
});
|
||||
|
||||
test('euGas with fillPct=0 → degraded (treated as no-data)', () => {
|
||||
// 0% fill is not a legitimate state in the EU storage cycle; treating
|
||||
// it as fulfilled would render a misleading "EU GAS 0%" tile in red.
|
||||
const state = buildOverviewState(
|
||||
rejected(),
|
||||
fulfilled({ unavailable: false, fillPct: 0, fillPctChange1d: 0 }),
|
||||
rejected(),
|
||||
rejected(),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(state.euGas.status, 'rejected');
|
||||
});
|
||||
|
||||
test('brent with empty data array → degraded', () => {
|
||||
const state = buildOverviewState(
|
||||
rejected(),
|
||||
rejected(),
|
||||
fulfilled({ data: [] }),
|
||||
rejected(),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(state.brent.status, 'rejected');
|
||||
});
|
||||
|
||||
test('brent with first quote price=null → degraded (no-data sentinel)', () => {
|
||||
const state = buildOverviewState(
|
||||
rejected(),
|
||||
rejected(),
|
||||
fulfilled({ data: [{ price: null, change: 0 }] }),
|
||||
rejected(),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(state.brent.status, 'rejected');
|
||||
});
|
||||
|
||||
test('disruptions with upstreamUnavailable: true → degraded', () => {
|
||||
const state = buildOverviewState(
|
||||
rejected(),
|
||||
rejected(),
|
||||
rejected(),
|
||||
fulfilled({ upstreamUnavailable: true, events: [] }),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(state.activeDisruptions.status, 'rejected');
|
||||
});
|
||||
|
||||
test('disruptions ongoing-only filter: only events with endAt===null count', () => {
|
||||
const state = buildOverviewState(
|
||||
rejected(),
|
||||
rejected(),
|
||||
rejected(),
|
||||
fulfilled({
|
||||
upstreamUnavailable: false,
|
||||
events: [
|
||||
{ endAt: null }, // ongoing
|
||||
{ endAt: '2026-04-20' }, // resolved
|
||||
{ endAt: undefined }, // ongoing (undefined is falsy too)
|
||||
{ endAt: '' }, // ongoing (empty string is falsy)
|
||||
{ endAt: null }, // ongoing
|
||||
],
|
||||
}),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(state.activeDisruptions.value?.count, 4);
|
||||
});
|
||||
|
||||
test('hormuz fulfilled but value.status missing → degraded (sentinel for malformed response)', () => {
|
||||
// Defense-in-depth: a bad shape from the upstream relay shouldn't
|
||||
// render an empty Hormuz tile that says "undefined".
|
||||
const state = buildOverviewState(
|
||||
fulfilled({} as { status?: string }),
|
||||
rejected(),
|
||||
rejected(),
|
||||
rejected(),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(state.hormuz.status, 'rejected');
|
||||
});
|
||||
|
||||
test('one slow source rejecting must not cascade to fulfilled siblings', () => {
|
||||
// This is the exact failure mode review #3398 P2 was checking the
|
||||
// panel for. With Promise.all, one rejection would short-circuit the
|
||||
// whole batch. With Promise.allSettled (which the panel uses) and
|
||||
// buildOverviewState (which the panel calls), each tile resolves
|
||||
// independently. Pin that contract.
|
||||
const state = buildOverviewState(
|
||||
rejected(),
|
||||
fulfilled({ unavailable: false, fillPct: 50, fillPctChange1d: 0 }),
|
||||
fulfilled({ data: [{ price: 80, change: 1 }] }),
|
||||
fulfilled({ upstreamUnavailable: false, events: [] }),
|
||||
NOW,
|
||||
);
|
||||
assert.equal(state.hormuz.status, 'rejected');
|
||||
assert.equal(state.euGas.status, 'fulfilled');
|
||||
assert.equal(state.brent.status, 'fulfilled');
|
||||
assert.equal(state.activeDisruptions.status, 'fulfilled');
|
||||
assert.equal(countDegradedTiles(state), 1);
|
||||
});
|
||||
});
|
||||
117
tests/fixtures/gem-pipelines-sample.json
vendored
Normal file
@@ -0,0 +1,117 @@
|
||||
{
|
||||
"source": "Global Energy Monitor — Oil & Gas Infrastructure Trackers (CC-BY 4.0)",
|
||||
"sourceVersion": "2026-Q1-fixture",
|
||||
"sourceUrl": "https://globalenergymonitor.org/projects/global-oil-gas-infrastructure-tracker/",
|
||||
"downloadedAt": "2026-04-25",
|
||||
"_note": "Trimmed 6-row fixture for parser tests. Real input has the same shape — the operator pre-converts the GEM Excel release to this JSON form externally (Numbers / pandas / csvkit). Real production runs ingest hundreds of rows; this fixture is intentionally minimal and covers the status, productClass, capacity-unit, and bbox-validity mapping cases.",
|
||||
"pipelines": [
|
||||
{
|
||||
"name": "Test Operating Gas Trunk",
|
||||
"operator": "Test Gas Operator",
|
||||
"fuel": "Natural Gas",
|
||||
"product": "",
|
||||
"fromCountry": "NO",
|
||||
"toCountry": "DE",
|
||||
"transitCountries": [],
|
||||
"capacity": 24,
|
||||
"capacityUnit": "bcm/y",
|
||||
"lengthKm": 850,
|
||||
"status": "Operating",
|
||||
"startYear": 1995,
|
||||
"startLat": 58.5,
|
||||
"startLon": 1.7,
|
||||
"endLat": 53.5,
|
||||
"endLon": 8.5
|
||||
},
|
||||
{
|
||||
"name": "Test Construction Gas Pipe",
|
||||
"operator": "Test Builder",
|
||||
"fuel": "Natural Gas",
|
||||
"product": "",
|
||||
"fromCountry": "AZ",
|
||||
"toCountry": "TR",
|
||||
"transitCountries": ["GE"],
|
||||
"capacity": 16,
|
||||
"capacityUnit": "bcm/y",
|
||||
"lengthKm": 1840,
|
||||
"status": "Construction",
|
||||
"startYear": 2027,
|
||||
"startLat": 40.4,
|
||||
"startLon": 49.9,
|
||||
"endLat": 40.9,
|
||||
"endLon": 28.9
|
||||
},
|
||||
{
|
||||
"name": "Test Cancelled Gas Pipeline",
|
||||
"operator": "Test Cancelled Sponsor",
|
||||
"fuel": "Natural Gas",
|
||||
"product": "",
|
||||
"fromCountry": "GR",
|
||||
"toCountry": "BG",
|
||||
"transitCountries": [],
|
||||
"capacity": 3,
|
||||
"capacityUnit": "bcm/y",
|
||||
"lengthKm": 180,
|
||||
"status": "Cancelled",
|
||||
"startYear": null,
|
||||
"startLat": 40.6,
|
||||
"startLon": 22.9,
|
||||
"endLat": 42.5,
|
||||
"endLon": 25.0
|
||||
},
|
||||
{
|
||||
"name": "Test Crude Oil Trunk",
|
||||
"operator": "Test Oil Operator",
|
||||
"fuel": "Oil",
|
||||
"product": "Crude Oil",
|
||||
"fromCountry": "KZ",
|
||||
"toCountry": "CN",
|
||||
"transitCountries": [],
|
||||
"capacity": 400000,
|
||||
"capacityUnit": "bbl/d",
|
||||
"lengthKm": 2200,
|
||||
"status": "Operating",
|
||||
"startYear": 2009,
|
||||
"startLat": 47.1,
|
||||
"startLon": 51.9,
|
||||
"endLat": 45.0,
|
||||
"endLon": 87.6
|
||||
},
|
||||
{
|
||||
"name": "Test Refined Products Line",
|
||||
"operator": "Test Products Operator",
|
||||
"fuel": "Oil",
|
||||
"product": "Refined Products",
|
||||
"fromCountry": "US",
|
||||
"toCountry": "US",
|
||||
"transitCountries": [],
|
||||
"capacity": 0.65,
|
||||
"capacityUnit": "Mbd",
|
||||
"lengthKm": 1500,
|
||||
"status": "Operating",
|
||||
"startYear": 1962,
|
||||
"startLat": 29.7,
|
||||
"startLon": -94.2,
|
||||
"endLat": 41.0,
|
||||
"endLon": -73.9
|
||||
},
|
||||
{
|
||||
"name": "Test Mothballed Crude Bypass",
|
||||
"operator": "Test Mothballed Operator",
|
||||
"fuel": "Oil",
|
||||
"product": "Crude Oil",
|
||||
"fromCountry": "IQ",
|
||||
"toCountry": "TR",
|
||||
"transitCountries": [],
|
||||
"capacity": 1000000,
|
||||
"capacityUnit": "bbl/d",
|
||||
"lengthKm": 970,
|
||||
"status": "Mothballed",
|
||||
"startYear": 1977,
|
||||
"startLat": 35.5,
|
||||
"startLon": 44.4,
|
||||
"endLat": 36.2,
|
||||
"endLon": 36.1
|
||||
}
|
||||
]
|
||||
}
|
||||
281
tests/import-gem-pipelines.test.mjs
Normal file
@@ -0,0 +1,281 @@
|
||||
// @ts-check
|
||||
//
|
||||
// Tests for scripts/import-gem-pipelines.mjs — the GEM Oil & Gas Infrastructure
|
||||
// Tracker → registry-shape parser. Test-first per the plan's Execution note: the
|
||||
// schema-sentinel + status/productClass/capacity-unit mapping is the highest-
|
||||
// risk failure mode, so coverage for it lands before the implementation does.
|
||||
//
|
||||
// Fixture: tests/fixtures/gem-pipelines-sample.json — operator-shape JSON
|
||||
// (Excel pre-converted externally; the parser is local-file-only, no xlsx
|
||||
// dep, no runtime URL fetch).
|
||||
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { test, describe } from 'node:test';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
import { parseGemPipelines, REQUIRED_COLUMNS } from '../scripts/import-gem-pipelines.mjs';
|
||||
import { validateRegistry } from '../scripts/_pipeline-registry.mjs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const fixturePath = resolve(__dirname, 'fixtures/gem-pipelines-sample.json');
|
||||
const fixture = JSON.parse(readFileSync(fixturePath, 'utf-8'));
|
||||
|
||||
describe('import-gem-pipelines — schema sentinel', () => {
  test('REQUIRED_COLUMNS is exported and non-empty', () => {
    assert.ok(Array.isArray(REQUIRED_COLUMNS));
    assert.ok(REQUIRED_COLUMNS.length >= 5);
  });

  test('throws on missing required column', () => {
    // Drop `name` from every row; the sentinel must refuse the input.
    const withoutName = fixture.pipelines.map(({ name: _drop, ...rest }) => rest);
    const broken = { ...fixture, pipelines: withoutName };
    assert.throws(
      () => parseGemPipelines(broken),
      /missing|name|schema/i,
      'parser must throw on column drift, not silently accept',
    );
  });

  test('throws on non-object input', () => {
    assert.throws(() => parseGemPipelines(null), /input/i);
    assert.throws(() => parseGemPipelines([]), /input|pipelines/i);
  });

  test('throws when pipelines field is missing', () => {
    assert.throws(() => parseGemPipelines({ source: 'test' }), /pipelines/i);
  });
});

describe('import-gem-pipelines — fuel split', () => {
  test('splits gas + oil into two arrays', () => {
    const { gas, oil } = parseGemPipelines(fixture);
    assert.equal(gas.length, 3, 'fixture has 3 gas rows');
    assert.equal(oil.length, 3, 'fixture has 3 oil rows');
  });

  test('gas pipelines do NOT carry productClass (gas registry forbids it)', () => {
    for (const p of parseGemPipelines(fixture).gas) {
      assert.equal(p.productClass, undefined, `${p.name}: gas should not have productClass`);
    }
  });

  test('every oil pipeline declares a productClass from the enum', () => {
    const allowed = ['crude', 'products', 'mixed'];
    for (const p of parseGemPipelines(fixture).oil) {
      assert.ok(
        allowed.includes(p.productClass),
        `${p.name} has invalid productClass: ${p.productClass}`,
      );
    }
  });
});

describe('import-gem-pipelines — status mapping', () => {
  test("'Operating' maps to physicalState='flowing'", () => {
    const { gas, oil } = parseGemPipelines(fixture);
    const operating = [...gas, ...oil].filter((p) => p.name.includes('Operating'));
    assert.ok(operating.length > 0);
    for (const p of operating) {
      assert.equal(p.evidence.physicalState, 'flowing');
    }
  });

  test("'Construction' maps to physicalState='unknown' (planned/not commissioned)", () => {
    const underConstruction = parseGemPipelines(fixture).gas.find((p) => p.name.includes('Construction'));
    assert.ok(underConstruction);
    assert.equal(underConstruction.evidence.physicalState, 'unknown');
  });

  test("'Cancelled' / 'Mothballed' map to physicalState='offline'", () => {
    const { gas, oil } = parseGemPipelines(fixture);
    const cancelled = gas.find((p) => p.name.includes('Cancelled'));
    const mothballed = oil.find((p) => p.name.includes('Mothballed'));
    assert.ok(cancelled);
    assert.ok(mothballed);
    assert.equal(cancelled.evidence.physicalState, 'offline');
    assert.equal(mothballed.evidence.physicalState, 'offline');
  });
});
|
||||
|
||||
describe('import-gem-pipelines — productClass mapping', () => {
  test("'Crude Oil' product → productClass='crude'", () => {
    const crude = parseGemPipelines(fixture).oil.find((p) => p.name.includes('Crude Oil Trunk'));
    assert.ok(crude);
    assert.equal(crude.productClass, 'crude');
  });

  test("'Refined Products' product → productClass='products'", () => {
    const refined = parseGemPipelines(fixture).oil.find((p) => p.name.includes('Refined Products'));
    assert.ok(refined);
    assert.equal(refined.productClass, 'products');
  });
});

describe('import-gem-pipelines — capacity-unit conversion', () => {
  test('gas capacity in bcm/y is preserved unchanged', () => {
    const opGas = parseGemPipelines(fixture).gas.find((p) => p.name.includes('Operating'));
    assert.ok(opGas);
    assert.equal(opGas.capacityBcmYr, 24);
  });

  test('oil capacity in bbl/d is converted to Mbd (thousand barrels per day)', () => {
    const crude = parseGemPipelines(fixture).oil.find((p) => p.name.includes('Crude Oil Trunk'));
    assert.ok(crude);
    // Naming caveat: the field is called `capacityMbd` (customary industry
    // abbreviation) but its VALUE is MILLIONS of barrels/day, not thousands
    // — matching the hand-curated rows on main (e.g. CPC ships as
    // `capacityMbd: 1.4` for 1.4M bbl/d). So 400_000 bbl/d → 0.4.
    assert.equal(crude.capacityMbd, 0.4);
  });

  test('oil capacity already in Mbd is preserved unchanged', () => {
    const refined = parseGemPipelines(fixture).oil.find((p) => p.name.includes('Refined Products'));
    assert.ok(refined);
    assert.equal(refined.capacityMbd, 0.65);
  });
});

describe('import-gem-pipelines — minimum-viable evidence', () => {
  // Every emitted candidate must ship the same low-trust evidence envelope.
  const allCandidates = () => {
    const { gas, oil } = parseGemPipelines(fixture);
    return [...gas, ...oil];
  };

  test('every emitted candidate has physicalStateSource=gem', () => {
    for (const p of allCandidates()) {
      assert.equal(p.evidence.physicalStateSource, 'gem');
    }
  });

  test('every emitted candidate has classifierVersion=gem-import-v1', () => {
    for (const p of allCandidates()) {
      assert.equal(p.evidence.classifierVersion, 'gem-import-v1');
    }
  });

  test('every emitted candidate has classifierConfidence ≤ 0.5', () => {
    for (const p of allCandidates()) {
      assert.ok(p.evidence.classifierConfidence <= 0.5);
      assert.ok(p.evidence.classifierConfidence >= 0);
    }
  });

  test('every emitted candidate has empty sanctionRefs and null operatorStatement', () => {
    for (const p of allCandidates()) {
      assert.deepEqual(p.evidence.sanctionRefs, []);
      assert.equal(p.evidence.operatorStatement, null);
    }
  });
});

describe('import-gem-pipelines — registry-shape conformance', () => {
  // The repeat count derives from the floor + the fixture row count, so the
  // test stays correct if the fixture is trimmed or the floor is raised
  // (the old hardcoded 70 was fragile — Greptile P2 on PR #3406). +5 over
  // the floor leaves a safety margin without inflating the test.
  const REGISTRY_FLOOR = 200;

  // Replicates rows (with unique ids) until the registry floor is cleared.
  const inflate = (rows) => {
    const reps = Math.ceil(REGISTRY_FLOOR / rows.length) + 5;
    const repeated = [];
    for (let i = 0; i < reps; i++) {
      for (const p of rows) repeated.push({ ...p, id: `${p.id}-rep${i}` });
    }
    return { pipelines: Object.fromEntries(repeated.map((p) => [p.id, p])) };
  };

  test('emitted gas registry passes validateRegistry', () => {
    assert.equal(validateRegistry(inflate(parseGemPipelines(fixture).gas)), true);
  });

  test('emitted oil registry passes validateRegistry', () => {
    assert.equal(validateRegistry(inflate(parseGemPipelines(fixture).oil)), true);
  });
});
|
||||
|
||||
describe('import-gem-pipelines — determinism (review-fix #3)', () => {
|
||||
test('two parser runs on identical input produce identical output', () => {
|
||||
// Regression: pre-fix, lastEvidenceUpdate used new Date() per run, so
|
||||
// re-running parseGemPipelines on the same JSON on different days
|
||||
// produced different output → noisy diffs every quarterly re-import.
|
||||
// Now derived from envelope.downloadedAt, so output is byte-identical.
|
||||
const r1 = JSON.stringify(parseGemPipelines(fixture));
|
||||
const r2 = JSON.stringify(parseGemPipelines(fixture));
|
||||
assert.equal(r1, r2);
|
||||
});
|
||||
|
||||
test('lastEvidenceUpdate derives from envelope.downloadedAt', () => {
|
||||
// Fixture has downloadedAt: 2026-04-25 → emitted as 2026-04-25T00:00:00Z.
|
||||
const { gas } = parseGemPipelines(fixture);
|
||||
for (const p of gas) {
|
||||
assert.equal(p.evidence.lastEvidenceUpdate, '2026-04-25T00:00:00Z');
|
||||
}
|
||||
});
|
||||
|
||||
test('missing downloadedAt → epoch sentinel (loud failure, not silent today)', () => {
|
||||
// If the operator forgets the date field, the emitted timestamp should
|
||||
// be obviously wrong rather than today's wall clock — surfaces the
|
||||
// gap in code review of the data file.
|
||||
const noDate = { ...fixture };
|
||||
delete noDate.downloadedAt;
|
||||
delete noDate.sourceVersion;
|
||||
const { gas } = parseGemPipelines(noDate);
|
||||
for (const p of gas) {
|
||||
assert.equal(p.evidence.lastEvidenceUpdate, '1970-01-01T00:00:00Z');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('import-gem-pipelines — coordinate validity', () => {
|
||||
test('rows with invalid lat/lon are dropped (not silently kept with lat=0)', () => {
|
||||
const broken = {
|
||||
...fixture,
|
||||
pipelines: [
|
||||
...fixture.pipelines,
|
||||
{
|
||||
name: 'Test Bad Coords',
|
||||
operator: 'X',
|
||||
fuel: 'Natural Gas',
|
||||
product: '',
|
||||
fromCountry: 'XX',
|
||||
toCountry: 'YY',
|
||||
transitCountries: [],
|
||||
capacity: 5,
|
||||
capacityUnit: 'bcm/y',
|
||||
lengthKm: 100,
|
||||
status: 'Operating',
|
||||
startYear: 2020,
|
||||
startLat: 200, // out of range
|
||||
startLon: 0,
|
||||
endLat: 0,
|
||||
endLon: 0,
|
||||
},
|
||||
],
|
||||
};
|
||||
const { gas } = parseGemPipelines(broken);
|
||||
const bad = gas.find((p) => p.name.includes('Bad Coords'));
|
||||
assert.equal(bad, undefined, 'row with out-of-range lat must be dropped, not coerced');
|
||||
});
|
||||
});
|
||||
98
tests/live-tankers-service.test.mts
Normal file
@@ -0,0 +1,98 @@
|
||||
// @ts-check
|
||||
//
|
||||
// Tests for src/services/live-tankers.ts — the chokepoint-bbox tanker fetch
|
||||
// helper. We test the pure helpers (bbox derivation, default-chokepoint
|
||||
// filter, cache-TTL constant) since the network-fetching path needs the
|
||||
// running getVesselSnapshot RPC + relay to exercise meaningfully.
|
||||
//
|
||||
// The real Promise.allSettled + caching behavior is more naturally
|
||||
// exercised by the existing E2E browser smoke test once the layer is live;
|
||||
// these tests pin the surface that doesn't require network.
|
||||
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { test, describe } from 'node:test';
|
||||
import { _internal } from '../src/services/live-tankers.ts';
|
||||
|
||||
const { bboxFor, getDefaultChokepoints, BBOX_HALF_DEGREES, CACHE_TTL_MS } = _internal;
|
||||
|
||||
describe('live-tankers — defaults', () => {
|
||||
test('default chokepoint set is the energy-relevant 6', () => {
|
||||
const ids = getDefaultChokepoints().map((c) => c.id).sort();
|
||||
assert.deepEqual(ids, [
|
||||
'bab_el_mandeb',
|
||||
'bosphorus',
|
||||
'hormuz_strait',
|
||||
'malacca_strait',
|
||||
'panama',
|
||||
'suez',
|
||||
]);
|
||||
});
|
||||
|
||||
test('cache TTL matches the gateway live-tier s-maxage (60s)', () => {
|
||||
// If these drift apart, the CDN cache will serve stale data while the
|
||||
// service-level cache is still warm — confusing. Pin both at 60_000ms.
|
||||
assert.equal(CACHE_TTL_MS, 60_000);
|
||||
});
|
||||
|
||||
test('bbox half-width is ±2 degrees', () => {
|
||||
assert.equal(BBOX_HALF_DEGREES, 2);
|
||||
});
|
||||
});
|
||||
|
||||
describe('live-tankers — AbortSignal behavior', () => {
|
||||
test('fetchLiveTankers accepts an options.signal parameter', async () => {
|
||||
// Pin the signature so future edits can't accidentally drop the signal
|
||||
// parameter and silently re-introduce the race-write bug Codex flagged
|
||||
// on PR #3402: a slow older refresh overwriting a newer one because
|
||||
// the abort controller wasn't actually wired into the fetch.
|
||||
const { fetchLiveTankers } = await import('../src/services/live-tankers.ts');
|
||||
const controller = new AbortController();
|
||||
controller.abort(); // pre-aborted
|
||||
const result = await fetchLiveTankers([], { signal: controller.signal });
|
||||
assert.deepEqual(result, [], 'empty chokepoint list → empty result regardless of signal state');
|
||||
});
|
||||
});
|
||||
|
||||
describe('live-tankers — bbox derivation', () => {
|
||||
test('bbox is centered on the chokepoint with ±2° padding', () => {
|
||||
const synth = {
|
||||
id: 'test',
|
||||
displayName: 'Test',
|
||||
geoId: 'test',
|
||||
relayName: 'Test',
|
||||
portwatchName: 'Test',
|
||||
corridorRiskName: null,
|
||||
baselineId: null,
|
||||
shockModelSupported: false,
|
||||
routeIds: [],
|
||||
lat: 26.5,
|
||||
lon: 56.5,
|
||||
};
|
||||
const bbox = bboxFor(synth);
|
||||
assert.equal(bbox.swLat, 24.5);
|
||||
assert.equal(bbox.swLon, 54.5);
|
||||
assert.equal(bbox.neLat, 28.5);
|
||||
assert.equal(bbox.neLon, 58.5);
|
||||
});
|
||||
|
||||
test('bbox total span is 4° on both axes (under the 10° handler guard)', () => {
|
||||
const synth = {
|
||||
id: 'test',
|
||||
displayName: 'Test',
|
||||
geoId: 'test',
|
||||
relayName: 'Test',
|
||||
portwatchName: 'Test',
|
||||
corridorRiskName: null,
|
||||
baselineId: null,
|
||||
shockModelSupported: false,
|
||||
routeIds: [],
|
||||
lat: 0,
|
||||
lon: 0,
|
||||
};
|
||||
const bbox = bboxFor(synth);
|
||||
assert.equal(bbox.neLat - bbox.swLat, 4);
|
||||
assert.equal(bbox.neLon - bbox.swLon, 4);
|
||||
assert.ok(bbox.neLat - bbox.swLat <= 10, 'must stay under handler 10° guard');
|
||||
assert.ok(bbox.neLon - bbox.swLon <= 10, 'must stay under handler 10° guard');
|
||||
});
|
||||
});
|
||||
258
tests/pipeline-dedup.test.mjs
Normal file
@@ -0,0 +1,258 @@
|
||||
// @ts-check
|
||||
//
|
||||
// Tests for scripts/_pipeline-dedup.mjs — the haversine + Jaccard dedup
|
||||
// helper. Both criteria (≤5km AND ≥0.6) must hold for a match. Existing rows
|
||||
// always win to preserve hand-curated evidence.
|
||||
|
||||
import { strict as assert } from 'node:assert';
|
||||
import { test, describe } from 'node:test';
|
||||
import { dedupePipelines, _internal } from '../scripts/_pipeline-dedup.mjs';
|
||||
|
||||
const { jaccard, averageEndpointDistanceKm, tokenize, uniqueId } = _internal;
|
||||
|
||||
function makePipeline(id, name, startLat, startLon, endLat, endLon) {
|
||||
return {
|
||||
id,
|
||||
name,
|
||||
startPoint: { lat: startLat, lon: startLon },
|
||||
endPoint: { lat: endLat, lon: endLon },
|
||||
};
|
||||
}
|
||||
|
||||
describe('pipeline-dedup — internal helpers', () => {
|
||||
test('tokenize lowercases, splits, drops stopwords', () => {
|
||||
const tokens = tokenize('Trans-Siberian Pipeline System');
|
||||
assert.deepEqual(tokens.sort(), ['siberian', 'trans']);
|
||||
});
|
||||
|
||||
test('tokenize removes punctuation and accents', () => {
|
||||
const tokens = tokenize('Caño Limón–Coveñas Pipeline');
|
||||
// After NFKD normalization + ascii-only filter, accented chars survive
|
||||
// as their base letter; we accept either exact or close behaviour.
|
||||
assert.ok(tokens.includes('limon') || tokens.includes('lim'),
|
||||
`expected Limón to tokenize; got ${tokens.join(',')}`);
|
||||
});
|
||||
|
||||
test('jaccard returns 1.0 for identical token sets', () => {
|
||||
assert.equal(jaccard('Test Pipeline System', 'Test Pipeline'), 1.0);
|
||||
});
|
||||
|
||||
test('jaccard returns 0 for fully disjoint names', () => {
|
||||
assert.equal(jaccard('Druzhba North', 'Nord Stream'), 0);
|
||||
});
|
||||
|
||||
test('jaccard 0.5 for half-overlap', () => {
|
||||
assert.equal(jaccard('Trans Adriatic', 'Trans Caspian'), 1 / 3);
|
||||
});
|
||||
|
||||
test('haversine distance is symmetric', () => {
|
||||
const a = makePipeline('a', 'A', 60, 30, 54, 13);
|
||||
const b = makePipeline('b', 'B', 60.001, 30.001, 54.001, 13.001);
|
||||
assert.ok(averageEndpointDistanceKm(a, b) < 1, 'sub-km on tiny offsets');
|
||||
});
|
||||
|
||||
test('haversine distance for far-apart pipelines is large', () => {
|
||||
const a = makePipeline('a', 'A', 60, 30, 54, 13); // RU→DE
|
||||
const b = makePipeline('b', 'B', 30, -90, 25, -85); // Gulf of Mexico
|
||||
assert.ok(averageEndpointDistanceKm(a, b) > 5000);
|
||||
});
|
||||
|
||||
test('uniqueId preserves base when free, suffixes when taken', () => {
|
||||
const taken = new Set(['foo', 'foo-2']);
|
||||
assert.equal(uniqueId('bar', taken), 'bar');
|
||||
assert.equal(uniqueId('foo', taken), 'foo-3');
|
||||
});
|
||||
});
|
||||
|
||||
describe('pipeline-dedup — match logic', () => {
|
||||
test('happy path: completely-different name + far endpoints → added', () => {
|
||||
const existing = [makePipeline('druzhba-north', 'Druzhba Pipeline (Northern Branch)',
|
||||
52.6, 49.4, 52.32, 14.06)];
|
||||
const candidates = [makePipeline('nord-stream-1', 'Nord Stream 1',
|
||||
60.08, 29.05, 54.14, 13.66)];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 1);
|
||||
assert.equal(skippedDuplicates.length, 0);
|
||||
});
|
||||
|
||||
test('match by both criteria: close endpoints + similar name → skipped (existing wins)', () => {
|
||||
const existing = [makePipeline('druzhba-north', 'Druzhba Pipeline',
|
||||
52.6, 49.4, 52.32, 14.06)];
|
||||
const candidates = [makePipeline('druzhba-import', 'Druzhba Pipeline',
|
||||
52.601, 49.401, 52.321, 14.061)];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 0);
|
||||
assert.equal(skippedDuplicates.length, 1);
|
||||
assert.equal(skippedDuplicates[0].matchedExistingId, 'druzhba-north');
|
||||
});
|
||||
|
||||
test('identical names + one shared terminus (≤25 km) → deduped (PR #3406 Dampier-Bunbury regression)', () => {
|
||||
// Real-world case from PR #3406 review: GEM digitized only the southern
|
||||
// 60% of the line, so the shared Bunbury terminus matched at 13.7 km
|
||||
// but the average-endpoint distance was 287 km (over the 5 km gate).
|
||||
// Identical token sets + ≥1 close pairing = same physical pipeline.
|
||||
const existing = [makePipeline('dampier-bunbury', 'Dampier to Bunbury Natural Gas Pipeline',
|
||||
-20.68, 116.72, -33.33, 115.63)];
|
||||
const candidates = [makePipeline('dampier-to-bunbury-natural-gas-pipeline-au',
|
||||
'Dampier to Bunbury Natural Gas Pipeline',
|
||||
-33.265797, 115.755682, -24.86854, 113.674968)];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 0);
|
||||
assert.equal(skippedDuplicates.length, 1);
|
||||
assert.equal(skippedDuplicates[0].matchedExistingId, 'dampier-bunbury');
|
||||
});
|
||||
|
||||
test('name-match only (endpoints in different ocean) → added', () => {
|
||||
const existing = [makePipeline('nord-stream-1', 'Nord Stream 1',
|
||||
60.08, 29.05, 54.14, 13.66)];
|
||||
const candidates = [makePipeline('imposter', 'Nord Stream 1',
|
||||
40.0, -100.0, 35.0, -90.0)]; // different continent
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 1, 'low haversine confidence overrides high name match');
|
||||
assert.equal(skippedDuplicates.length, 0);
|
||||
});
|
||||
|
||||
test('endpoint-match only (different name) → added (real distinct pipelines can share endpoints)', () => {
|
||||
const existing = [makePipeline('yamal-europe', 'Yamal–Europe',
|
||||
67.0, 75.0, 52.0, 14.0)];
|
||||
const candidates = [makePipeline('different-route', 'Trans-Siberian Coal Slurry',
|
||||
67.001, 75.001, 52.001, 14.001)];
|
||||
const { toAdd } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 1, 'name disambiguates: same endpoints, different infrastructure');
|
||||
});
|
||||
|
||||
test('reverse-direction match: candidate endpoints flipped → still detected', () => {
|
||||
const existing = [makePipeline('druzhba', 'Druzhba',
|
||||
52.6, 49.4, 52.32, 14.06)];
|
||||
// Same pipeline, route described in reverse direction
|
||||
const candidates = [makePipeline('druzhba-flipped', 'Druzhba',
|
||||
52.32, 14.06, 52.6, 49.4)];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 0);
|
||||
assert.equal(skippedDuplicates.length, 1);
|
||||
});
|
||||
|
||||
test('stopword-only difference: "Pipeline System" vs "Line" → matches by Jaccard', () => {
|
||||
const existing = [makePipeline('trans-sib', 'Trans-Siberian Pipeline System',
|
||||
55, 30, 60, 90)];
|
||||
const candidates = [makePipeline('trans-sib-cand', 'Trans-Siberian Line',
|
||||
55.001, 30.001, 60.001, 90.001)];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 0);
|
||||
assert.equal(skippedDuplicates.length, 1);
|
||||
assert.ok(skippedDuplicates[0].jaccard >= 0.6);
|
||||
});
|
||||
});
|
||||
|
||||
describe('pipeline-dedup — id collision', () => {
|
||||
test('candidate with id colliding existing gets suffixed -2', () => {
|
||||
const existing = [makePipeline('foo', 'Foo Pipeline', 0, 0, 1, 1)];
|
||||
const candidates = [makePipeline('foo', 'Bar Pipeline', 50, 50, 60, 60)];
|
||||
const { toAdd } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 1);
|
||||
assert.equal(toAdd[0].id, 'foo-2');
|
||||
});
|
||||
|
||||
test('three candidates colliding the same existing id get -2, -3, -4', () => {
|
||||
const existing = [makePipeline('foo', 'Foo Pipeline', 0, 0, 1, 1)];
|
||||
const candidates = [
|
||||
makePipeline('foo', 'Bar Pipeline', 50, 50, 60, 60),
|
||||
makePipeline('foo', 'Baz Pipeline', 70, 70, 80, 80),
|
||||
makePipeline('foo', 'Qux Pipeline', 30, -30, 40, -40),
|
||||
];
|
||||
const { toAdd } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 3);
|
||||
assert.deepEqual(
|
||||
toAdd.map((p) => p.id).sort(),
|
||||
['foo-2', 'foo-3', 'foo-4'],
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('pipeline-dedup — determinism', () => {
|
||||
test('two invocations on identical inputs produce identical output', () => {
|
||||
const existing = [
|
||||
makePipeline('a', 'Alpha Pipeline', 10, 10, 20, 20),
|
||||
makePipeline('b', 'Beta Pipeline', 30, 30, 40, 40),
|
||||
];
|
||||
const candidates = [
|
||||
makePipeline('a', 'Alpha Pipeline', 10.001, 10.001, 20.001, 20.001),
|
||||
makePipeline('c', 'Gamma Pipeline', 50, 50, 60, 60),
|
||||
];
|
||||
const r1 = dedupePipelines(existing, candidates);
|
||||
const r2 = dedupePipelines(existing, candidates);
|
||||
assert.deepEqual(
|
||||
r1.toAdd.map((p) => p.id),
|
||||
r2.toAdd.map((p) => p.id),
|
||||
);
|
||||
assert.deepEqual(
|
||||
r1.skippedDuplicates.map((d) => d.matchedExistingId),
|
||||
r2.skippedDuplicates.map((d) => d.matchedExistingId),
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('pipeline-dedup — within-batch dedup (review fix)', () => {
|
||||
test('two candidates that match each other but not any existing → only first is added', () => {
|
||||
// Regression: pre-fix, dedup compared each candidate ONLY against the
|
||||
// original `existing` array, so two GEM rows for the same pipeline (e.g.
|
||||
// a primary entry and a duplicate from a different source spreadsheet)
|
||||
// would BOTH end up in the registry.
|
||||
const candidates = [
|
||||
makePipeline('east-west-saudi', 'East-West Crude Pipeline', 25, 49, 24, 38),
|
||||
// Same pipeline, slightly different name + endpoints (within match
|
||||
// tolerance). Should be skipped as a duplicate of the first candidate.
|
||||
makePipeline('saudi-petroline', 'East-West Crude', 25.001, 49.001, 24.001, 38.001),
|
||||
];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines([], candidates);
|
||||
assert.equal(toAdd.length, 1, 'second matching candidate must be skipped');
|
||||
assert.equal(skippedDuplicates.length, 1);
|
||||
assert.equal(toAdd[0].id, 'east-west-saudi', 'first-accepted candidate wins (deterministic)');
|
||||
assert.equal(skippedDuplicates[0].matchedExistingId, 'east-west-saudi',
|
||||
'skipped candidate matches the earlier-accepted one, not anything in `existing`');
|
||||
});
|
||||
|
||||
test('three candidates with transitive matches collapse to one', () => {
|
||||
const candidates = [
|
||||
makePipeline('a', 'Druzhba', 52.6, 49.4, 52.32, 14.06),
|
||||
makePipeline('b', 'Druzhba Pipeline', 52.601, 49.401, 52.321, 14.061),
|
||||
makePipeline('c', 'Druzhba Line', 52.602, 49.402, 52.322, 14.062),
|
||||
];
|
||||
const { toAdd } = dedupePipelines([], candidates);
|
||||
assert.equal(toAdd.length, 1, 'three matching candidates must collapse to the first one accepted');
|
||||
});
|
||||
|
||||
test('existing wins over already-accepted candidate', () => {
|
||||
// If a candidate matches an existing row, it must be reported as
|
||||
// matching the existing row (existing-vs-toAdd precedence). Names
|
||||
// chosen so Jaccard exceeds 0.6 after stopword removal.
|
||||
const existing = [makePipeline('canon', 'Druzhba Northern', 52.6, 49.4, 52.32, 14.06)];
|
||||
const candidates = [
|
||||
makePipeline('cand-1', 'Druzhba Northern', 60, 30, 50, 14), // doesn't match existing (far endpoints)
|
||||
makePipeline('cand-2', 'Druzhba Northern', 52.601, 49.401, 52.321, 14.061), // matches existing (near + Jaccard=1)
|
||||
];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines(existing, candidates);
|
||||
assert.equal(toAdd.length, 1, 'cand-1 added; cand-2 skipped against existing');
|
||||
assert.equal(skippedDuplicates[0].matchedExistingId, 'canon',
|
||||
'cand-2 should be reported as matching the existing canon, not the earlier candidate');
|
||||
});
|
||||
});
|
||||
|
||||
describe('pipeline-dedup — empty inputs', () => {
|
||||
test('empty existing + N candidates → all N added, none skipped', () => {
|
||||
const candidates = [
|
||||
makePipeline('a', 'A', 0, 0, 1, 1),
|
||||
makePipeline('b', 'B', 5, 5, 6, 6),
|
||||
];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines([], candidates);
|
||||
assert.equal(toAdd.length, 2);
|
||||
assert.equal(skippedDuplicates.length, 0);
|
||||
});
|
||||
|
||||
test('N existing + empty candidates → empty result', () => {
|
||||
const existing = [makePipeline('a', 'A', 0, 0, 1, 1)];
|
||||
const { toAdd, skippedDuplicates } = dedupePipelines(existing, []);
|
||||
assert.equal(toAdd.length, 0);
|
||||
assert.equal(skippedDuplicates.length, 0);
|
||||
});
|
||||
});
|
||||
@@ -11,6 +11,7 @@ import {
|
||||
GAS_CANONICAL_KEY,
|
||||
OIL_CANONICAL_KEY,
|
||||
VALID_OIL_PRODUCT_CLASSES,
|
||||
VALID_SOURCES,
|
||||
} from '../scripts/_pipeline-registry.mjs';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
@@ -87,7 +88,7 @@ describe('pipeline registries — evidence', () => {
|
||||
const hasEvidence =
|
||||
p.evidence.operatorStatement != null ||
|
||||
p.evidence.sanctionRefs.length > 0 ||
|
||||
['ais-relay', 'satellite', 'press'].includes(p.evidence.physicalStateSource);
|
||||
['ais-relay', 'satellite', 'press', 'gem'].includes(p.evidence.physicalStateSource);
|
||||
assert.ok(hasEvidence, `${p.id} has no supporting evidence for state=${p.evidence.physicalState}`);
|
||||
}
|
||||
});
|
||||
@@ -156,7 +157,7 @@ describe('pipeline registries — productClass', () => {
|
||||
const { productClass: _drop, ...stripped } = oilSample;
|
||||
const bad = {
|
||||
pipelines: Object.fromEntries(
|
||||
Array.from({ length: 8 }, (_, i) => [`p${i}`, { ...stripped, id: `p${i}` }]),
|
||||
Array.from({ length: 210 }, (_, i) => [`p${i}`, { ...stripped, id: `p${i}` }]),
|
||||
),
|
||||
};
|
||||
assert.equal(validateRegistry(bad), false);
|
||||
@@ -166,7 +167,7 @@ describe('pipeline registries — productClass', () => {
|
||||
const oilSample = oil.pipelines[Object.keys(oil.pipelines)[0]!];
|
||||
const bad = {
|
||||
pipelines: Object.fromEntries(
|
||||
Array.from({ length: 8 }, (_, i) => [
|
||||
Array.from({ length: 210 }, (_, i) => [
|
||||
`p${i}`,
|
||||
{ ...oilSample, id: `p${i}`, productClass: 'diesel-only' },
|
||||
]),
|
||||
@@ -179,7 +180,7 @@ describe('pipeline registries — productClass', () => {
|
||||
const gasSample = gas.pipelines[Object.keys(gas.pipelines)[0]!];
|
||||
const bad = {
|
||||
pipelines: Object.fromEntries(
|
||||
Array.from({ length: 8 }, (_, i) => [
|
||||
Array.from({ length: 210 }, (_, i) => [
|
||||
`p${i}`,
|
||||
{ ...gasSample, id: `p${i}`, productClass: 'crude' },
|
||||
]),
|
||||
@@ -201,7 +202,7 @@ describe('pipeline registries — validateRegistry rejects bad input', () => {
|
||||
test('rejects a pipeline with no evidence', () => {
|
||||
const bad = {
|
||||
pipelines: Object.fromEntries(
|
||||
Array.from({ length: 8 }, (_, i) => [`p${i}`, {
|
||||
Array.from({ length: 210 }, (_, i) => [`p${i}`, {
|
||||
id: `p${i}`, name: 'x', operator: 'y', commodityType: 'gas',
|
||||
fromCountry: 'US', toCountry: 'CA', transitCountries: [],
|
||||
capacityBcmYr: 1, startPoint: { lat: 0, lon: 0 }, endPoint: { lat: 1, lon: 1 },
|
||||
@@ -216,3 +217,87 @@ describe('pipeline registries — validateRegistry rejects bad input', () => {
|
||||
assert.equal(validateRegistry(bad), false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('pipeline registries — GEM source enum', () => {
|
||||
test('VALID_SOURCES exported and includes the existing six members plus gem', () => {
|
||||
// Same source-of-truth pattern as VALID_OIL_PRODUCT_CLASSES (PR #3383):
|
||||
// export the Set so future tests can't drift from the validator.
|
||||
assert.ok(VALID_SOURCES.has('operator'));
|
||||
assert.ok(VALID_SOURCES.has('regulator'));
|
||||
assert.ok(VALID_SOURCES.has('press'));
|
||||
assert.ok(VALID_SOURCES.has('satellite'));
|
||||
assert.ok(VALID_SOURCES.has('ais-relay'));
|
||||
assert.ok(VALID_SOURCES.has('gem'));
|
||||
});
|
||||
|
||||
test('validateRegistry accepts GEM-sourced minimum-viable evidence (state=unknown)', () => {
|
||||
// GEM rows ship as state=unknown until classifier promotes them.
|
||||
// physicalStateSource='gem' is sufficient evidence per the audit.
|
||||
const gasSample = gas.pipelines[Object.keys(gas.pipelines)[0]!];
|
||||
const good = {
|
||||
pipelines: Object.fromEntries(
|
||||
Array.from({ length: 210 }, (_, i) => [`p${i}`, {
|
||||
...gasSample,
|
||||
id: `p${i}`,
|
||||
evidence: {
|
||||
physicalState: 'unknown',
|
||||
physicalStateSource: 'gem',
|
||||
commercialState: 'unknown',
|
||||
operatorStatement: null,
|
||||
sanctionRefs: [],
|
||||
classifierVersion: 'gem-import-v1',
|
||||
classifierConfidence: 0.4,
|
||||
lastEvidenceUpdate: '2026-04-25T00:00:00Z',
|
||||
},
|
||||
}])
|
||||
),
|
||||
};
|
||||
assert.equal(validateRegistry(good), true);
|
||||
});
|
||||
|
||||
test('validateRegistry accepts GEM-sourced offline row (state=offline + only source=gem)', () => {
|
||||
// Per plan U1 audit: 'gem' is evidence-bearing for non-flowing badges,
|
||||
// parity with press/satellite/ais-relay. An offline row with no operator
|
||||
// statement and no sanctionRefs but physicalStateSource='gem' should pass
|
||||
// validation (the public-badge derivation downstream will then map it
|
||||
// to "disputed" via the external-signal rule).
|
||||
const gasSample = gas.pipelines[Object.keys(gas.pipelines)[0]!];
|
||||
const good = {
|
||||
pipelines: Object.fromEntries(
|
||||
Array.from({ length: 210 }, (_, i) => [`p${i}`, {
|
||||
...gasSample,
|
||||
id: `p${i}`,
|
||||
evidence: {
|
||||
physicalState: 'offline',
|
||||
physicalStateSource: 'gem',
|
||||
commercialState: 'unknown',
|
||||
operatorStatement: null,
|
||||
sanctionRefs: [],
|
||||
classifierVersion: 'gem-import-v1',
|
||||
classifierConfidence: 0.4,
|
||||
lastEvidenceUpdate: '2026-04-25T00:00:00Z',
|
||||
},
|
||||
}])
|
||||
),
|
||||
};
|
||||
assert.equal(validateRegistry(good), true);
|
||||
});
|
||||
|
||||
test('validateRegistry still rejects unknown physicalStateSource values', () => {
|
||||
// Adding 'gem' must not loosen the enum — unknown sources still fail.
|
||||
const gasSample = gas.pipelines[Object.keys(gas.pipelines)[0]!];
|
||||
const bad = {
|
||||
pipelines: Object.fromEntries(
|
||||
Array.from({ length: 210 }, (_, i) => [`p${i}`, {
|
||||
...gasSample,
|
||||
id: `p${i}`,
|
||||
evidence: {
|
||||
...gasSample.evidence,
|
||||
physicalStateSource: 'rumor',
|
||||
},
|
||||
}])
|
||||
),
|
||||
};
|
||||
assert.equal(validateRegistry(bad), false);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -40,7 +40,7 @@ function extractGetRoutes() {
|
||||
function extractCacheTierKeys() {
|
||||
const gatewayPath = join(root, 'server', 'gateway.ts');
|
||||
const src = readFileSync(gatewayPath, 'utf-8');
|
||||
const re = /'\/(api\/[^']+)':\s*'(fast|medium|slow|slow-browser|static|daily|no-store)'/g;
|
||||
const re = /'\/(api\/[^']+)':\s*'(fast|medium|slow|slow-browser|static|daily|no-store|live)'/g;
|
||||
const entries = {};
|
||||
let m;
|
||||
while ((m = re.exec(src)) !== null) {
|
||||
|
||||
163
tests/seed-unrest-gdelt-fetch.test.mjs
Normal file
@@ -0,0 +1,163 @@
|
||||
// Tests for the GDELT proxy retry path in scripts/seed-unrest-events.mjs.
|
||||
//
|
||||
// Locks the behavioural contract introduced in PR #3395:
|
||||
//
|
||||
// 1. Single attempt success — happy path, no retries fire.
|
||||
// 2. Transient proxy failure recoverable by retry — first attempt(s)
|
||||
// fail, a later attempt succeeds, returns parsed JSON.
|
||||
// 3. All attempts fail — throws the LAST error so ops sees the most
|
||||
// recent failure mode (Cloudflare 522 vs ECONNRESET drift).
|
||||
// 4. Malformed proxy body — JSON.parse throws SyntaxError; the helper
|
||||
// bails immediately rather than burning attempts on a deterministic
|
||||
// parse failure.
|
||||
// 5. Missing CONNECT proxy creds — fetchGdeltEvents throws with a
|
||||
// clear "PROXY_URL env var is not set" pointer for ops, with NO
|
||||
// proxy fetcher invocation (no wasted network).
|
||||
//
|
||||
// Pre-PR-#3395 behaviour to AVOID regressing into:
|
||||
// - Direct fetch was tried first and failed UND_ERR_CONNECT_TIMEOUT
|
||||
// on every Railway tick (0% success). Re-introducing a "soft"
|
||||
// direct fallback would just add latency and log noise.
|
||||
|
||||
import { test } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
|
||||
process.env.UPSTASH_REDIS_REST_URL = 'https://redis.test';
|
||||
process.env.UPSTASH_REDIS_REST_TOKEN = 'fake-token';
|
||||
|
||||
const { fetchGdeltViaProxy, fetchGdeltEvents } = await import('../scripts/seed-unrest-events.mjs');
|
||||
|
||||
const URL = 'https://api.gdeltproject.org/api/v1/gkg_geojson?query=test';
|
||||
const PROXY_AUTH = 'user:pass@gate.decodo.com:7000';
|
||||
|
||||
function jsonBuffer(obj) {
|
||||
return { buffer: Buffer.from(JSON.stringify(obj), 'utf8') };
|
||||
}
|
||||
|
||||
const noSleep = async () => {};
|
||||
const noJitter = () => 0;
|
||||
|
||||
// ─── 1. happy path: first attempt succeeds ─────────────────────────────
|
||||
|
||||
test('proxy success on first attempt → returns parsed JSON, no retries', async () => {
|
||||
let calls = 0;
|
||||
const _proxyFetcher = async () => {
|
||||
calls++;
|
||||
return jsonBuffer({ features: [{ name: 'A' }] });
|
||||
};
|
||||
const result = await fetchGdeltViaProxy(URL, PROXY_AUTH, {
|
||||
_proxyFetcher,
|
||||
_sleep: noSleep,
|
||||
_jitter: noJitter,
|
||||
});
|
||||
assert.deepEqual(result, { features: [{ name: 'A' }] });
|
||||
assert.equal(calls, 1, 'should NOT retry on success');
|
||||
});
|
||||
|
||||
// ─── 2. transient flake: 2 failures + 1 success ────────────────────────
|
||||
|
||||
test('two proxy failures, third attempt succeeds → returns parsed JSON', async () => {
|
||||
let calls = 0;
|
||||
const _proxyFetcher = async () => {
|
||||
calls++;
|
||||
if (calls < 3) throw new Error(`Proxy CONNECT: HTTP/1.1 522 Server Error`);
|
||||
return jsonBuffer({ features: [{ name: 'B' }] });
|
||||
};
|
||||
let sleepCount = 0;
|
||||
const _sleep = async () => { sleepCount++; };
|
||||
const result = await fetchGdeltViaProxy(URL, PROXY_AUTH, {
|
||||
_proxyFetcher,
|
||||
_sleep,
|
||||
_jitter: noJitter,
|
||||
_maxAttempts: 3,
|
||||
});
|
||||
assert.deepEqual(result, { features: [{ name: 'B' }] });
|
||||
assert.equal(calls, 3, 'should retry until success');
|
||||
assert.equal(sleepCount, 2, 'should sleep between attempts only (not after final)');
|
||||
});
|
||||
|
||||
// ─── 3. all attempts fail ──────────────────────────────────────────────
|
||||
|
||||
test('all attempts fail → throws LAST error', async () => {
|
||||
let calls = 0;
|
||||
const errors = [
|
||||
new Error('Proxy CONNECT: HTTP/1.1 522 Server Error'),
|
||||
new Error('CONNECT tunnel timeout'),
|
||||
new Error('Client network socket disconnected'),
|
||||
];
|
||||
const _proxyFetcher = async () => {
|
||||
throw errors[calls++];
|
||||
};
|
||||
await assert.rejects(
|
||||
fetchGdeltViaProxy(URL, PROXY_AUTH, {
|
||||
_proxyFetcher,
|
||||
_sleep: noSleep,
|
||||
_jitter: noJitter,
|
||||
_maxAttempts: 3,
|
||||
}),
|
||||
/Client network socket disconnected/,
|
||||
);
|
||||
assert.equal(calls, 3);
|
||||
});
|
||||
|
||||
// ─── 4. parse failure short-circuits retry ─────────────────────────────
|
||||
|
||||
test('malformed proxy body → throws SyntaxError immediately, no retry', async () => {
|
||||
let calls = 0;
|
||||
const _proxyFetcher = async () => {
|
||||
calls++;
|
||||
return { buffer: Buffer.from('<html>this is not json</html>', 'utf8') };
|
||||
};
|
||||
await assert.rejects(
|
||||
fetchGdeltViaProxy(URL, PROXY_AUTH, {
|
||||
_proxyFetcher,
|
||||
_sleep: noSleep,
|
||||
_jitter: noJitter,
|
||||
_maxAttempts: 3,
|
||||
}),
|
||||
SyntaxError,
|
||||
);
|
||||
assert.equal(calls, 1, 'parse error must not trigger retries');
|
||||
});
|
||||
|
||||
// ─── 5. fetchGdeltEvents: missing proxy creds ──────────────────────────
|
||||
|
||||
test('fetchGdeltEvents with no proxy creds → throws clear ops-actionable error, no fetcher call', async () => {
|
||||
let fetcherCalled = false;
|
||||
await assert.rejects(
|
||||
fetchGdeltEvents({
|
||||
_resolveProxyForConnect: () => null,
|
||||
_proxyFetcher: async () => { fetcherCalled = true; return jsonBuffer({}); },
|
||||
_sleep: noSleep,
|
||||
_jitter: noJitter,
|
||||
}),
|
||||
/PROXY_URL env var is not set/,
|
||||
);
|
||||
assert.equal(fetcherCalled, false, 'must not attempt proxy fetch when creds missing');
|
||||
});
|
||||
|
||||
// ─── 6. fetchGdeltEvents: end-to-end with retry path ───────────────────
|
||||
|
||||
test('fetchGdeltEvents with one transient proxy failure → recovers and aggregates events', async () => {
|
||||
let calls = 0;
|
||||
const _proxyFetcher = async () => {
|
||||
calls++;
|
||||
if (calls === 1) throw new Error('Proxy CONNECT: HTTP/1.1 522 Server Error');
|
||||
// Five mentions at the same lat/lon — passes the count >= 5 floor in the aggregator.
|
||||
const features = Array.from({ length: 5 }, () => ({
|
||||
properties: { name: 'Cairo, Egypt', urltone: -3 },
|
||||
geometry: { type: 'Point', coordinates: [31.2, 30.0] },
|
||||
}));
|
||||
return jsonBuffer({ features });
|
||||
};
|
||||
const events = await fetchGdeltEvents({
|
||||
_resolveProxyForConnect: () => PROXY_AUTH,
|
||||
_proxyFetcher,
|
||||
_sleep: noSleep,
|
||||
_jitter: noJitter,
|
||||
_maxAttempts: 3,
|
||||
});
|
||||
assert.equal(calls, 2, 'should retry exactly once after the 522');
|
||||
assert.equal(events.length, 1, 'five mentions at one location → one aggregated event');
|
||||
assert.equal(events[0].country, 'Egypt');
|
||||
});
|
||||
@@ -363,6 +363,29 @@ describe('existing beforeSend filters', () => {
|
||||
assert.ok(beforeSend(event) !== null, 'All-maplibre first-party tile fetch failure must still reach Sentry');
|
||||
});
|
||||
|
||||
it('suppresses "Failed to fetch (<host>)" when stack is extension-only (covered by generic extension rule)', () => {
|
||||
// WORLDMONITOR-P5: AdBlock-class extensions wrap window.fetch and their
|
||||
// replacement can fail unrelated to our backend. The generic extension rule
|
||||
// (`!hasFirstParty && extension frame`) already drops this; the test locks
|
||||
// that property in for the `Failed to fetch (<host>)` message shape.
|
||||
const event = makeEvent('Failed to fetch (abacus.worldmonitor.app)', 'TypeError', [
|
||||
{ filename: 'chrome-extension://hoklmmgfnpapgjgcpechhaamimifchmp/frame_ant/frame_ant.js', lineno: 2, function: 'window.fetch' },
|
||||
]);
|
||||
assert.equal(beforeSend(event), null, 'Extension-only fetch failure should be suppressed');
|
||||
});
|
||||
|
||||
it('does NOT suppress "Failed to fetch (<host>)" when stack has both first-party and extension frames', () => {
|
||||
// Safety property: a first-party panels-*.js frame means our code initiated
|
||||
// the fetch — must surface even if an extension also wrapped it, so a real
|
||||
// api.worldmonitor.app outage isn't silenced for users who happen to run
|
||||
// fetch-wrapping extensions.
|
||||
const event = makeEvent('Failed to fetch (api.worldmonitor.app)', 'TypeError', [
|
||||
{ filename: '/assets/panels-wF5GXf0N.js', lineno: 24, function: 'window.fetch' },
|
||||
{ filename: 'chrome-extension://hoklmmgfnpapgjgcpechhaamimifchmp/frame_ant/frame_ant.js', lineno: 2, function: 'window.fetch' },
|
||||
]);
|
||||
assert.ok(beforeSend(event) !== null, 'First-party + extension Failed-to-fetch must reach Sentry');
|
||||
});
|
||||
|
||||
it('suppresses iOS Safari WKWebView "Cannot inject key into script value" regardless of first-party frame', () => {
|
||||
// The native throw always lands in a first-party caller; the existing
|
||||
// !hasFirstParty gate missed it. `UnknownError` type name is WebKit-only
|
||||
|
||||
@@ -192,24 +192,37 @@ describe('getCacheKey determinism', () => {
|
||||
describe('getVesselSnapshot caching (HIGH-1)', () => {
|
||||
const src = readSrc('server/worldmonitor/maritime/v1/get-vessel-snapshot.ts');
|
||||
|
||||
it('has per-variant cache slots (candidates=on vs off)', () => {
|
||||
assert.match(src, /cache:\s*Record<'with'\s*\|\s*'without'/,
|
||||
'Cache should split on include_candidates so the large/small payloads do not share a slot');
|
||||
assert.match(src, /with:\s*\{\s*snapshot:\s*undefined/,
|
||||
'with-candidates slot should be initialized empty');
|
||||
assert.match(src, /without:\s*\{\s*snapshot:\s*undefined/,
|
||||
'without-candidates slot should be initialized empty');
|
||||
it('cache is keyed by request shape (candidates, tankers, quantized bbox)', () => {
|
||||
// PR 3 (parity-push) replaced the prior `Record<'with'|'without'>` cache
|
||||
// with a Map<string, SnapshotCacheSlot> where the key embeds all three
|
||||
// axes that change response payload: includeCandidates, includeTankers,
|
||||
// and (when present) a 1°-quantized bbox. This prevents distinct bboxes
|
||||
// from collapsing onto a single cached response.
|
||||
assert.match(src, /const\s+cache\s*=\s*new\s+Map<string,\s*SnapshotCacheSlot>/,
|
||||
'cache should be a Map<string, SnapshotCacheSlot> keyed by request shape');
|
||||
assert.match(src, /cacheKeyFor\s*\(/,
|
||||
'cacheKeyFor() helper should compose the cache key');
|
||||
// Key must distinguish includeCandidates, includeTankers, and bbox.
|
||||
assert.match(src, /includeCandidates\s*\?\s*'1'\s*:\s*'0'/,
|
||||
'cache key must encode includeCandidates');
|
||||
assert.match(src, /includeTankers\s*\?\s*'1'\s*:\s*'0'/,
|
||||
'cache key must encode includeTankers');
|
||||
});
|
||||
|
||||
it('has 5-minute TTL cache', () => {
|
||||
assert.match(src, /SNAPSHOT_CACHE_TTL_MS\s*=\s*300[_]?000/,
|
||||
'TTL should be 5 minutes (300000ms)');
|
||||
it('has split TTLs for base (5min) and live tanker / bbox (60s) reads', () => {
|
||||
// Base path (density + military-detection consumers) keeps the prior
|
||||
// 5-min cache. Live-tanker and bbox-filtered paths drop to 60s to honor
|
||||
// the freshness contract that drives the Energy Atlas LiveTankersLayer.
|
||||
assert.match(src, /SNAPSHOT_CACHE_TTL_BASE_MS\s*=\s*300[_]?000/,
|
||||
'base TTL should remain 5 minutes (300000ms) for density/disruption consumers');
|
||||
assert.match(src, /SNAPSHOT_CACHE_TTL_LIVE_MS\s*=\s*60[_]?000/,
|
||||
'live tanker / bbox TTL should be 60s to match the gateway live tier s-maxage');
|
||||
});
|
||||
|
||||
it('checks cache before calling relay', () => {
|
||||
// fetchVesselSnapshot should check slot freshness before fetchVesselSnapshotFromRelay
|
||||
const cacheCheckIdx = src.indexOf('slot.snapshot && (now - slot.timestamp)');
|
||||
const relayCallIdx = src.indexOf('fetchVesselSnapshotFromRelay(includeCandidates)');
|
||||
const relayCallIdx = src.indexOf('fetchVesselSnapshotFromRelay(');
|
||||
assert.ok(cacheCheckIdx > -1, 'Should check slot freshness');
|
||||
assert.ok(relayCallIdx > -1, 'Should have relay fetch function');
|
||||
assert.ok(cacheCheckIdx < relayCallIdx,
|
||||
@@ -230,6 +243,25 @@ describe('getVesselSnapshot caching (HIGH-1)', () => {
|
||||
'Should return stale cached snapshot from the selected slot when fresh relay fetch fails');
|
||||
});
|
||||
|
||||
it('rejects oversized bbox AND out-of-range coords with statusCode=400', () => {
|
||||
// PR 3 (parity-push): server-side guard against a malicious or buggy
|
||||
// global-bbox query that would pull every tanker through one request.
|
||||
// Range guard added in #3402 review-fix: relay silently drops malformed
|
||||
// bboxes and serves global capped subsets — handler MUST validate
|
||||
// -90..90 / -180..180 before calling relay. Error must carry
|
||||
// statusCode=400 or error-mapper.ts maps it to a generic 500.
|
||||
assert.match(src, /MAX_BBOX_DEGREES\s*=\s*10/,
|
||||
'should declare a 10° max-bbox guard');
|
||||
assert.match(src, /class\s+BboxValidationError/,
|
||||
'should throw BboxValidationError on invalid bbox');
|
||||
assert.match(src, /readonly\s+statusCode\s*=\s*400/,
|
||||
'BboxValidationError must carry statusCode=400 (error-mapper surfaces it as HTTP 400 only when the error has a statusCode property)');
|
||||
assert.match(src, /lat\s*>=\s*-90\s*&&\s*lat\s*<=\s*90/,
|
||||
'must validate lat is in [-90, 90]');
|
||||
assert.match(src, /lon\s*>=\s*-180\s*&&\s*lon\s*<=\s*180/,
|
||||
'must validate lon is in [-180, 180]');
|
||||
});
|
||||
|
||||
// NOTE: Full integration test (mocking fetch, verifying cache hits) requires
|
||||
// a TypeScript-capable test runner. This structural test verifies the pattern.
|
||||
});
|
||||
|
||||
@@ -33,20 +33,48 @@ describe('SWF classification manifest — shipped YAML', () => {
|
||||
);
|
||||
});
|
||||
|
||||
it('lists the first-release set of funds from plan §3.4', () => {
|
||||
it('lists the first-release set of funds from plan §3.4 (KIA split per Phase 1B)', () => {
|
||||
// Phase 1B (Plan 2026-04-25-001) split the original `KW:kia` entry
|
||||
// into `KW:kia-grf` and `KW:kia-fgf` to correctly attribute GRF's
|
||||
// 0.9 stabilization access to its ~5% sleeve and FGF's 0.20
|
||||
// statutorily-gated access to the remaining ~95%. Both identifiers
|
||||
// are now required.
|
||||
const expected = new Set([
|
||||
'NO:gpfg',
|
||||
'AE:adia',
|
||||
'AE:mubadala',
|
||||
'SA:pif',
|
||||
'KW:kia',
|
||||
'KW:kia-grf',
|
||||
'KW:kia-fgf',
|
||||
'QA:qia',
|
||||
'SG:gic',
|
||||
'SG:temasek',
|
||||
]);
|
||||
const actual = new Set(manifest.funds.map((f) => `${f.country}:${f.fund}`));
|
||||
for (const required of expected) {
|
||||
assert.ok(actual.has(required), `plan §3.4 required fund missing from manifest: ${required}`);
|
||||
assert.ok(actual.has(required), `plan §3.4 + Phase 1B required fund missing from manifest: ${required}`);
|
||||
}
|
||||
});
|
||||
|
||||
it('Phase 1 (Plan 2026-04-25-001) expansion adds 12 new funds across 7 new + extended countries', () => {
|
||||
// Phase 1 expansion: UAE adds ICD/ADQ/EIA (3); KW splits kia → kia-grf+kia-fgf
|
||||
// (1 net since kia is dropped); CN adds CIC/NSSF/SAFE-IC (3); HK adds HKMA-EF
|
||||
// (1); KR adds KIC (1); AU adds Future Fund (1); OM adds OIA (1); BH adds
|
||||
// Mumtalakat (1); TL adds Petroleum Fund (1). Net new identifiers: 12 over
|
||||
// the original 8 + 1 from KIA split. Manifest total ≥ 20.
|
||||
const required = new Set([
|
||||
'AE:icd', 'AE:adq', 'AE:eia',
|
||||
'CN:cic', 'CN:nssf', 'CN:safe-ic',
|
||||
'HK:hkma-ef',
|
||||
'KR:kic',
|
||||
'AU:future-fund',
|
||||
'OM:oia',
|
||||
'BH:mumtalakat',
|
||||
'TL:petroleum-fund',
|
||||
]);
|
||||
const actual = new Set(manifest.funds.map((f) => `${f.country}:${f.fund}`));
|
||||
for (const r of required) {
|
||||
assert.ok(actual.has(r), `Phase 1 expansion fund missing from manifest: ${r}`);
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
387
tests/swf-manifest-loader-schema-extension.test.mjs
Normal file
@@ -0,0 +1,387 @@
|
||||
// Schema-extension tests for swf-manifest-loader.mjs (Phase 1).
|
||||
//
|
||||
// Pins the new schema fields' canonical placement and rejection rules:
|
||||
// - top-level (per-fund): aum_usd, aum_year, aum_verified
|
||||
// - under classification: aum_pct_of_audited, excluded_overlaps_with_reserves
|
||||
//
|
||||
// Codex Round 1 #4 mandated a SINGLE canonical placement for each new
|
||||
// field, with the loader REJECTING misplacement (positive control)
|
||||
// rather than silently accepting it.
|
||||
|
||||
import { test } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
|
||||
import { validateManifest } from '../scripts/shared/swf-manifest-loader.mjs';
|
||||
import {
|
||||
shouldSkipFundForBuffer,
|
||||
applyAumPctOfAudited,
|
||||
buildCoverageSummary,
|
||||
} from '../scripts/seed-sovereign-wealth.mjs';
|
||||
|
||||
function makeFund(overrides = {}) {
|
||||
return {
|
||||
country: 'AE',
|
||||
fund: 'test-fund',
|
||||
display_name: 'Test Fund',
|
||||
classification: { access: 0.5, liquidity: 0.5, transparency: 0.5 },
|
||||
rationale: { access: 'a', liquidity: 'l', transparency: 't' },
|
||||
sources: ['https://example.com/'],
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
function makeManifest(funds) {
|
||||
return {
|
||||
manifest_version: 1,
|
||||
last_reviewed: '2026-04-25',
|
||||
external_review_status: 'REVIEWED',
|
||||
funds,
|
||||
};
|
||||
}
|
||||
|
||||
test('REJECTS aum_pct_of_audited placed at fund top level (must be under classification)', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({ aum_pct_of_audited: 0.05 }),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m), /aum_pct_of_audited must be placed under classification/);
|
||||
});
|
||||
|
||||
test('REJECTS excluded_overlaps_with_reserves placed at fund top level', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({ excluded_overlaps_with_reserves: true }),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m), /excluded_overlaps_with_reserves must be placed under classification/);
|
||||
});
|
||||
|
||||
test('ACCEPTS aum_pct_of_audited under classification when paired with rationale', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
classification: { access: 0.9, liquidity: 0.8, transparency: 0.4, aum_pct_of_audited: 0.05 },
|
||||
rationale: { access: 'a', liquidity: 'l', transparency: 't', aum_pct_of_audited: 'GRF is ~5% of audited KIA AUM' },
|
||||
}),
|
||||
]);
|
||||
const out = validateManifest(m);
|
||||
assert.equal(out.funds[0].classification.aumPctOfAudited, 0.05);
|
||||
assert.equal(out.funds[0].rationale.aumPctOfAudited, 'GRF is ~5% of audited KIA AUM');
|
||||
});
|
||||
|
||||
test('REJECTS aum_pct_of_audited under classification WITHOUT a rationale paragraph', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
classification: { access: 0.9, liquidity: 0.8, transparency: 0.4, aum_pct_of_audited: 0.05 },
|
||||
// rationale.aum_pct_of_audited is missing
|
||||
}),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m),
|
||||
/rationale\.aum_pct_of_audited: required when classification\.aum_pct_of_audited is set/);
|
||||
});
|
||||
|
||||
test('REJECTS aum_pct_of_audited outside (0, 1] range', () => {
|
||||
// `null` is intentionally NOT in this list — the loader treats null
|
||||
// as "field absent" (the value is optional), which is correct.
|
||||
for (const bad of [0, -0.1, 1.5, 'x', NaN]) {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
classification: { access: 0.9, liquidity: 0.8, transparency: 0.4, aum_pct_of_audited: bad },
|
||||
}),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m), /aum_pct_of_audited: expected number in \(0, 1\]/);
|
||||
}
|
||||
});
|
||||
|
||||
test('ACCEPTS excluded_overlaps_with_reserves: true with paired rationale', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
classification: { access: 0.5, liquidity: 0.7, transparency: 0.3, excluded_overlaps_with_reserves: true },
|
||||
rationale: { access: 'a', liquidity: 'l', transparency: 't', excluded_overlaps_with_reserves: 'SAFE-IC overlaps PBOC reserves' },
|
||||
}),
|
||||
]);
|
||||
const out = validateManifest(m);
|
||||
assert.equal(out.funds[0].classification.excludedOverlapsWithReserves, true);
|
||||
});
|
||||
|
||||
test('REJECTS excluded_overlaps_with_reserves: true WITHOUT rationale paragraph', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
classification: { access: 0.5, liquidity: 0.7, transparency: 0.3, excluded_overlaps_with_reserves: true },
|
||||
}),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m),
|
||||
/rationale\.excluded_overlaps_with_reserves: required when classification\.excluded_overlaps_with_reserves is true/);
|
||||
});
|
||||
|
||||
test('REJECTS excluded_overlaps_with_reserves of non-boolean type', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
classification: { access: 0.5, liquidity: 0.7, transparency: 0.3, excluded_overlaps_with_reserves: 'true' },
|
||||
}),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m), /excluded_overlaps_with_reserves: expected boolean/);
|
||||
});
|
||||
|
||||
test('ACCEPTS aum_usd + aum_year + aum_verified=true together', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
aum_usd: 320_000_000_000,
|
||||
aum_year: 2024,
|
||||
aum_verified: true,
|
||||
}),
|
||||
]);
|
||||
const out = validateManifest(m);
|
||||
assert.equal(out.funds[0].aumUsd, 320_000_000_000);
|
||||
assert.equal(out.funds[0].aumYear, 2024);
|
||||
assert.equal(out.funds[0].aumVerified, true);
|
||||
});
|
||||
|
||||
test('REJECTS aum_verified: true without aum_usd', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
aum_verified: true,
|
||||
aum_year: 2024,
|
||||
}),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m),
|
||||
/aum_verified=true requires both aum_usd and aum_year to be present/);
|
||||
});
|
||||
|
||||
test('REJECTS aum_verified: true without aum_year', () => {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
aum_verified: true,
|
||||
aum_usd: 100_000_000_000,
|
||||
}),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m),
|
||||
/aum_verified=true requires both aum_usd and aum_year to be present/);
|
||||
});
|
||||
|
||||
test('ACCEPTS aum_verified: false (entry loaded for documentation only)', () => {
|
||||
// No aum_usd / aum_year required when verified=false — the entry
|
||||
// documents an unverifiable fund that the seeder will skip from
|
||||
// scoring. This is the EIA / data-integrity-rule path.
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
aum_verified: false,
|
||||
}),
|
||||
]);
|
||||
const out = validateManifest(m);
|
||||
assert.equal(out.funds[0].aumVerified, false);
|
||||
assert.equal(out.funds[0].aumUsd, undefined);
|
||||
});
|
||||
|
||||
test('REJECTS aum_year out of [2000, 2100]', () => {
|
||||
// `null` excluded — treated as field-absent, intentional.
|
||||
for (const bad of [1999, 2101, 0, -1, 'x']) {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
aum_usd: 100_000_000_000,
|
||||
aum_year: bad,
|
||||
aum_verified: true,
|
||||
}),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m), /aum_year/);
|
||||
}
|
||||
});
|
||||
|
||||
test('REJECTS aum_usd of non-positive or non-finite type', () => {
|
||||
// `null` excluded — treated as field-absent, intentional.
|
||||
for (const bad of [0, -1, NaN, Infinity, 'big']) {
|
||||
const m = makeManifest([
|
||||
makeFund({
|
||||
aum_usd: bad,
|
||||
aum_year: 2024,
|
||||
aum_verified: true,
|
||||
}),
|
||||
]);
|
||||
assert.throws(() => validateManifest(m), /aum_usd/);
|
||||
}
|
||||
});
|
||||
|
||||
test('Backward-compat: existing entries without new fields still validate', () => {
|
||||
// The 8 existing entries on origin/main don't carry aum_usd /
|
||||
// aum_pct / excluded flags. Ensure the schema extension is purely
|
||||
// additive — existing fields produce a clean parse.
|
||||
const m = makeManifest([makeFund()]);
|
||||
const out = validateManifest(m);
|
||||
assert.equal(out.funds[0].aumUsd, undefined);
|
||||
assert.equal(out.funds[0].aumVerified, undefined);
|
||||
assert.equal(out.funds[0].classification.aumPctOfAudited, undefined);
|
||||
assert.equal(out.funds[0].classification.excludedOverlapsWithReserves, undefined);
|
||||
});
|
||||
|
||||
// ── Seeder-side pure helpers ──────────────────────────────────────
|
||||
|
||||
test('shouldSkipFundForBuffer: returns null for a normal fund', () => {
|
||||
const fund = { classification: { access: 0.5 }, aumVerified: true };
|
||||
assert.equal(shouldSkipFundForBuffer(fund), null);
|
||||
});
|
||||
|
||||
test('shouldSkipFundForBuffer: skips when excluded_overlaps_with_reserves=true', () => {
|
||||
const fund = {
|
||||
classification: { access: 0.5, excludedOverlapsWithReserves: true },
|
||||
aumVerified: true,
|
||||
};
|
||||
assert.equal(shouldSkipFundForBuffer(fund), 'excluded_overlaps_with_reserves');
|
||||
});
|
||||
|
||||
test('shouldSkipFundForBuffer: skips when aum_verified=false', () => {
|
||||
const fund = {
|
||||
classification: { access: 0.5 },
|
||||
aumVerified: false,
|
||||
};
|
||||
assert.equal(shouldSkipFundForBuffer(fund), 'aum_unverified');
|
||||
});
|
||||
|
||||
test('shouldSkipFundForBuffer: excluded takes precedence over unverified (single skip reason)', () => {
|
||||
// If a fund is BOTH excluded (overlaps reserves) AND unverified,
|
||||
// we surface the excluded reason because that's the more
|
||||
// architectural concern (double-counting risk).
|
||||
const fund = {
|
||||
classification: { excludedOverlapsWithReserves: true },
|
||||
aumVerified: false,
|
||||
};
|
||||
assert.equal(shouldSkipFundForBuffer(fund), 'excluded_overlaps_with_reserves');
|
||||
});
|
||||
|
||||
test('shouldSkipFundForBuffer: returns null when neither flag is set', () => {
|
||||
// Backward-compat: existing entries on origin/main don't carry
|
||||
// aumVerified or excludedOverlapsWithReserves. They must NOT skip.
|
||||
assert.equal(shouldSkipFundForBuffer({ classification: { access: 0.5 } }), null);
|
||||
});
|
||||
|
||||
test('shouldSkipFundForBuffer: handles malformed / null input defensively', () => {
|
||||
assert.equal(shouldSkipFundForBuffer(null), null);
|
||||
assert.equal(shouldSkipFundForBuffer(undefined), null);
|
||||
assert.equal(shouldSkipFundForBuffer({}), null);
|
||||
});
|
||||
|
||||
test('applyAumPctOfAudited: returns AUM unchanged when no multiplier set', () => {
|
||||
const fund = { classification: { access: 0.5 } };
|
||||
assert.equal(applyAumPctOfAudited(1_000_000_000_000, fund), 1_000_000_000_000);
|
||||
});
|
||||
|
||||
test('applyAumPctOfAudited: applies the fraction (KIA-GRF case)', () => {
|
||||
// KIA combined audited AUM = $1.072T; GRF is ~5%
|
||||
const fund = { classification: { access: 0.9, aumPctOfAudited: 0.05 } };
|
||||
const out = applyAumPctOfAudited(1_072_000_000_000, fund);
|
||||
assert.equal(out, 53_600_000_000);
|
||||
});
|
||||
|
||||
test('applyAumPctOfAudited: KIA-GRF + KIA-FGF sum equals combined AUM', () => {
|
||||
// The split must be conservative — sum of fractional parts equals
|
||||
// the original audited AUM. Pinned because a future edit that
|
||||
// changes 5/95 split to e.g. 5/90 would silently drop $50B.
|
||||
const audited = 1_072_000_000_000;
|
||||
const grf = applyAumPctOfAudited(audited, { classification: { aumPctOfAudited: 0.05 } });
|
||||
const fgf = applyAumPctOfAudited(audited, { classification: { aumPctOfAudited: 0.95 } });
|
||||
assert.equal(grf + fgf, audited);
|
||||
});
|
||||
|
||||
test('applyAumPctOfAudited: ignores out-of-range multipliers (defensive)', () => {
|
||||
// The loader rejects out-of-range values at parse time; this is a
|
||||
// belt-and-suspenders runtime check that doesn't multiply by an
|
||||
// invalid fraction even if the loader's gate is somehow bypassed.
|
||||
for (const bad of [0, -0.1, 1.5, NaN, 'big']) {
|
||||
const fund = { classification: { aumPctOfAudited: bad } };
|
||||
assert.equal(applyAumPctOfAudited(1_000, fund), 1_000);
|
||||
}
|
||||
});
|
||||
|
||||
// ── buildCoverageSummary regression: completeness denominator ──────
|
||||
//
|
||||
// User's PR-3391 review caught a P1: completeness used `funds.length`
|
||||
// (manifest count) as the denominator, which depresses the ratio for
|
||||
// countries whose manifest contains documentation-only entries
|
||||
// (excluded_overlaps_with_reserves OR aum_verified=false). The shipped
|
||||
// manifest has this state for UAE (EIA unverified) and CN (SAFE-IC
|
||||
// excluded). These tests pin the corrected denominator: only scorable
|
||||
// funds count toward expected.
|
||||
|
||||
test('buildCoverageSummary: country with all scorable funds matched is "complete" even if manifest also has unverified entries', () => {
|
||||
// UAE-shape: 4 scorable (ADIA, Mubadala, ICD, ADQ) + 1 unverified (EIA).
|
||||
// If all 4 scorable matched, country is COMPLETE, not partial.
|
||||
const manifest = {
|
||||
funds: [
|
||||
{ country: 'AE', fund: 'adia', classification: { access: 0.4 } },
|
||||
{ country: 'AE', fund: 'mubadala',classification: { access: 0.5 } },
|
||||
{ country: 'AE', fund: 'icd', classification: { access: 0.5 } },
|
||||
{ country: 'AE', fund: 'adq', classification: { access: 0.5 } },
|
||||
{ country: 'AE', fund: 'eia', classification: { access: 0.4 }, aumVerified: false },
|
||||
],
|
||||
};
|
||||
const imports = { AE: { importsUsd: 481.9e9 } };
|
||||
const countries = {
|
||||
AE: {
|
||||
// expectedFunds is computed PER-COUNTRY in fetchSovereignWealth using
|
||||
// shouldSkipFundForBuffer, so this test fixture mirrors the seeder's
|
||||
// post-fix output (expectedFunds = 4 scorable, completeness = 1.0).
|
||||
matchedFunds: 4,
|
||||
expectedFunds: 4,
|
||||
completeness: 1.0,
|
||||
},
|
||||
};
|
||||
const summary = buildCoverageSummary(manifest, imports, countries);
|
||||
// Only 4 scorable funds in AE; 1 unverified entry doesn't count.
|
||||
assert.equal(summary.expectedFunds, 4,
|
||||
`headline expected funds should exclude documentation-only entries; got ${summary.expectedFunds}`);
|
||||
const aeStatus = summary.countryStatuses.find((s) => s.country === 'AE');
|
||||
assert.equal(aeStatus.status, 'complete');
|
||||
});
|
||||
|
||||
test('buildCoverageSummary: excludes excluded_overlaps_with_reserves entries from expectedFundsTotal', () => {
|
||||
// CN-shape: CIC + NSSF scorable + SAFE-IC excluded.
|
||||
const manifest = {
|
||||
funds: [
|
||||
{ country: 'CN', fund: 'cic', classification: { access: 0.4 } },
|
||||
{ country: 'CN', fund: 'nssf', classification: { access: 0.20 } },
|
||||
{ country: 'CN', fund: 'safe-ic', classification: { access: 0.5, excludedOverlapsWithReserves: true } },
|
||||
],
|
||||
};
|
||||
const imports = { CN: { importsUsd: 3.0e12 } };
|
||||
const countries = {
|
||||
CN: { matchedFunds: 2, expectedFunds: 2, completeness: 1.0 },
|
||||
};
|
||||
const summary = buildCoverageSummary(manifest, imports, countries);
|
||||
assert.equal(summary.expectedFunds, 2,
|
||||
`SAFE-IC should NOT count toward expected funds; got ${summary.expectedFunds}`);
|
||||
const cnStatus = summary.countryStatuses.find((s) => s.country === 'CN');
|
||||
assert.equal(cnStatus.status, 'complete');
|
||||
});
|
||||
|
||||
test('buildCoverageSummary: missing-country path uses scorable count, not raw manifest count', () => {
|
||||
// Country with mixed scorable + excluded entries that fails to seed
|
||||
// entirely (e.g. WB imports missing). The "expected" figure on the
|
||||
// missing-country status row should reflect SCORABLE funds, not all
|
||||
// manifest entries — otherwise an operator dashboard shows
|
||||
// "0/3 funds" when the truth is "0/2 funds, 1 documentation-only".
|
||||
const manifest = {
|
||||
funds: [
|
||||
{ country: 'CN', fund: 'cic', classification: { access: 0.4 } },
|
||||
{ country: 'CN', fund: 'nssf', classification: { access: 0.20 } },
|
||||
{ country: 'CN', fund: 'safe-ic', classification: { access: 0.5, excludedOverlapsWithReserves: true } },
|
||||
],
|
||||
};
|
||||
const imports = {}; // CN imports missing → country not seeded
|
||||
const countries = {}; // no country payload at all
|
||||
const summary = buildCoverageSummary(manifest, imports, countries);
|
||||
const cnStatus = summary.countryStatuses.find((s) => s.country === 'CN');
|
||||
assert.equal(cnStatus.status, 'missing');
|
||||
assert.equal(cnStatus.expected, 2,
|
||||
`missing-country expected should be SCORABLE count (2), not all-manifest (3); got ${cnStatus.expected}`);
|
||||
});
|
||||
|
||||
test('buildCoverageSummary: country with ONLY documentation-only entries is excluded from expectedCountries', () => {
|
||||
// Edge case: hypothetical country where every manifest entry is
|
||||
// documentation-only (e.g. only EIA-style unverified). Such a
|
||||
// country has 0 scorable funds → should not appear in
|
||||
// expectedCountries because there's nothing scorable to expect.
|
||||
const manifest = {
|
||||
funds: [
|
||||
{ country: 'XX', fund: 'placeholder', classification: { access: 0.4 }, aumVerified: false },
|
||||
],
|
||||
};
|
||||
const summary = buildCoverageSummary(manifest, {}, {});
|
||||
assert.equal(summary.expectedCountries, 0,
|
||||
`XX has zero scorable funds — should not be in expectedCountries`);
|
||||
assert.equal(summary.expectedFunds, 0);
|
||||
});
|
||||
369
tests/usage-telemetry-emission.test.mts
Normal file
@@ -0,0 +1,369 @@
|
||||
/**
|
||||
* Asserts the Axiom telemetry payload emitted by createDomainGateway() —
|
||||
* specifically the four fields the round-1 Codex review flagged:
|
||||
*
|
||||
* - domain (must be 'shipping' for /api/v2/shipping/* routes, not 'v2')
|
||||
* - customer_id (must be populated on legacy premium bearer-token success)
|
||||
* - auth_kind (must reflect the resolved identity, not stay 'anon')
|
||||
* - tier (recorded when entitlement-gated routes succeed; covered indirectly
|
||||
* by the legacy bearer success case via the Dodo `tier` branch)
|
||||
*
|
||||
* Strategy: enable telemetry (USAGE_TELEMETRY=1 + AXIOM_API_TOKEN=fake), stub
|
||||
* globalThis.fetch to intercept the Axiom ingest POST, and pass a real ctx
|
||||
* whose waitUntil collects the in-flight Promises so we can await them after
|
||||
* the gateway returns.
|
||||
*/
|
||||
|
||||
import assert from 'node:assert/strict';
|
||||
import { createServer, type Server } from 'node:http';
|
||||
import { afterEach, before, after, describe, it } from 'node:test';
|
||||
import { generateKeyPair, exportJWK, SignJWT } from 'jose';
|
||||
|
||||
import { createDomainGateway, type GatewayCtx } from '../server/gateway.ts';
|
||||
|
||||
interface CapturedEvent {
|
||||
event_type: string;
|
||||
domain: string;
|
||||
route: string;
|
||||
status: number;
|
||||
customer_id: string | null;
|
||||
auth_kind: string;
|
||||
tier: number;
|
||||
}
|
||||
|
||||
function makeRecordingCtx(): { ctx: GatewayCtx; settled: Promise<void> } {
|
||||
const pending: Promise<unknown>[] = [];
|
||||
const ctx: GatewayCtx = {
|
||||
waitUntil: (p) => { pending.push(p); },
|
||||
};
|
||||
// Quiescence loop: emitUsageEvents calls ctx.waitUntil from inside an
|
||||
// already-pending waitUntil promise, so the array grows during drain.
|
||||
// Keep awaiting until no new entries appear between iterations.
|
||||
async function settled(): Promise<void> {
|
||||
let prev = -1;
|
||||
while (pending.length !== prev) {
|
||||
prev = pending.length;
|
||||
await Promise.allSettled(pending.slice(0, prev));
|
||||
}
|
||||
}
|
||||
return {
|
||||
ctx,
|
||||
get settled() { return settled(); },
|
||||
} as { ctx: GatewayCtx; settled: Promise<void> };
|
||||
}
|
||||
|
||||
function installAxiomFetchSpy(
|
||||
originalFetch: typeof fetch,
|
||||
opts: { entitlementsResponse?: unknown } = {},
|
||||
): {
|
||||
events: CapturedEvent[];
|
||||
restore: () => void;
|
||||
} {
|
||||
const events: CapturedEvent[] = [];
|
||||
globalThis.fetch = (async (input: RequestInfo | URL, init?: RequestInit) => {
|
||||
const url = typeof input === 'string' ? input : input instanceof URL ? input.toString() : input.url;
|
||||
if (url.includes('api.axiom.co')) {
|
||||
const body = init?.body ? JSON.parse(init.body as string) as CapturedEvent[] : [];
|
||||
for (const ev of body) events.push(ev);
|
||||
return new Response('{}', { status: 200 });
|
||||
}
|
||||
if (url.includes('/api/internal-entitlements')) {
|
||||
return new Response(JSON.stringify(opts.entitlementsResponse ?? null), {
|
||||
status: 200,
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
});
|
||||
}
|
||||
return originalFetch(input as Request | string | URL, init);
|
||||
}) as typeof fetch;
|
||||
return { events, restore: () => { globalThis.fetch = originalFetch; } };
|
||||
}
|
||||
|
||||
// Snapshots of process-wide state taken at module load. Tests freely mutate
// globalThis.fetch and these env vars; the afterEach hook below restores every
// one of them so suites cannot leak state into each other.
const ORIGINAL_FETCH = globalThis.fetch;
const ORIGINAL_USAGE_FLAG = process.env.USAGE_TELEMETRY;
const ORIGINAL_AXIOM_TOKEN = process.env.AXIOM_API_TOKEN;
const ORIGINAL_VALID_KEYS = process.env.WORLDMONITOR_VALID_KEYS;
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = ORIGINAL_FETCH;
|
||||
if (ORIGINAL_USAGE_FLAG == null) delete process.env.USAGE_TELEMETRY;
|
||||
else process.env.USAGE_TELEMETRY = ORIGINAL_USAGE_FLAG;
|
||||
if (ORIGINAL_AXIOM_TOKEN == null) delete process.env.AXIOM_API_TOKEN;
|
||||
else process.env.AXIOM_API_TOKEN = ORIGINAL_AXIOM_TOKEN;
|
||||
if (ORIGINAL_VALID_KEYS == null) delete process.env.WORLDMONITOR_VALID_KEYS;
|
||||
else process.env.WORLDMONITOR_VALID_KEYS = ORIGINAL_VALID_KEYS;
|
||||
});
|
||||
|
||||
describe('gateway telemetry payload — domain extraction', () => {
|
||||
it("emits domain='shipping' for /api/v2/shipping/* routes (not 'v2')", async () => {
|
||||
process.env.USAGE_TELEMETRY = '1';
|
||||
process.env.AXIOM_API_TOKEN = 'test-token';
|
||||
const spy = installAxiomFetchSpy(ORIGINAL_FETCH);
|
||||
|
||||
const handler = createDomainGateway([
|
||||
{
|
||||
method: 'GET',
|
||||
path: '/api/v2/shipping/route-intelligence',
|
||||
handler: async () => new Response('{"ok":true}', { status: 200 }),
|
||||
},
|
||||
]);
|
||||
|
||||
const recorder = makeRecordingCtx();
|
||||
const res = await handler(
|
||||
new Request('https://worldmonitor.app/api/v2/shipping/route-intelligence', {
|
||||
headers: { Origin: 'https://worldmonitor.app' },
|
||||
}),
|
||||
recorder.ctx,
|
||||
);
|
||||
// Anonymous → 401 (premium path, missing API key + no bearer)
|
||||
assert.equal(res.status, 401);
|
||||
|
||||
await recorder.settled;
|
||||
spy.restore();
|
||||
|
||||
assert.equal(spy.events.length, 1, 'expected exactly one telemetry event');
|
||||
const ev = spy.events[0]!;
|
||||
assert.equal(ev.domain, 'shipping', `domain should strip leading vN segment, got '${ev.domain}'`);
|
||||
assert.equal(ev.route, '/api/v2/shipping/route-intelligence');
|
||||
assert.equal(ev.auth_kind, 'anon');
|
||||
assert.equal(ev.customer_id, null);
|
||||
assert.equal(ev.tier, 0);
|
||||
});
|
||||
|
||||
it("emits domain='market' for the standard /api/<domain>/v1/<rpc> layout", async () => {
|
||||
process.env.USAGE_TELEMETRY = '1';
|
||||
process.env.AXIOM_API_TOKEN = 'test-token';
|
||||
const spy = installAxiomFetchSpy(ORIGINAL_FETCH);
|
||||
|
||||
const handler = createDomainGateway([
|
||||
{
|
||||
method: 'GET',
|
||||
path: '/api/market/v1/list-market-quotes',
|
||||
handler: async () => new Response('{"ok":true}', { status: 200 }),
|
||||
},
|
||||
]);
|
||||
|
||||
const recorder = makeRecordingCtx();
|
||||
const res = await handler(
|
||||
new Request('https://worldmonitor.app/api/market/v1/list-market-quotes?symbols=AAPL', {
|
||||
headers: { Origin: 'https://worldmonitor.app' },
|
||||
}),
|
||||
recorder.ctx,
|
||||
);
|
||||
assert.equal(res.status, 200);
|
||||
|
||||
await recorder.settled;
|
||||
spy.restore();
|
||||
|
||||
assert.equal(spy.events.length, 1);
|
||||
assert.equal(spy.events[0]!.domain, 'market');
|
||||
});
|
||||
});
|
||||
|
||||
describe('gateway telemetry payload — bearer identity propagation', () => {
|
||||
let privateKey: CryptoKey;
|
||||
let jwksServer: Server;
|
||||
let jwksPort: number;
|
||||
|
||||
before(async () => {
|
||||
const { publicKey, privateKey: pk } = await generateKeyPair('RS256');
|
||||
privateKey = pk;
|
||||
|
||||
const publicJwk = await exportJWK(publicKey);
|
||||
publicJwk.kid = 'telemetry-key-1';
|
||||
publicJwk.alg = 'RS256';
|
||||
publicJwk.use = 'sig';
|
||||
const jwks = { keys: [publicJwk] };
|
||||
|
||||
jwksServer = createServer((req, res) => {
|
||||
if (req.url === '/.well-known/jwks.json') {
|
||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||
res.end(JSON.stringify(jwks));
|
||||
} else {
|
||||
res.writeHead(404);
|
||||
res.end();
|
||||
}
|
||||
});
|
||||
await new Promise<void>((resolve) => jwksServer.listen(0, '127.0.0.1', () => resolve()));
|
||||
const addr = jwksServer.address();
|
||||
jwksPort = typeof addr === 'object' && addr ? addr.port : 0;
|
||||
process.env.CLERK_JWT_ISSUER_DOMAIN = `http://127.0.0.1:${jwksPort}`;
|
||||
});
|
||||
|
||||
after(async () => {
|
||||
jwksServer?.close();
|
||||
delete process.env.CLERK_JWT_ISSUER_DOMAIN;
|
||||
});
|
||||
|
||||
function signToken(claims: Record<string, unknown>) {
|
||||
return new SignJWT(claims)
|
||||
.setProtectedHeader({ alg: 'RS256', kid: 'telemetry-key-1' })
|
||||
.setIssuer(`http://127.0.0.1:${jwksPort}`)
|
||||
.setAudience('convex')
|
||||
.setSubject(claims.sub as string ?? 'user_test')
|
||||
.setIssuedAt()
|
||||
.setExpirationTime('1h')
|
||||
.sign(privateKey);
|
||||
}
|
||||
|
||||
it('records customer_id from a successful legacy premium bearer call', async () => {
|
||||
process.env.USAGE_TELEMETRY = '1';
|
||||
process.env.AXIOM_API_TOKEN = 'test-token';
|
||||
const spy = installAxiomFetchSpy(ORIGINAL_FETCH);
|
||||
|
||||
const handler = createDomainGateway([
|
||||
{
|
||||
method: 'GET',
|
||||
path: '/api/resilience/v1/get-resilience-score',
|
||||
handler: async () => new Response('{"ok":true}', { status: 200 }),
|
||||
},
|
||||
]);
|
||||
|
||||
const token = await signToken({ sub: 'user_pro', plan: 'pro' });
|
||||
const recorder = makeRecordingCtx();
|
||||
const res = await handler(
|
||||
new Request('https://worldmonitor.app/api/resilience/v1/get-resilience-score?countryCode=US', {
|
||||
headers: {
|
||||
Origin: 'https://worldmonitor.app',
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
}),
|
||||
recorder.ctx,
|
||||
);
|
||||
assert.equal(res.status, 200);
|
||||
|
||||
await recorder.settled;
|
||||
spy.restore();
|
||||
|
||||
assert.equal(spy.events.length, 1, 'expected exactly one telemetry event');
|
||||
const ev = spy.events[0]!;
|
||||
// The whole point of fix #2: pre-fix this would have been null/anon.
|
||||
assert.equal(ev.customer_id, 'user_pro', 'customer_id should be the bearer subject');
|
||||
assert.equal(ev.auth_kind, 'clerk_jwt');
|
||||
assert.equal(ev.domain, 'resilience');
|
||||
assert.equal(ev.status, 200);
|
||||
});
|
||||
|
||||
it("records tier=2 for an entitlement-gated success (the path the round-1 P2 fix targets)", async () => {
|
||||
// /api/market/v1/analyze-stock requires tier 2 in ENDPOINT_ENTITLEMENTS.
|
||||
// Pre-fix: usage.tier stayed null → emitted as 0. Post-fix: gateway re-reads
|
||||
// entitlements after checkEntitlement allows the request, so tier=2 lands on
|
||||
// the wire. We exercise this by stubbing the Convex entitlements fallback —
|
||||
// Redis returns null without UPSTASH env, then getEntitlements falls through
|
||||
// to the Convex HTTP path which we intercept via the same fetch spy.
|
||||
process.env.USAGE_TELEMETRY = '1';
|
||||
process.env.AXIOM_API_TOKEN = 'test-token';
|
||||
process.env.CONVEX_SITE_URL = 'https://convex.test';
|
||||
process.env.CONVEX_SERVER_SHARED_SECRET = 'test-shared-secret';
|
||||
|
||||
const fakeEntitlements = {
|
||||
planKey: 'api_starter',
|
||||
features: {
|
||||
tier: 2,
|
||||
apiAccess: true,
|
||||
apiRateLimit: 1000,
|
||||
maxDashboards: 10,
|
||||
prioritySupport: false,
|
||||
exportFormats: ['json'],
|
||||
},
|
||||
validUntil: Date.now() + 60_000,
|
||||
};
|
||||
const spy = installAxiomFetchSpy(ORIGINAL_FETCH, { entitlementsResponse: fakeEntitlements });
|
||||
|
||||
const handler = createDomainGateway([
|
||||
{
|
||||
method: 'GET',
|
||||
path: '/api/market/v1/analyze-stock',
|
||||
handler: async () => new Response('{"ok":true}', { status: 200 }),
|
||||
},
|
||||
]);
|
||||
|
||||
// plan: 'api' so the legacy bearer-role short-circuit (`session.role === 'pro'`)
|
||||
// does NOT fire — we want the entitlement-check path that populates usage.tier.
|
||||
const token = await signToken({ sub: 'user_api', plan: 'api' });
|
||||
const recorder = makeRecordingCtx();
|
||||
const res = await handler(
|
||||
new Request('https://worldmonitor.app/api/market/v1/analyze-stock?symbol=AAPL', {
|
||||
headers: {
|
||||
Origin: 'https://worldmonitor.app',
|
||||
Authorization: `Bearer ${token}`,
|
||||
},
|
||||
}),
|
||||
recorder.ctx,
|
||||
);
|
||||
assert.equal(res.status, 200, 'entitlement-gated request with sufficient tier should succeed');
|
||||
|
||||
await recorder.settled;
|
||||
spy.restore();
|
||||
delete process.env.CONVEX_SITE_URL;
|
||||
delete process.env.CONVEX_SERVER_SHARED_SECRET;
|
||||
|
||||
assert.equal(spy.events.length, 1);
|
||||
const ev = spy.events[0]!;
|
||||
assert.equal(ev.tier, 2, `tier should reflect resolved entitlement, got ${ev.tier}`);
|
||||
assert.equal(ev.customer_id, 'user_api');
|
||||
assert.equal(ev.auth_kind, 'clerk_jwt');
|
||||
assert.equal(ev.domain, 'market');
|
||||
assert.equal(ev.route, '/api/market/v1/analyze-stock');
|
||||
});
|
||||
|
||||
it('still emits with auth_kind=anon when the bearer is invalid', async () => {
|
||||
process.env.USAGE_TELEMETRY = '1';
|
||||
process.env.AXIOM_API_TOKEN = 'test-token';
|
||||
const spy = installAxiomFetchSpy(ORIGINAL_FETCH);
|
||||
|
||||
const handler = createDomainGateway([
|
||||
{
|
||||
method: 'GET',
|
||||
path: '/api/resilience/v1/get-resilience-score',
|
||||
handler: async () => new Response('{"ok":true}', { status: 200 }),
|
||||
},
|
||||
]);
|
||||
|
||||
const recorder = makeRecordingCtx();
|
||||
const res = await handler(
|
||||
new Request('https://worldmonitor.app/api/resilience/v1/get-resilience-score?countryCode=US', {
|
||||
headers: {
|
||||
Origin: 'https://worldmonitor.app',
|
||||
Authorization: 'Bearer not-a-real-token',
|
||||
},
|
||||
}),
|
||||
recorder.ctx,
|
||||
);
|
||||
assert.equal(res.status, 401);
|
||||
|
||||
await recorder.settled;
|
||||
spy.restore();
|
||||
|
||||
assert.equal(spy.events.length, 1);
|
||||
const ev = spy.events[0]!;
|
||||
assert.equal(ev.auth_kind, 'anon');
|
||||
assert.equal(ev.customer_id, null);
|
||||
});
|
||||
});
|
||||
|
||||
describe('gateway telemetry payload — ctx-optional safety', () => {
|
||||
it('handler(req) without ctx still resolves cleanly even with telemetry on', async () => {
|
||||
process.env.USAGE_TELEMETRY = '1';
|
||||
process.env.AXIOM_API_TOKEN = 'test-token';
|
||||
const spy = installAxiomFetchSpy(ORIGINAL_FETCH);
|
||||
|
||||
const handler = createDomainGateway([
|
||||
{
|
||||
method: 'GET',
|
||||
path: '/api/market/v1/list-market-quotes',
|
||||
handler: async () => new Response('{"ok":true}', { status: 200 }),
|
||||
},
|
||||
]);
|
||||
|
||||
const res = await handler(
|
||||
new Request('https://worldmonitor.app/api/market/v1/list-market-quotes?symbols=AAPL', {
|
||||
headers: { Origin: 'https://worldmonitor.app' },
|
||||
}),
|
||||
);
|
||||
assert.equal(res.status, 200);
|
||||
spy.restore();
|
||||
// No ctx → emit short-circuits → no events delivered. The point is that
|
||||
// the handler does not throw "Cannot read properties of undefined".
|
||||
assert.equal(spy.events.length, 0);
|
||||
});
|
||||
});
|
||||
@@ -87,7 +87,7 @@
|
||||
{ "key": "Strict-Transport-Security", "value": "max-age=63072000; includeSubDomains; preload" },
|
||||
{ "key": "Referrer-Policy", "value": "strict-origin-when-cross-origin" },
|
||||
{ "key": "Permissions-Policy", "value": "camera=(), microphone=(), geolocation=(self), accelerometer=(), autoplay=(self \"https://www.youtube.com\" \"https://www.youtube-nocookie.com\"), bluetooth=(), display-capture=(), encrypted-media=(self \"https://www.youtube.com\" \"https://www.youtube-nocookie.com\"), gyroscope=(), hid=(), idle-detection=(), magnetometer=(), midi=(), payment=(self \"https://checkout.dodopayments.com\" \"https://test.checkout.dodopayments.com\" \"https://pay.google.com\" \"https://hooks.stripe.com\" \"https://js.stripe.com\"), picture-in-picture=(self \"https://www.youtube.com\" \"https://www.youtube-nocookie.com\" \"https://challenges.cloudflare.com\"), screen-wake-lock=(), serial=(), usb=(), xr-spatial-tracking=(\"https://challenges.cloudflare.com\")" },
|
||||
{ "key": "Content-Security-Policy", "value": "default-src 'self'; connect-src 'self' https: wss: blob: data: https://*.ingest.sentry.io https://*.ingest.us.sentry.io; img-src 'self' data: blob: https:; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; script-src 'self' 'sha256-LnMFPWZxTgVOr2VYwIh9mhQ3l/l3+a3SfNOLERnuHfY=' 'sha256-4Z2xtr1B9QQugoojE/nbpOViG+8l2B7CZVlKgC78AeQ=' 'sha256-903UI9my1I7mqHoiVeZSc56yd50YoRJTB2269QqL76w=' 'sha256-EytE6o1N8rwzpVFMrF+WvBZr2y5UhFLw79o1/4VqS0s=' 'wasm-unsafe-eval' https://www.youtube.com https://static.cloudflareinsights.com https://vercel.live https://challenges.cloudflare.com https://*.clerk.accounts.dev https://abacus.worldmonitor.app https://*.dodopayments.com https://js.stripe.com; worker-src 'self' blob:; font-src 'self' data: https:; media-src 'self' data: blob: https:; frame-src 'self' https://worldmonitor.app https://tech.worldmonitor.app https://finance.worldmonitor.app https://commodity.worldmonitor.app https://happy.worldmonitor.app https://www.youtube.com https://www.youtube-nocookie.com https://www.google.com https://webcams.windy.com https://challenges.cloudflare.com https://*.clerk.accounts.dev https://vercel.live https://*.vercel.app https://*.dodopayments.com https://pay.google.com https://hooks.stripe.com https://js.stripe.com; frame-ancestors 'self' https://www.worldmonitor.app https://tech.worldmonitor.app https://finance.worldmonitor.app https://commodity.worldmonitor.app https://happy.worldmonitor.app https://worldmonitor.app https://vercel.live https://*.vercel.app; base-uri 'self'; object-src 'none'; form-action 'self' https://api.worldmonitor.app" }
|
||||
{ "key": "Content-Security-Policy", "value": "default-src 'self'; connect-src 'self' https: wss: blob: data: https://*.ingest.sentry.io https://*.ingest.us.sentry.io; img-src 'self' data: blob: https:; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; script-src 'self' 'sha256-LnMFPWZxTgVOr2VYwIh9mhQ3l/l3+a3SfNOLERnuHfY=' 'sha256-4Z2xtr1B9QQugoojE/nbpOViG+8l2B7CZVlKgC78AeQ=' 'sha256-903UI9my1I7mqHoiVeZSc56yd50YoRJTB2269QqL76w=' 'sha256-EytE6o1N8rwzpVFMrF+WvBZr2y5UhFLw79o1/4VqS0s=' 'wasm-unsafe-eval' https://www.youtube.com https://static.cloudflareinsights.com https://vercel.live https://challenges.cloudflare.com https://*.clerk.accounts.dev https://abacus.worldmonitor.app https://*.dodopayments.com https://js.stripe.com; worker-src 'self' blob:; font-src 'self' data: https:; media-src 'self' data: blob: https:; frame-src 'self' https://worldmonitor.app https://tech.worldmonitor.app https://finance.worldmonitor.app https://commodity.worldmonitor.app https://happy.worldmonitor.app https://energy.worldmonitor.app https://www.youtube.com https://www.youtube-nocookie.com https://www.google.com https://webcams.windy.com https://challenges.cloudflare.com https://*.clerk.accounts.dev https://vercel.live https://*.vercel.app https://*.dodopayments.com https://pay.google.com https://hooks.stripe.com https://js.stripe.com; frame-ancestors 'self' https://www.worldmonitor.app https://tech.worldmonitor.app https://finance.worldmonitor.app https://commodity.worldmonitor.app https://happy.worldmonitor.app https://energy.worldmonitor.app https://worldmonitor.app https://vercel.live https://*.vercel.app; base-uri 'self'; object-src 'none'; form-action 'self' https://api.worldmonitor.app" }
|
||||
]
|
||||
},
|
||||
{
|
||||
|
||||