diff --git a/api/health.js b/api/health.js index bc200e26c..f465a5c3c 100644 --- a/api/health.js +++ b/api/health.js @@ -161,7 +161,7 @@ const STANDALONE_KEYS = { pizzint: 'intelligence:pizzint:seed:v1', resilienceStaticIndex: 'resilience:static:index:v1', resilienceStaticFao: 'resilience:static:fao', - resilienceRanking: 'resilience:ranking:v10', + resilienceRanking: 'resilience:ranking:v11', productCatalog: 'product-catalog:v2', energySpineCountries: 'energy:spine:v1:_countries', energyExposure: 'energy:exposure:v1:index', diff --git a/docs/methodology/country-resilience-index.mdx b/docs/methodology/country-resilience-index.mdx index 80a86fcf4..241009aa9 100644 --- a/docs/methodology/country-resilience-index.mdx +++ b/docs/methodology/country-resilience-index.mdx @@ -227,6 +227,31 @@ All six WGI indicators are equally weighted. This domain forms the recovery-capacity pillar. It measures a country's ability to bounce back from an acute shock along fiscal, monetary, trade, institutional, and energy dimensions. +**Per-dimension weights in the recovery domain (PR 2 §3.4).** Four +core recovery dimensions (`fiscalSpace`, `externalDebtCoverage`, +`importConcentration`, `stateContinuity`) carry the default weight +`1.0`. The two PR 2 §3.4 replacements for the retired `reserveAdequacy` +carry weight `0.5` each: + +| Dimension | Weight | Share at full coverage | +|---|---:|---:| +| fiscalSpace | 1.0 | 20% | +| externalDebtCoverage | 1.0 | 20% | +| importConcentration | 1.0 | 20% | +| stateContinuity | 1.0 | 20% | +| liquidReserveAdequacy | 0.5 | 10% | +| sovereignFiscalBuffer | 0.5 | 10% | + +The `0.5` weight on the two new dims puts their combined contribution +to the recovery score at ~20% at full coverage, matching the plan's direction that the +sovereign-wealth signal complement — rather than dominate — the +classical liquid-reserves and fiscal-space signals. 
The weights are +applied via `RESILIENCE_DIMENSION_WEIGHTS` in +`server/worldmonitor/resilience/v1/_dimension-scorers.ts`; +`coverageWeightedMean` in `_shared.ts` multiplies each dim's coverage +by its weight before computing the domain average, so a dim with +`coverage=0` (e.g. a retired dim) still contributes zero regardless of weight. + #### Fiscal Space | Indicator | Description | Direction | Goalposts (worst-best) | Weight | Source | Cadence | @@ -462,9 +487,9 @@ The CRI is designed to be auditable end-to-end: given the Redis snapshot at any | Key | Type | TTL | Written by | Read by | |---|---|---|---|---| -| `resilience:score:v10:{countryCode}` | JSON | 6 hours | `buildResilienceScore` in `server/worldmonitor/resilience/v1/_shared.ts` | `getResilienceScore` handler | -| `resilience:ranking:v10` | JSON | 6 hours | `buildResilienceRanking`, only when all countries are scored | `getResilienceRanking` handler | -| `resilience:history:v5:{countryCode}` | sorted set | indefinite, trimmed to 30 days | `appendHistory` during scoring | trend and `change30d` computation | +| `resilience:score:v11:{countryCode}` | JSON | 6 hours | `buildResilienceScore` in `server/worldmonitor/resilience/v1/_shared.ts` | `getResilienceScore` handler | +| `resilience:ranking:v11` | JSON | 12 hours | `buildResilienceRanking`, only when all countries are scored | `getResilienceRanking` handler | +| `resilience:history:v6:{countryCode}` | sorted set | indefinite, trimmed to 30 days | `appendHistory` during scoring | trend and `change30d` computation | | `resilience:intervals:v1:{countryCode}` | JSON | 6 hours | `scripts/seed-resilience-intervals.mjs` | `getResilienceScore` (optional `scoreInterval` field) | | `seed-meta:resilience:static` | JSON | 2 hours | `scripts/seed-resilience-static.mjs` at the end of each successful seed run | scorer for `dataVersion` population, health checks | | `resilience:static:{countryCode}` | JSON | 400 days | `scripts/seed-resilience-static.mjs` | scorer for all 
baseline signals (WGI, WHO, FAO, GPI, RSF, and so on) | @@ -573,12 +598,12 @@ The plan's non-compensatory pillar combine is the methodologically stronger form **Activation sequence**: the rank-stability evidence supports flipping the default — there is no statistical reason to keep the legacy compensatory form. The blocker is messaging: publishing "US = 54.50" the day after publishing "US = 68.26" without a methodology note would look like a regression instead of a rigor upgrade. The pillar-combine activation PR wires the following so the flip is a single env-var change with no code deploy required: 1. **Feature flag**: `RESILIENCE_PILLAR_COMBINE_ENABLED`, read dynamically from `process.env` per call. Default `false`. Set to `true` in Vercel env + Railway env to activate. -2. **Cache invalidation**: per-country score cache bumped from `resilience:score:v9:` to `resilience:score:v10:`, ranking cache bumped from `resilience:ranking:v9` to `resilience:ranking:v10`, and score-history bumped from `resilience:history:v4:` to `resilience:history:v5:`. The version bumps are a clean-slate guard; the actual cross-formula isolation is the `_formula` tag written into every cached score / ranking payload and the `:d6` / `:pc` suffix on every history sorted-set member, checked at read time so a flag flip forces a rebuild without waiting for TTLs. +2. **Cache invalidation**: per-country score cache bumped from `resilience:score:v9:` to `resilience:score:v10:`, ranking cache bumped from `resilience:ranking:v9` to `resilience:ranking:v10`, and score-history bumped from `resilience:history:v4:` to `resilience:history:v5:` (subsequently bumped to `resilience:score:v11:`, `resilience:ranking:v11`, and `resilience:history:v6:` in the recovery-domain weight rebalance — see the Redis keys table above for current values). 
The version bumps are a clean-slate guard; the actual cross-formula isolation is the `_formula` tag written into every cached score / ranking payload and the `:d6` / `:pc` suffix on every history sorted-set member, checked at read time so a flag flip forces a rebuild without waiting for TTLs. 3. **Methodology-aware level thresholds**: `classifyResilienceLevel` reads `isPillarCombineEnabled()` and switches the high/medium cutoffs from 70/40 (6-domain) to 60/30 (pillar-combined). Without this, scale compression alone would demote FI (75.64 → 68.60) and NZ (76.26 → 67.93) from "high" to "medium" purely because the formula changed, not because anything about the country changed. The re-anchored cutoffs preserve the qualitative label for every country whose old label was correct. 4. **Re-anchored release-gate bands**: `tests/resilience-pillar-combine-activation.test.mts` pins high-band anchors (NO, CH, DK) at ≥ 60 (vs the 6-domain formula's ≥ 70 floor) and low-band anchors (YE, SO) at ≤ 40 (vs ≤ 45). The snapshot test reads `methodologyFormula` from each snapshot and applies the matching bands. The live sample numbers confirm the bands hold with margin: NO proposed ≈ 71.59 (≥ 60 by 11 points), YE ≈ 27.36 (≤ 40 by 13 points). 5. **Projected snapshot**: `docs/snapshots/resilience-ranking-pillar-combined-projected-2026-04-21.json` carries the top/bottom/major-economies tables at the proposed formula so reviewers can preview the post-activation ranking before flipping the flag. Once the flag is on in production, run `scripts/freeze-resilience-ranking.mjs` to capture the authoritative full-universe snapshot. -Rollback: set `RESILIENCE_PILLAR_COMBINE_ENABLED=false`, flush the `resilience:score:v10:*`, `resilience:ranking:v10`, and `resilience:history:v5:*` keys (or wait for TTLs to expire). The 6-domain formula lives alongside the pillar combine in `_shared.ts` and needs no code change to come back. 
+Rollback: set `RESILIENCE_PILLAR_COMBINE_ENABLED=false`, flush the current `resilience:score:v11:*`, `resilience:ranking:v11`, and `resilience:history:v6:*` keys (or wait for TTLs to expire). The 6-domain formula lives alongside the pillar combine in `_shared.ts` and needs no code change to come back. Until operators set the flag, `overall_score` remains the 6-domain weighted aggregate documented above. @@ -615,7 +640,7 @@ Self-assessed against the standard composite-indicator review axes on a 0-10 sca - **§3.5 point 1 — `fuelStockDays` permanently retired from the core score.** IEA/EIA fuel-stock disclosure covers ~45 OECD-member countries; every other country was imputed `unmonitored`. `scoreFuelStockDays` now pins at `score=50, coverage=0, imputationClass=null` for every country. Coverage-weighted domain aggregation excludes it (coverage=0 contributes zero weight), and user-facing confidence / coverage averages exclude it via the `RESILIENCE_RETIRED_DIMENSIONS` registry filter (distinct from non-retired runtime coverage=0 entries, which must keep dragging confidence down — that is the sparse-data signal). `imputationClass=null` (not `source-failure`) because retirement is structural, not a runtime outage; `source-failure` would render a false "Source down" label in the widget on every country. The `recoveryFuelStockDays` registry entry remains (tier=`experimental`) so the data surfaces on IEA-member drill-downs. Re-retention requires a globally-comparable strategic-reserve disclosure concept (>180 countries) to emerge. - **§3.5 point 2 — `currencyExternal` rebuilt on IMF inflation + WB reserves.** BIS REER / DSR covered only the 64 BIS-reporting economies; the old composite fell through to curated_list_absent (coverage 0.3) or a thin IMF proxy (coverage 0.45) for ~130 of 195 countries. New dimension: `inflationStability` (IMF WEO headline inflation, weight 0.60) + `fxReservesAdequacy` (WB reserves in months, weight 0.40). 
Coverage ladder: both=0.85, inflation-only=0.55, reserves-only=0.40, neither=0.30. Legacy `fxVolatility` + `fxDeviation` kept as `tier='experimental'` on country drill-downs for the 64 BIS economies. - **§3.5 point 3 — `externalDebtCoverage` re-goalposted from (0..5) to (0..2).** The old goalpost made ratios < 0.5 all score above 90, saturating at 100 across the full 9-country probe (including stressed states). New goalpost is anchored on Greenspan-Guidotti: ratio=1.0 (short-term debt matches reserves = reserve inadequacy threshold) → score 50; ratio=2.0 (double the threshold = acute rollover-shock exposure) → score 0. Ratios above 2.0 clamp to 0. -- **§3.6 — Coverage-and-influence gate on indicator weight.** `tests/resilience-coverage-influence-gate.test.mts` fails the build if any core indicator with observed coverage below 70% of the ~195-country universe (<137 countries) carries more than 5% nominal weight in the overall score. The effective-influence half (variance-explained, Pearson-derivative) runs through `scripts/validate-resilience-sensitivity.mjs` and is committed as an artifact per plan §5 acceptance-criterion 9. +- **§3.6 — Coverage-and-influence gate on indicator weight.** `tests/resilience-coverage-influence-gate.test.mts` fails the build if any core indicator with observed coverage below 70% of the ~195-country universe (fewer than 137 countries) carries more than 5% nominal weight in the overall score. The effective-influence half (variance-explained, Pearson-derivative) runs through `scripts/validate-resilience-sensitivity.mjs` and is committed as an artifact per plan §5 acceptance-criterion 9. - **Acceptance gates (plan §6):** Spearman vs prior-state >= 0.85, no country swings >5 points from PR 1 state (plan §3.5 deliverable row 4), all release-gate anchors hold, matched-pair directions verified. Sensitivity rerun and post-PR-3 snapshot committed as `docs/snapshots/resilience-ranking-live-post-pr3-.json` at flag-flip/ranking-refresh time. 
- **Construct-audit updates:** `docs/methodology/indicator-sources.yaml` updates `recoveryDebtToReserves.constructStatus` from `dead-signal` to `observed-mechanism` citing the Greenspan-Guidotti anchor. diff --git a/scripts/backtest-resilience-outcomes.mjs b/scripts/backtest-resilience-outcomes.mjs index d3c94ab69..07e452bc2 100644 --- a/scripts/backtest-resilience-outcomes.mjs +++ b/scripts/backtest-resilience-outcomes.mjs @@ -27,7 +27,7 @@ loadEnvFile(import.meta.url); const __dirname = dirname(fileURLToPath(import.meta.url)); const VALIDATION_DIR = join(__dirname, '..', 'docs', 'methodology', 'country-resilience-index', 'validation'); -const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v10:'; +const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v11:'; // Mirror of _shared.ts#currentCacheFormula. Must stay in lockstep; see // the same comment in scripts/validate-resilience-correlation.mjs for diff --git a/scripts/benchmark-resilience-external.mjs b/scripts/benchmark-resilience-external.mjs index ee67f66e1..58f361197 100644 --- a/scripts/benchmark-resilience-external.mjs +++ b/scripts/benchmark-resilience-external.mjs @@ -374,7 +374,7 @@ function currentCacheFormulaLocal() { async function readWmScoresFromRedis() { const { url, token } = getRedisCredentials(); - const rankingResp = await fetch(`${url}/get/${encodeURIComponent('resilience:ranking:v10')}`, { + const rankingResp = await fetch(`${url}/get/${encodeURIComponent('resilience:ranking:v11')}`, { headers: { Authorization: `Bearer ${token}` }, signal: AbortSignal.timeout(10_000), }); diff --git a/scripts/compare-resilience-current-vs-proposed.mjs b/scripts/compare-resilience-current-vs-proposed.mjs index 08a5d149c..c40aea8db 100644 --- a/scripts/compare-resilience-current-vs-proposed.mjs +++ b/scripts/compare-resilience-current-vs-proposed.mjs @@ -44,18 +44,32 @@ loadEnvFile(import.meta.url); // score — fail-loud instead of silent drop). // Mirrors `_shared.ts#coverageWeightedMean`. 
Kept local because the -// production helper is not exported. -function coverageWeightedMean(dims) { - const totalCoverage = dims.reduce((s, d) => s + d.coverage, 0); - if (!totalCoverage) return 0; - return dims.reduce((s, d) => s + d.score * d.coverage, 0) / totalCoverage; +// production helper is not exported. MUST stay in lockstep with +// _shared.ts — including the per-dim weight multiplier introduced in +// PR 2 §3.4 for the recovery-domain rebalance. Without the weight +// application, this harness's Spearman / rank-delta artifacts would +// silently diverge from live API scoring post-rebalance (see the +// RESILIENCE_DIMENSION_WEIGHTS source-of-truth constant). +function coverageWeightedMean(dims, dimensionWeights) { + let totalWeight = 0; + let weightedSum = 0; + for (const d of dims) { + const w = dimensionWeights[d.id] ?? 1.0; + const effective = d.coverage * w; + totalWeight += effective; + weightedSum += d.score * effective; + } + if (!totalWeight) return 0; + return weightedSum / totalWeight; } // Mirrors `_shared.ts#buildDomainList` exactly so the ResilienceDomain // objects fed to buildPillarList are byte-identical to what production // emits. The production helper is not exported, so we re-implement it -// here; the implementation MUST stay in lockstep with _shared.ts. -function buildDomainList(dimensions, dimensionDomains, domainOrder, getDomainWeight) { +// here; the implementation MUST stay in lockstep with _shared.ts — +// including the per-dim weight pass-through from +// RESILIENCE_DIMENSION_WEIGHTS (PR 2 §3.4 recovery rebalance). +function buildDomainList(dimensions, dimensionDomains, domainOrder, getDomainWeight, dimensionWeights) { const grouped = new Map(); for (const domainId of domainOrder) grouped.set(domainId, []); for (const dim of dimensions) { @@ -64,7 +78,7 @@ function buildDomainList(dimensions, dimensionDomains, domainOrder, getDomainWei } return domainOrder.map((domainId) => { const domainDims = grouped.get(domainId) ?? 
[]; - const domainScore = coverageWeightedMean(domainDims); + const domainScore = coverageWeightedMean(domainDims, dimensionWeights); return { id: domainId, score: Math.round(domainScore * 100) / 100, @@ -688,6 +702,14 @@ async function main() { scoreAllDimensions, RESILIENCE_DIMENSION_ORDER, RESILIENCE_DIMENSION_DOMAINS, + // PR 2 §3.4 recovery-domain rebalance: per-dim weights applied + // inside coverageWeightedMean so the harness's domain scores, + // overall score, and Spearman / rank-delta artifacts track live + // scoring after the rebalance. Ids missing from the map default to + // 1.0 in the mirror functions above (same as production), but the + // map itself is required: the mirror indexes it directly, so a + // renamed / removed constant throws instead of silently unweighting. + RESILIENCE_DIMENSION_WEIGHTS, getResilienceDomainWeight, RESILIENCE_DOMAIN_ORDER, createMemoizedSeedReader, @@ -805,12 +827,18 @@ async function main() { freshness: { lastObservedAtMs: '0', staleness: '' }, })); - // Build domains and pillars with the EXACT production aggregation. + // Build domains and pillars with the EXACT production aggregation + // — including the per-dim weight channel (PR 2 §3.4 recovery + // rebalance). RESILIENCE_DIMENSION_WEIGHTS is passed through so + // this harness's Spearman / rank-delta artifacts reflect live + // scoring. The mirror `coverageWeightedMean` above defaults any + // missing id to 1.0 (same contract as production). 
const domains = buildDomainList( dimensions, RESILIENCE_DIMENSION_DOMAINS, RESILIENCE_DOMAIN_ORDER, getResilienceDomainWeight, + RESILIENCE_DIMENSION_WEIGHTS, ); // Current production overallScore: Σ domain.score * domain.weight diff --git a/scripts/seed-resilience-scores.mjs b/scripts/seed-resilience-scores.mjs index 8012c0243..a51ebd828 100644 --- a/scripts/seed-resilience-scores.mjs +++ b/scripts/seed-resilience-scores.mjs @@ -19,8 +19,12 @@ const WM_KEY = process.env.WORLDMONITOR_API_KEY || ''; const SEED_UA = 'Mozilla/5.0 (compatible; WorldMonitor-Seed/1.0)'; -export const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v10:'; -export const RESILIENCE_RANKING_CACHE_KEY = 'resilience:ranking:v10'; +// Bumped v10 → v11 in lockstep with server/worldmonitor/resilience/v1/ +// _shared.ts for the PR 2 §3.4 recovery-domain weight rebalance. +// Seeder and server MUST agree on the prefix or the seeder writes +// scores the handler will never read. +export const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v11:'; +export const RESILIENCE_RANKING_CACHE_KEY = 'resilience:ranking:v11'; // Must match the server-side RESILIENCE_RANKING_CACHE_TTL_SECONDS. Extended // to 12h (2x the cron interval) so a missed/slow cron can't create an // EMPTY_ON_DEMAND gap before the next successful rebuild. diff --git a/scripts/validate-resilience-backtest.mjs b/scripts/validate-resilience-backtest.mjs index 753ffd87d..2bfe130a4 100644 --- a/scripts/validate-resilience-backtest.mjs +++ b/scripts/validate-resilience-backtest.mjs @@ -27,7 +27,7 @@ import { unwrapEnvelope } from './_seed-envelope-source.mjs'; loadEnvFile(import.meta.url); // Source of truth: server/worldmonitor/resilience/v1/_shared.ts -const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v10:'; +const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v11:'; // Mirror of _shared.ts#currentCacheFormula — must stay in lockstep so // the backtest only ingests same-formula cache entries. 
A mixed-formula diff --git a/scripts/validate-resilience-correlation.mjs b/scripts/validate-resilience-correlation.mjs index 4e69ea015..72a8e3414 100644 --- a/scripts/validate-resilience-correlation.mjs +++ b/scripts/validate-resilience-correlation.mjs @@ -3,7 +3,7 @@ import { loadEnvFile, getRedisCredentials } from './_seed-utils.mjs'; // Source of truth: server/worldmonitor/resilience/v1/_shared.ts → RESILIENCE_SCORE_CACHE_PREFIX -const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v10:'; +const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v11:'; // Mirror of server/worldmonitor/resilience/v1/_shared.ts#currentCacheFormula. // Must stay in lockstep with the server-side definition so this script diff --git a/server/worldmonitor/resilience/v1/_dimension-scorers.ts b/server/worldmonitor/resilience/v1/_dimension-scorers.ts index 347be86b6..5a4a7a53b 100644 --- a/server/worldmonitor/resilience/v1/_dimension-scorers.ts +++ b/server/worldmonitor/resilience/v1/_dimension-scorers.ts @@ -359,6 +359,48 @@ const RESILIENCE_DOMAIN_WEIGHTS: Record = { recovery: 0.25, }; +// Per-dimension weight multipliers applied inside the coverage-weighted +// mean when aggregating a domain. Defaults to 1.0 (every dim gets the +// same nominal share, and the coverage-weighted mean's share-denominator +// reflects how much real data each dim contributes). +// +// PR 2 §3.4 — `liquidReserveAdequacy` and `sovereignFiscalBuffer` each +// carry 0.5 so they sit at ~10% of the recovery-domain score instead of +// the equal-share 1/6 (~16.7%) the old reserveAdequacy dim implicitly +// claimed. The plan's target: "liquidReserveAdequacy ~0.10; +// sovereignFiscalBuffer ~0.10; other recovery dimensions absorb +// residual." 
Math check with all 6 active recovery dims at coverage=1: +// (1.0×4 + 0.5×2) = 5.0 total weighted coverage +// new-dim share = 0.5 / 5.0 = 0.10 ✓ +// other-dim share = 1.0 / 5.0 = 0.20 (the residual-absorbed weight) +// +// Retired dims have coverage=0 and so contribute 0 to the numerator / +// denominator regardless of their weight entry; setting them to 1.0 +// here is fine and keeps the map uniform. +export const RESILIENCE_DIMENSION_WEIGHTS: Record = { + macroFiscal: 1.0, + currencyExternal: 1.0, + tradeSanctions: 1.0, + cyberDigital: 1.0, + logisticsSupply: 1.0, + infrastructure: 1.0, + energy: 1.0, + governanceInstitutional: 1.0, + socialCohesion: 1.0, + borderSecurity: 1.0, + informationCognitive: 1.0, + healthPublicService: 1.0, + foodWater: 1.0, + fiscalSpace: 1.0, + reserveAdequacy: 1.0, // retired; coverage=0 neutralizes the weight + externalDebtCoverage: 1.0, + importConcentration: 1.0, + stateContinuity: 1.0, + fuelStockDays: 1.0, // retired; coverage=0 neutralizes the weight + liquidReserveAdequacy: 0.5, // PR 2 §3.4 target ~10% recovery share + sovereignFiscalBuffer: 0.5, // PR 2 §3.4 target ~10% recovery share +}; + export const RESILIENCE_DIMENSION_DOMAINS: Record = { macroFiscal: 'economic', currencyExternal: 'economic', diff --git a/server/worldmonitor/resilience/v1/_shared.ts b/server/worldmonitor/resilience/v1/_shared.ts index 9e03b0428..84f2de7cb 100644 --- a/server/worldmonitor/resilience/v1/_shared.ts +++ b/server/worldmonitor/resilience/v1/_shared.ts @@ -16,6 +16,7 @@ import { RESILIENCE_DIMENSION_DOMAINS, RESILIENCE_DIMENSION_ORDER, RESILIENCE_DIMENSION_TYPES, + RESILIENCE_DIMENSION_WEIGHTS, RESILIENCE_DOMAIN_ORDER, RESILIENCE_RETIRED_DIMENSIONS, createMemoizedSeedReader, @@ -124,7 +125,13 @@ export const RESILIENCE_RANKING_CACHE_TTL_SECONDS = 12 * 60 * 60; // `buildResilienceScore`, read by `ensureResilienceScoreCached` and // `getCachedResilienceScores` to reject stale-formula hits at serve // time. 
See the `CacheFormulaTag` comment block. -export const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v10:'; +// v11 bump for PR 2 §3.4 recovery-domain weight rebalance. The +// `_formula` tag only distinguishes 'd6' vs 'pc' and does NOT detect +// intra-'d6' coefficient changes like a per-dim weight adjustment, so +// a bare flag-guard would leave pre-deploy equal-weight scores served +// for up to the full 6h TTL. Prefix bump forces a clean slate — +// matches the established v9→v10 pattern for formula-changing deploys. +export const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v11:'; // Bumped from v4 to v5 in the pillar-combined activation PR. Provides // a clean slate at PR deploy so pre-PR history points (which were // written without a formula tag) do not mix with tagged points. NOTE: @@ -136,7 +143,11 @@ export const RESILIENCE_SCORE_CACHE_PREFIX = 'resilience:score:v10:'; // untagged members (from older deploys that happen to survive on v4 // readers) decode as `d6` — matching the only formula that existed // before this PR — so the filter stays correct in either direction. -export const RESILIENCE_HISTORY_KEY_PREFIX = 'resilience:history:v5:'; +// v6 bump in lockstep with RESILIENCE_SCORE_CACHE_PREFIX v10→v11 for +// PR 2 §3.4 recovery-domain weight rebalance. Pre-bump history points +// were written against equal-weight scoring; trend + change30d math +// mixes them with post-bump points otherwise. +export const RESILIENCE_HISTORY_KEY_PREFIX = 'resilience:history:v6:'; // Bumped in lockstep with RESILIENCE_SCORE_CACHE_PREFIX (v9 → v10) for // a clean slate at PR deploy. 
As with the score prefix, the version // bump is a belt — the suspenders are the `_formula` tag on the @@ -144,7 +155,7 @@ export const RESILIENCE_HISTORY_KEY_PREFIX = 'resilience:history:v5:'; // via rankingCacheTagMatches in the ranking handler, which force a // recompute-and-publish on a cross-formula cache hit rather than // serving the stale ranking for up to the 12h ranking TTL. -export const RESILIENCE_RANKING_CACHE_KEY = 'resilience:ranking:v10'; +export const RESILIENCE_RANKING_CACHE_KEY = 'resilience:ranking:v11'; export const RESILIENCE_STATIC_INDEX_KEY = 'resilience:static:index:v1'; export const RESILIENCE_INTERVAL_KEY_PREFIX = 'resilience:intervals:v1:'; const RESILIENCE_STATIC_META_KEY = 'seed-meta:resilience:static'; @@ -272,10 +283,25 @@ function buildDimensionList( })); } +// Coverage-weighted mean with an optional per-dimension weight multiplier. +// Each dim's effective weight is `coverage * dimWeight`, so when all +// weights default to 1.0 this reduces to the original coverage-weighted +// mean. PR 2 §3.4 uses the weight channel to dial the two new recovery +// dims down to ~10% share (see RESILIENCE_DIMENSION_WEIGHTS in +// _dimension-scorers.ts for the rationale). Retired dims have +// coverage=0 so they're neutralized at the coverage end; the weight +// channel stays 1.0 for them in the canonical map. function coverageWeightedMean(dimensions: ResilienceDimension[]): number { - const totalCoverage = dimensions.reduce((sum, d) => sum + d.coverage, 0); - if (!totalCoverage) return 0; - return dimensions.reduce((sum, d) => sum + d.score * d.coverage, 0) / totalCoverage; + let totalWeight = 0; + let weightedSum = 0; + for (const d of dimensions) { + const w = RESILIENCE_DIMENSION_WEIGHTS[d.id as ResilienceDimensionId] ?? 
1.0; + const effective = d.coverage * w; + totalWeight += effective; + weightedSum += d.score * effective; + } + if (!totalWeight) return 0; + return weightedSum / totalWeight; } export const PENALTY_ALPHA = 0.50; @@ -351,6 +377,16 @@ export function computeLowConfidence(dimensions: ResilienceDimension[], imputati // retired dims via weightedBlend fall-through, and those coverage=0 // entries SHOULD drag the confidence down — that is precisely the // sparse-data signal lowConfidence exists to surface. + // + // INTENTIONALLY NOT weighted by RESILIENCE_DIMENSION_WEIGHTS. The + // coverage signal answers a different question from the scoring + // aggregation: "how much real data do we have on this country?" + // vs "how much does each dim matter to the overall score?" A dim + // with coverage=0.3 has sparse data regardless of how little it + // contributes to the final number — and the user-facing + // "Low confidence" label is about data availability, not score + // composition. The asymmetry is deliberate and mirrored in + // `computeOverallCoverage` below. const scoring = dimensions.filter( (dimension) => !RESILIENCE_RETIRED_DIMENSIONS.has(dimension.id as ResilienceDimensionId), ); @@ -656,6 +692,14 @@ export function computeOverallCoverage(response: GetResilienceScoreResponse): nu // coverage=0 dims (genuine weightedBlend fall-through) stay in the // average because they reflect real data sparsity for that country. // See `computeLowConfidence` for the matching rationale. + // + // INTENTIONALLY NOT weighted by RESILIENCE_DIMENSION_WEIGHTS — + // same reason as `computeLowConfidence`: this is a data-availability + // signal ("how much real data do we have?"), not a score-composition + // signal ("how much does each dim matter?"). Applying the scoring + // weights would let a dim at weight=0.5 hide half its sparsity + // from the overallCoverage pill, which would confuse users reading + // the coverage percentage as a data-quality indicator. 
const coverages = response.domains.flatMap((domain) => domain.dimensions .filter((dimension) => !RESILIENCE_RETIRED_DIMENSIONS.has(dimension.id as ResilienceDimensionId)) diff --git a/tests/resilience-handlers.test.mts b/tests/resilience-handlers.test.mts index 6b106f603..9d56d1f40 100644 --- a/tests/resilience-handlers.test.mts +++ b/tests/resilience-handlers.test.mts @@ -28,7 +28,7 @@ describe('resilience handlers', () => { delete process.env.VERCEL_ENV; const { fetchImpl, redis, sortedSets } = createRedisFetch(RESILIENCE_FIXTURES); - sortedSets.set('resilience:history:v5:US', [ + sortedSets.set('resilience:history:v6:US', [ { member: '2026-04-01:20', score: 20260401 }, { member: '2026-04-02:30', score: 20260402 }, ]); @@ -58,16 +58,16 @@ describe('resilience handlers', () => { assert.ok(response.stressFactor >= 0 && response.stressFactor <= 0.5, `stressFactor out of bounds: ${response.stressFactor}`); assert.equal(response.dataVersion, '2024-04-03', 'dataVersion should be the ISO date from seed-meta fetchedAt'); - const cachedScore = redis.get('resilience:score:v10:US'); + const cachedScore = redis.get('resilience:score:v11:US'); assert.ok(cachedScore, 'expected score cache to be written'); assert.equal(JSON.parse(cachedScore || '{}').countryCode, 'US'); - const history = sortedSets.get('resilience:history:v5:US') ?? []; + const history = sortedSets.get('resilience:history:v6:US') ?? []; assert.ok(history.some((entry) => entry.member.startsWith(today + ':')), 'expected today history member to be written'); await getResilienceScore({ request: new Request('https://example.com') } as never, { countryCode: 'US', }); - assert.equal((sortedSets.get('resilience:history:v5:US') ?? []).length, history.length, 'cache hit must not append history'); + assert.equal((sortedSets.get('resilience:history:v6:US') ?? 
[]).length, history.length, 'cache hit must not append history'); }); }); diff --git a/tests/resilience-pillar-aggregation.test.mts b/tests/resilience-pillar-aggregation.test.mts index ce4d84ab1..5842591f5 100644 --- a/tests/resilience-pillar-aggregation.test.mts +++ b/tests/resilience-pillar-aggregation.test.mts @@ -158,7 +158,7 @@ describe('pillar constants', () => { }); - it('RESILIENCE_SCORE_CACHE_PREFIX is v10', () => { - assert.equal(RESILIENCE_SCORE_CACHE_PREFIX, 'resilience:score:v10:'); + it('RESILIENCE_SCORE_CACHE_PREFIX is v11', () => { + assert.equal(RESILIENCE_SCORE_CACHE_PREFIX, 'resilience:score:v11:'); }); it('PILLAR_ORDER has 3 entries', () => { diff --git a/tests/resilience-ranking.test.mts b/tests/resilience-ranking.test.mts index 297c4e97d..0bf7db513 100644 --- a/tests/resilience-ranking.test.mts +++ b/tests/resilience-ranking.test.mts @@ -58,14 +58,14 @@ describe('resilience ranking contracts', () => { // so fixtures must carry the `_formula` tag matching the current env // (default flag-off ⇒ 'd6'). Writing the tagged shape here mirrors // what the handler persists via stampRankingCacheTag. - redis.set('resilience:ranking:v10', JSON.stringify({ ...cachedPublic, _formula: 'd6' })); + redis.set('resilience:ranking:v11', JSON.stringify({ ...cachedPublic, _formula: 'd6' })); const response = await getResilienceRanking({ request: new Request('https://example.com') } as never, {}); // The handler strips `_formula` before returning, so response matches // the public shape rather than the on-wire cache shape. 
assert.deepEqual(response, cachedPublic); - assert.equal(redis.has('resilience:score:v10:YE'), false, 'cache hit must not trigger score warmup'); + assert.equal(redis.has('resilience:score:v11:YE'), false, 'cache hit must not trigger score warmup'); }); it('returns all-greyed-out cached payload without rewarming (items=[], greyedOut non-empty)', async () => { @@ -79,12 +79,12 @@ describe('resilience ranking contracts', () => { { countryCode: 'ER', overallScore: 10, level: 'critical', lowConfidence: true, overallCoverage: 0.12 }, ], }; - redis.set('resilience:ranking:v10', JSON.stringify({ ...cachedPublic, _formula: 'd6' })); + redis.set('resilience:ranking:v11', JSON.stringify({ ...cachedPublic, _formula: 'd6' })); const response = await getResilienceRanking({ request: new Request('https://example.com') } as never, {}); assert.deepEqual(response, cachedPublic); - assert.equal(redis.has('resilience:score:v10:SS'), false, 'all-greyed-out cache hit must not trigger score warmup'); + assert.equal(redis.has('resilience:score:v11:SS'), false, 'all-greyed-out cache hit must not trigger score warmup'); }); it('bulk-read path skips untagged per-country score entries (legacy writes must rebuild on flip)', async () => { @@ -111,13 +111,13 @@ describe('resilience ranking contracts', () => { const domain = [{ id: 'political', score: 80, weight: 0.2, dimensions: [{ id: 'd1', score: 80, coverage: 0.9, observedWeight: 1, imputedWeight: 0 }] }]; // Tagged entry: served as-is. - redis.set('resilience:score:v10:NO', JSON.stringify({ + redis.set('resilience:score:v11:NO', JSON.stringify({ countryCode: 'NO', overallScore: 82, level: 'high', domains: domain, trend: 'stable', change30d: 1.2, lowConfidence: false, imputationShare: 0.05, _formula: 'd6', })); // Untagged entry: must be rejected, ranking warm rebuilds US. 
- redis.set('resilience:score:v10:US', JSON.stringify({ + redis.set('resilience:score:v11:US', JSON.stringify({ countryCode: 'US', overallScore: 61, level: 'medium', domains: domain, trend: 'rising', change30d: 4.3, lowConfidence: false, imputationShare: 0.1, @@ -130,7 +130,7 @@ describe('resilience ranking contracts', () => { // `_formula: 'd6'`. If the bulk read had ADMITTED the untagged // entry (the pre-fix bug), the warm path for US would not have // run, and the stored value would still be untagged. - const rewrittenRaw = redis.get('resilience:score:v10:US'); + const rewrittenRaw = redis.get('resilience:score:v11:US'); assert.ok(rewrittenRaw, 'US entry must remain in Redis after the ranking run'); const rewritten = JSON.parse(rewrittenRaw!); assert.equal( @@ -157,7 +157,7 @@ describe('resilience ranking contracts', () => { greyedOut: [], _formula: 'pc', // mismatched — current env is flag-off ⇒ current='d6' }; - redis.set('resilience:ranking:v10', JSON.stringify(stale)); + redis.set('resilience:ranking:v11', JSON.stringify(stale)); const response = await getResilienceRanking({ request: new Request('https://example.com') } as never, {}); @@ -169,7 +169,7 @@ describe('resilience ranking contracts', () => { // Recompute path warms missing per-country scores, so YE (in // RESILIENCE_FIXTURES) must get scored during this call. 
assert.ok( - redis.has('resilience:score:v10:YE'), + redis.has('resilience:score:v11:YE'), 'stale-formula reject must trigger the recompute-and-warm path', ); }); @@ -177,7 +177,7 @@ describe('resilience ranking contracts', () => { it('warms missing scores synchronously and returns complete ranking on first call', async () => { const { redis } = installRedis(RESILIENCE_FIXTURES); const domainWithCoverage = [{ name: 'political', dimensions: [{ name: 'd1', coverage: 0.9 }] }]; - redis.set('resilience:score:v10:NO', JSON.stringify({ + redis.set('resilience:score:v11:NO', JSON.stringify({ countryCode: 'NO', overallScore: 82, level: 'high', @@ -187,7 +187,7 @@ describe('resilience ranking contracts', () => { lowConfidence: false, imputationShare: 0.05, })); - redis.set('resilience:score:v10:US', JSON.stringify({ + redis.set('resilience:score:v11:US', JSON.stringify({ countryCode: 'US', overallScore: 61, level: 'medium', @@ -202,20 +202,20 @@ describe('resilience ranking contracts', () => { const totalItems = response.items.length + (response.greyedOut?.length ?? 
0); assert.equal(totalItems, 3, `expected 3 total items across ranked + greyedOut, got ${totalItems}`); - assert.ok(redis.has('resilience:score:v10:YE'), 'missing country should be warmed during first call'); + assert.ok(redis.has('resilience:score:v11:YE'), 'missing country should be warmed during first call'); assert.ok(response.items.every((item) => item.overallScore >= 0), 'ranked items should all have computed scores'); - assert.ok(redis.has('resilience:ranking:v10'), 'fully scored ranking should be cached'); + assert.ok(redis.has('resilience:ranking:v11'), 'fully scored ranking should be cached'); }); it('sets rankStable=true when interval data exists and width <= 8', async () => { const { redis } = installRedis(RESILIENCE_FIXTURES); const domainWithCoverage = [{ id: 'political', score: 80, weight: 0.2, dimensions: [{ id: 'd1', score: 80, coverage: 0.9, observedWeight: 1, imputedWeight: 0 }] }]; - redis.set('resilience:score:v10:NO', JSON.stringify({ + redis.set('resilience:score:v11:NO', JSON.stringify({ countryCode: 'NO', overallScore: 82, level: 'high', domains: domainWithCoverage, trend: 'stable', change30d: 1.2, lowConfidence: false, imputationShare: 0.05, })); - redis.set('resilience:score:v10:US', JSON.stringify({ + redis.set('resilience:score:v11:US', JSON.stringify({ countryCode: 'US', overallScore: 61, level: 'medium', domains: domainWithCoverage, trend: 'rising', change30d: 4.3, lowConfidence: false, imputationShare: 0.1, @@ -242,12 +242,12 @@ describe('resilience ranking contracts', () => { seedYear: 2025, })); const domainWithCoverage = [{ id: 'political', score: 80, weight: 0.2, dimensions: [{ id: 'd1', score: 80, coverage: 0.9, observedWeight: 1, imputedWeight: 0 }] }]; - redis.set('resilience:score:v10:NO', JSON.stringify({ + redis.set('resilience:score:v11:NO', JSON.stringify({ countryCode: 'NO', overallScore: 82, level: 'high', domains: domainWithCoverage, trend: 'stable', change30d: 1.2, lowConfidence: false, imputationShare: 0.05, })); - 
redis.set('resilience:score:v10:US', JSON.stringify({ + redis.set('resilience:score:v11:US', JSON.stringify({ countryCode: 'US', overallScore: 61, level: 'medium', domains: domainWithCoverage, trend: 'rising', change30d: 4.3, lowConfidence: false, imputationShare: 0.1, @@ -257,7 +257,7 @@ describe('resilience ranking contracts', () => { // 3 of 4 (NO + US pre-cached, YE warmed from fixtures, ZZ can't be warmed) // = 75% which meets the threshold — must cache. - assert.ok(redis.has('resilience:ranking:v10'), 'ranking must be cached at exactly 75% coverage'); + assert.ok(redis.has('resilience:ranking:v11'), 'ranking must be cached at exactly 75% coverage'); assert.ok(redis.has('seed-meta:resilience:ranking'), 'seed-meta must be written alongside the ranking'); }); @@ -288,7 +288,7 @@ describe('resilience ranking contracts', () => { if (url.endsWith('/pipeline') && typeof init?.body === 'string') { const commands = JSON.parse(init.body) as Array>; const allScoreReads = commands.length > 0 && commands.every( - (cmd) => cmd[0] === 'GET' && typeof cmd[1] === 'string' && cmd[1].startsWith('resilience:score:v10:'), + (cmd) => cmd[0] === 'GET' && typeof cmd[1] === 'string' && cmd[1].startsWith('resilience:score:v11:'), ); if (allScoreReads) { // Simulate visibility lag: pretend no scores are cached yet. 
@@ -304,7 +304,7 @@ describe('resilience ranking contracts', () => { await getResilienceRanking({ request: new Request('https://example.com') } as never, {}); - assert.ok(redis.has('resilience:ranking:v10'), 'ranking must be published despite pipeline-GET race'); + assert.ok(redis.has('resilience:ranking:v11'), 'ranking must be published despite pipeline-GET race'); assert.ok(redis.has('seed-meta:resilience:ranking'), 'seed-meta must be written despite pipeline-GET race'); }); @@ -312,8 +312,8 @@ describe('resilience ranking contracts', () => { // Reviewer regression: passing `raw=true` to runRedisPipeline bypasses the // env-based key prefix (preview: / dev:) that isolates preview deploys // from production. The symptom is asymmetric: preview reads hit - // `preview:<sha>:resilience:score:v10:XX` while preview writes landed at - // raw `resilience:score:v10:XX`, simultaneously (a) missing the preview + // `preview:<sha>:resilience:score:v11:XX` while preview writes landed at + // raw `resilience:score:v11:XX`, simultaneously (a) missing the preview // cache forever and (b) poisoning production's shared cache. Simulate a // preview deploy and assert the pipeline SET keys carry the prefix. // Shared afterEach snapshots/restores VERCEL_ENV + VERCEL_GIT_COMMIT_SHA @@ -345,7 +345,7 @@ describe('resilience ranking contracts', () => { const scoreSetKeys = pipelineBodies .flat() - .filter((cmd) => cmd[0] === 'SET' && typeof cmd[1] === 'string' && (cmd[1] as string).includes('resilience:score:v10:')) + .filter((cmd) => cmd[0] === 'SET' && typeof cmd[1] === 'string' && (cmd[1] as string).includes('resilience:score:v11:')) .map((cmd) => cmd[1] as string); assert.ok(scoreSetKeys.length >= 2, `expected at least 2 score SETs, got ${scoreSetKeys.length}`); for (const key of scoreSetKeys) { @@ -380,7 +380,7 @@ describe('resilience ranking contracts', () => { // rejected by the formula gate and the refresh path would not // get tested as intended. 
const stale = { items: [{ countryCode: 'ZZ', overallScore: 1, level: 'low', lowConfidence: true, overallCoverage: 0.5 }], greyedOut: [], _formula: 'd6' }; - redis.set('resilience:ranking:v10', JSON.stringify(stale)); + redis.set('resilience:ranking:v11', JSON.stringify(stale)); // No X-WorldMonitor-Key → refresh must be ignored, stale cache returned. const unauth = new Request('https://example.com/api/resilience/v1/get-resilience-ranking?refresh=1'); @@ -434,7 +434,7 @@ describe('resilience ranking contracts', () => { // rejected by the formula gate and the refresh path would not // get tested as intended. const stale = { items: [{ countryCode: 'ZZ', overallScore: 1, level: 'low', lowConfidence: true, overallCoverage: 0.5 }], greyedOut: [], _formula: 'd6' }; - redis.set('resilience:ranking:v10', JSON.stringify(stale)); + redis.set('resilience:ranking:v11', JSON.stringify(stale)); const request = new Request('https://example.com/api/resilience/v1/get-resilience-ranking?refresh=1', { headers: { 'X-WorldMonitor-Key': 'seed-secret' }, @@ -469,7 +469,7 @@ describe('resilience ranking contracts', () => { if (url.endsWith('/pipeline') && typeof init?.body === 'string') { const commands = JSON.parse(init.body) as Array>; const isAllScoreSets = commands.length > 0 && commands.every( - (cmd) => cmd[0] === 'SET' && typeof cmd[1] === 'string' && (cmd[1] as string).includes('resilience:score:v10:'), + (cmd) => cmd[0] === 'SET' && typeof cmd[1] === 'string' && (cmd[1] as string).includes('resilience:score:v11:'), ); if (isAllScoreSets) setPipelineSizes.push(commands.length); } @@ -501,7 +501,7 @@ describe('resilience ranking contracts', () => { seedYear: 2026, })); - // Intercept any pipeline SET to resilience:score:v10:* and reply with + // Intercept any pipeline SET to resilience:score:v11:* and reply with // non-OK results (persisted but authoritative signal says no). /set and // other paths pass through normally so history/interval writes succeed. 
const blockedScoreWrites = (async (input: RequestInfo | URL, init?: RequestInit) => { @@ -509,7 +509,7 @@ describe('resilience ranking contracts', () => { if (url.endsWith('/pipeline') && typeof init?.body === 'string') { const commands = JSON.parse(init.body) as Array>; const allScoreSets = commands.length > 0 && commands.every( - (cmd) => cmd[0] === 'SET' && typeof cmd[1] === 'string' && cmd[1].startsWith('resilience:score:v10:'), + (cmd) => cmd[0] === 'SET' && typeof cmd[1] === 'string' && cmd[1].startsWith('resilience:score:v11:'), ); if (allScoreSets) { return new Response( @@ -524,7 +524,7 @@ describe('resilience ranking contracts', () => { await getResilienceRanking({ request: new Request('https://example.com') } as never, {}); - assert.ok(!redis.has('resilience:ranking:v10'), 'ranking must NOT be published when score writes failed'); + assert.ok(!redis.has('resilience:ranking:v11'), 'ranking must NOT be published when score writes failed'); assert.ok(!redis.has('seed-meta:resilience:ranking'), 'seed-meta must NOT be written when score writes failed'); }); diff --git a/tests/resilience-recovery-ordering.test.mts b/tests/resilience-recovery-ordering.test.mts new file mode 100644 index 000000000..80b9bdcdf --- /dev/null +++ b/tests/resilience-recovery-ordering.test.mts @@ -0,0 +1,82 @@ +import assert from 'node:assert/strict'; +import { describe, it } from 'node:test'; + +import { + RESILIENCE_DIMENSION_DOMAINS, + RESILIENCE_DIMENSION_ORDER, + RESILIENCE_DIMENSION_WEIGHTS, + RESILIENCE_DOMAIN_ORDER, + getResilienceDomainWeight, + scoreAllDimensions, +} from '../server/worldmonitor/resilience/v1/_dimension-scorers.ts'; +import { installRedis } from './helpers/fake-upstash-redis.mts'; +import { RESILIENCE_FIXTURES } from './helpers/resilience-fixtures.mts'; + +// Sensitivity proxy: the recovery-domain weight rebalance (PR 2 §3.4) +// must not disturb the NO > US > YE country ordering on the committed +// fixture. 
Plan §6 sets a ≥0.85 Spearman rank-correlation gate against +// the live post-PR-0 ranking; that check runs post-merge against real +// seed data (snapshot committed as docs/snapshots/resilience-ranking- +// live-post-pr2-<date>.json). This file is the pre-merge proxy — with +// only 3 fixture countries, strict ordering preservation is the +// strongest signal we can compute without live data. + +function overallScore(scoreMap: Record): number { + function round(v: number, d = 2) { return Number(v.toFixed(d)); } + let overall = 0; + for (const domainId of RESILIENCE_DOMAIN_ORDER) { + const dims = RESILIENCE_DIMENSION_ORDER + .filter((id) => RESILIENCE_DIMENSION_DOMAINS[id] === domainId) + .map((id) => ({ id, score: round(scoreMap[id].score), coverage: round(scoreMap[id].coverage) })); + let totalW = 0, sum = 0; + for (const d of dims) { + const w = (RESILIENCE_DIMENSION_WEIGHTS as Record)[d.id] ?? 1.0; + const eff = d.coverage * w; + totalW += eff; + sum += d.score * eff; + } + const cwMean = totalW ? sum / totalW : 0; + overall += round(cwMean) * getResilienceDomainWeight(domainId); + } + return round(overall); +} + +describe('resilience fixture country ordering (PR 2 §3.4 sensitivity proxy)', () => { + it('NO > US > YE on overall score after the weight rebalance', async () => { + installRedis(RESILIENCE_FIXTURES); + const [no, us, ye] = await Promise.all([ + scoreAllDimensions('NO'), + scoreAllDimensions('US'), + scoreAllDimensions('YE'), + ]); + const noScore = overallScore(no); + const usScore = overallScore(us); + const yeScore = overallScore(ye); + assert.ok(noScore > usScore, + `fixture ordering broken: NO overall=${noScore} must exceed US overall=${usScore}. 
` + + `The PR 2 §3.4 weight rebalance is expected to preserve country ranks — verify against the live snapshot.`); + assert.ok(usScore > yeScore, + `fixture ordering broken: US overall=${usScore} must exceed YE overall=${yeScore}.`); + }); + + it('NO > US > YE on recovery-domain score after the weight rebalance', async () => { + installRedis(RESILIENCE_FIXTURES); + const recoveryOf = async (iso: string) => { + const scoreMap = await scoreAllDimensions(iso); + const dims = RESILIENCE_DIMENSION_ORDER + .filter((id) => RESILIENCE_DIMENSION_DOMAINS[id] === 'recovery') + .map((id) => ({ id, score: scoreMap[id].score, coverage: scoreMap[id].coverage })); + let totalW = 0, sum = 0; + for (const d of dims) { + const w = (RESILIENCE_DIMENSION_WEIGHTS as Record)[d.id] ?? 1.0; + const eff = d.coverage * w; + totalW += eff; + sum += d.score * eff; + } + return totalW ? sum / totalW : 0; + }; + const [noR, usR, yeR] = await Promise.all([recoveryOf('NO'), recoveryOf('US'), recoveryOf('YE')]); + assert.ok(noR > usR, `recovery rebalance regressed NO > US (NO=${noR.toFixed(2)}, US=${usR.toFixed(2)})`); + assert.ok(usR > yeR, `recovery rebalance regressed US > YE (US=${usR.toFixed(2)}, YE=${yeR.toFixed(2)})`); + }); +}); diff --git a/tests/resilience-recovery-weight-rebalance.test.mts b/tests/resilience-recovery-weight-rebalance.test.mts new file mode 100644 index 000000000..e864edd0f --- /dev/null +++ b/tests/resilience-recovery-weight-rebalance.test.mts @@ -0,0 +1,111 @@ +import assert from 'node:assert/strict'; +import { describe, it } from 'node:test'; + +import { + RESILIENCE_DIMENSION_DOMAINS, + RESILIENCE_DIMENSION_ORDER, + RESILIENCE_DIMENSION_WEIGHTS, + RESILIENCE_RETIRED_DIMENSIONS, + type ResilienceDimensionId, +} from '../server/worldmonitor/resilience/v1/_dimension-scorers.ts'; + +// PR 2 §3.4 recovery-domain weight rebalance. 
The plan pins the two +// new dims (liquidReserveAdequacy, sovereignFiscalBuffer) at ~0.10 +// share of the recovery-domain score, with the other four active +// recovery dims absorbing the residual. This test locks the share +// arithmetic against regression — any future weight change must +// explicitly update this test with the new targets so the operator +// rationale stays auditable. +// +// Math (6 active recovery dims at coverage=1.0, weights from +// RESILIENCE_DIMENSION_WEIGHTS): +// fiscalSpace × 1.0 +// externalDebtCoverage × 1.0 +// importConcentration × 1.0 +// stateContinuity × 1.0 +// liquidReserveAdequacy × 0.5 +// sovereignFiscalBuffer × 0.5 +// Total weighted coverage = 4.0 + 2×0.5 = 5.0 +// Each new-dim share = 0.5 / 5.0 = 0.10 +// Each other-dim share = 1.0 / 5.0 = 0.20 +describe('recovery-domain weight rebalance (PR 2 §3.4)', () => { + const recoveryDims = RESILIENCE_DIMENSION_ORDER.filter( + (id) => RESILIENCE_DIMENSION_DOMAINS[id] === 'recovery', + ); + const activeRecoveryDims = recoveryDims.filter( + (id) => !RESILIENCE_RETIRED_DIMENSIONS.has(id), + ); + + it('exposes a per-dimension weight entry for every dim in the order', () => { + for (const id of RESILIENCE_DIMENSION_ORDER) { + assert.ok( + RESILIENCE_DIMENSION_WEIGHTS[id] != null, + `RESILIENCE_DIMENSION_WEIGHTS missing entry for ${id}. 
Every dim must have an explicit weight — default 1.0 is fine but must be spelled out so the rebalance decisions stay auditable.`, + ); + } + }); + + it('pins liquidReserveAdequacy + sovereignFiscalBuffer at weight 0.5', () => { + assert.equal( + RESILIENCE_DIMENSION_WEIGHTS.liquidReserveAdequacy, + 0.5, + 'plan §3.4 targets ~10% recovery share; weight 0.5 with the other 4 dims at 1.0 gives 0.5/5.0 = 0.10', + ); + assert.equal( + RESILIENCE_DIMENSION_WEIGHTS.sovereignFiscalBuffer, + 0.5, + 'plan §3.4 targets ~10% recovery share; weight 0.5 with the other 4 dims at 1.0 gives 0.5/5.0 = 0.10', + ); + }); + + it('the four active core recovery dims carry weight 1.0', () => { + const coreRecovery: ResilienceDimensionId[] = [ + 'fiscalSpace', + 'externalDebtCoverage', + 'importConcentration', + 'stateContinuity', + ]; + for (const id of coreRecovery) { + assert.equal( + RESILIENCE_DIMENSION_WEIGHTS[id], + 1.0, + `${id} must carry weight 1.0 per plan §3.4 "other recovery dimensions absorb residual"`, + ); + } + }); + + it('recovery-domain share math: each new dim = 10% at full coverage', () => { + // Reproduce the coverage-weighted-mean share denominator using + // coverage=1.0 for all active dims. If this ever diverges from + // 0.10 the plan's target is no longer met. + const weightSum = activeRecoveryDims.reduce( + (s, id) => s + (RESILIENCE_DIMENSION_WEIGHTS[id] ?? 1), + 0, + ); + const liquidShare = (RESILIENCE_DIMENSION_WEIGHTS.liquidReserveAdequacy) / weightSum; + const swfShare = (RESILIENCE_DIMENSION_WEIGHTS.sovereignFiscalBuffer) / weightSum; + // ±0.005 only absorbs rounding noise around the plan's "~0.10" target; one + // added active dim (even at weight 0.5: 0.5/5.5 ≈ 0.091) fails this check, so targets must be updated explicitly. 
+ assert.ok( + Math.abs(liquidShare - 0.10) < 0.005, + `liquidReserveAdequacy share at full coverage = ${liquidShare.toFixed(4)}, expected ~0.10`, + ); + assert.ok( + Math.abs(swfShare - 0.10) < 0.005, + `sovereignFiscalBuffer share at full coverage = ${swfShare.toFixed(4)}, expected ~0.10`, + ); + }); + + it('retired recovery dims (reserveAdequacy, fuelStockDays) stay in the weight map', () => { + // Retired dims have coverage=0 and so are neutralized at the + // coverage channel regardless of weight. Keeping them in the + // weight map at 1.0 rather than stripping them is the defensive + // choice: if a future scorer bug accidentally returns coverage>0 + // for a retired dim, a missing weight entry here would make the + // aggregation silently fall through to the `?? 1.0` default, + // bypassing the retirement signal. Having explicit weights + // enforces a single source of truth. + assert.ok(RESILIENCE_DIMENSION_WEIGHTS.reserveAdequacy != null); + assert.ok(RESILIENCE_DIMENSION_WEIGHTS.fuelStockDays != null); + }); +}); diff --git a/tests/resilience-scorers.test.mts b/tests/resilience-scorers.test.mts index f8909e7ba..2c0454c79 100644 --- a/tests/resilience-scorers.test.mts +++ b/tests/resilience-scorers.test.mts @@ -6,6 +6,7 @@ import { RESILIENCE_DIMENSION_ORDER, RESILIENCE_DIMENSION_SCORERS, RESILIENCE_DIMENSION_TYPES, + RESILIENCE_DIMENSION_WEIGHTS, RESILIENCE_DOMAIN_ORDER, getResilienceDomainWeight, scoreAllDimensions, @@ -132,10 +133,21 @@ describe('resilience scorer contracts', () => { }); function round(v: number, d = 2) { return Number(v.toFixed(d)); } - function coverageWeightedMean(dims: { score: number; coverage: number }[]) { - const totalCov = dims.reduce((s, d) => s + d.coverage, 0); - if (!totalCov) return 0; - return dims.reduce((s, d) => s + d.score * d.coverage, 0) / totalCov; + // Mirror of the production coverage-weighted mean (see + // server/worldmonitor/resilience/v1/_shared.ts). 
Must apply the + // per-dim weight from RESILIENCE_DIMENSION_WEIGHTS so the expected + // values here track the production aggregation after the PR 2 §3.4 + // recovery-domain weight rebalance. + function coverageWeightedMean(dims: { id: string; score: number; coverage: number }[]) { + let totalW = 0, sum = 0; + for (const d of dims) { + const w = (RESILIENCE_DIMENSION_WEIGHTS as Record)[d.id] ?? 1.0; + const effective = d.coverage * w; + totalW += effective; + sum += d.score * effective; + } + if (!totalW) return 0; + return sum / totalW; } const dimensions = RESILIENCE_DIMENSION_ORDER.map((id) => ({ @@ -161,11 +173,13 @@ describe('resilience scorer contracts', () => { // PR 2 §3.4: 60.12 → 60.35 — split adds liquidReserveAdequacy // (US ≈ 1 month WB reserves → score 18 at cov=1.0) and // sovereignFiscalBuffer (IMPUTE at 50 / cov=0.3) into the baseline - // coverage-weighted mean. Net effect is a small upward shift - // because the retired reserveAdequacy's 50-at-coverage-weighted-1 - // is replaced by the same total weight split across the two new - // dims with different coverage profiles. - assert.equal(baselineScore, 60.35); + // coverage-weighted mean. + // PR 2 §3.4 weight rebalance: 60.35 → 62.17. The two new recovery + // dims now carry weight=0.5 (RESILIENCE_DIMENSION_WEIGHTS), so + // the low-scoring liquidReserveAdequacy (18) and partial-coverage + // sovereignFiscalBuffer (50 × 0.3) contribute ~half as much to + // the US baseline aggregate as under the equal-weight default. + assert.equal(baselineScore, 62.17); // PR 3 §3.5: 65.84 → 67.85 (fuelStockDays retirement) → 67.21 // (currencyExternal rebuilt on IMF inflation + WB reserves, coverage // shifts and US stress score moves). 
stressFactor updates in lockstep: @@ -177,21 +191,31 @@ describe('resilience scorer contracts', () => { RESILIENCE_DOMAIN_ORDER.map((domainId) => { const dimScores = RESILIENCE_DIMENSION_ORDER .filter((id) => RESILIENCE_DIMENSION_DOMAINS[id] === domainId) - .map((id) => ({ score: round(scoreMap[id].score), coverage: round(scoreMap[id].coverage) })); - const totalCov = dimScores.reduce((sum, d) => sum + d.coverage, 0); - const cwMean = totalCov ? dimScores.reduce((sum, d) => sum + d.score * d.coverage, 0) / totalCov : 0; + .map((id) => ({ id, score: round(scoreMap[id].score), coverage: round(scoreMap[id].coverage) })); + // Mirror production: apply per-dim weight to each dim's + // effective coverage before computing the mean. + let totalW = 0, sum = 0; + for (const d of dimScores) { + const w = (RESILIENCE_DIMENSION_WEIGHTS as Record)[d.id] ?? 1.0; + const eff = d.coverage * w; + totalW += eff; + sum += d.score * eff; + } + const cwMean = totalW ? sum / totalW : 0; return round(cwMean) * getResilienceDomainWeight(domainId); }).reduce((sum, v) => sum + v, 0), ); // PR 3 §3.5: 65.57 → 65.82 (fuelStockDays retirement) → 65.52 // (currencyExternal rebuild) → 63.27 (externalDebtCoverage goalpost // tightened 0..5 → 0..2; US recovery-domain contribution drops). - // PR 2 §3.4: 63.27 → 63.6 after the reserveAdequacy split. The new - // liquidReserveAdequacy at score=18 / coverage=1.0 + sovereign- - // FiscalBuffer at score=50 / coverage=0.3 shifts the recovery- - // domain coverage-weighted mean upward (retired reserveAdequacy - // dropped out with coverage=0), lifting the overall by ~0.33. - assert.equal(overallScore, 63.6); + // PR 2 §3.4: 63.27 → 63.6 after the reserveAdequacy split. + // PR 2 §3.4 weight rebalance: 63.6 → 64.39. The two new recovery + // dims (liquidReserveAdequacy @ score=18, sovereignFiscalBuffer @ + // score=50/cov=0.3) now carry weight=0.5 so they're each ~10% of + // the recovery domain instead of the equal-share ~16.7%. 
The + // under-weighted score-18 dim matters less, lifting US's recovery + // contribution by ~3 points and the overall by ~0.79. + assert.equal(overallScore, 64.39); }); it('baselineScore is computed from baseline + mixed dimensions only', async () => { @@ -236,10 +260,21 @@ describe('resilience scorer contracts', () => { installRedis(RESILIENCE_FIXTURES); const scoreMap = await scoreAllDimensions('US'); function round(v: number, d = 2) { return Number(v.toFixed(d)); } - function coverageWeightedMean(dims: { score: number; coverage: number }[]) { - const totalCov = dims.reduce((s, d) => s + d.coverage, 0); - if (!totalCov) return 0; - return dims.reduce((s, d) => s + d.score * d.coverage, 0) / totalCov; + // Mirror of the production coverage-weighted mean (see + // server/worldmonitor/resilience/v1/_shared.ts). Must apply the + // per-dim weight from RESILIENCE_DIMENSION_WEIGHTS so the expected + // values here track the production aggregation after the PR 2 §3.4 + // recovery-domain weight rebalance. + function coverageWeightedMean(dims: { id: string; score: number; coverage: number }[]) { + let totalW = 0, sum = 0; + for (const d of dims) { + const w = (RESILIENCE_DIMENSION_WEIGHTS as Record)[d.id] ?? 1.0; + const effective = d.coverage * w; + totalW += effective; + sum += d.score * effective; + } + if (!totalW) return 0; + return sum / totalW; } const dimensions = RESILIENCE_DIMENSION_ORDER.map((id) => ({ @@ -264,9 +299,10 @@ describe('resilience scorer contracts', () => { assert.ok(expected > 0, 'overall should be positive'); // PR 3 §3.5: 65.82 → 65.52 (currencyExternal rebuild) → 63.27 after // externalDebtCoverage goalpost tightened from (0..5) to (0..2). - // PR 2 §3.4: 63.27 → 63.6 after reserveAdequacy retirement + the - // liquidReserveAdequacy / sovereignFiscalBuffer split. 
- assert.equal(expected, 63.6, 'overallScore should match sum(domainScore * domainWeight); 63.27 → 63.6 after PR 2 §3.4 reserveAdequacy split'); + // PR 2 §3.4: 63.27 → 63.6 after reserveAdequacy retirement + split. + // PR 2 §3.4 weight rebalance: 63.6 → 64.39 after dialing the two + // new recovery dims down to weight=0.5 (~10% recovery share each). + assert.equal(expected, 64.39, 'overallScore should match sum(domainScore * domainWeight); 63.6 → 64.39 after PR 2 §3.4 recovery-domain weight rebalance'); }); it('stressFactor is still computed (informational) and clamped to [0, 0.5]', () => { diff --git a/tests/resilience-scores-seed.test.mjs b/tests/resilience-scores-seed.test.mjs index d643e0f38..a2868a990 100644 --- a/tests/resilience-scores-seed.test.mjs +++ b/tests/resilience-scores-seed.test.mjs @@ -10,12 +10,12 @@ import { } from '../scripts/seed-resilience-scores.mjs'; describe('exported constants', () => { - it('RESILIENCE_RANKING_CACHE_KEY matches server-side key (v10)', () => { - assert.equal(RESILIENCE_RANKING_CACHE_KEY, 'resilience:ranking:v10'); + it('RESILIENCE_RANKING_CACHE_KEY matches server-side key (v11)', () => { + assert.equal(RESILIENCE_RANKING_CACHE_KEY, 'resilience:ranking:v11'); }); - it('RESILIENCE_SCORE_CACHE_PREFIX matches server-side prefix (v10)', () => { - assert.equal(RESILIENCE_SCORE_CACHE_PREFIX, 'resilience:score:v10:'); + it('RESILIENCE_SCORE_CACHE_PREFIX matches server-side prefix (v11)', () => { + assert.equal(RESILIENCE_SCORE_CACHE_PREFIX, 'resilience:score:v11:'); }); it('RESILIENCE_RANKING_CACHE_TTL_SECONDS is 12 hours (2x cron interval)', () => {