feat(resilience): language/source-density normalization for informationCognitive (T2.9) (#2992)

* feat(resilience): language/source-density normalization for informationCognitive (Phase 2 T2.9) Add English-language media coverage factor that normalizes social velocity and news threat scores before they enter the informationCognitive dimension scorer. Countries with low English-media coverage (e.g. CN at 0.4, minimal-tier at 0.2) get their raw signal amplified to compensate for under-representation in English-language sources. RSF press freedom index passes through unchanged (already globally normalized in its methodology). With normalization in place, promote all three informationCognitive indicators from Enrichment back to Core tier, fulfilling the T2.2a demotion contract. Tier counts: 7 primary, 45 secondary, 58 limited, default minimal. * fix(resilience): ISO2 case-normalization + test improvements (#2992 review) - Add .toUpperCase() to getLanguageCoverageFactor() preventing lowercase ISO2 codes from falling through to 'minimal' tier (5x score error) - Replace tautological RSF test with real scorer exercise proving RSF component is equal across language tiers - Add velocity cap test matching real scorer cap of 1000 (was 100)
2026-04-25 17:14:57 +02:00 · 2026-04-12 10:24:19 +04:00
parent 8eca33790e
commit d84acd2921
5 changed files with 196 additions and 16 deletions
--- a/server/worldmonitor/resilience/v1/_dimension-scorers.ts
+++ b/server/worldmonitor/resilience/v1/_dimension-scorers.ts
@@ -3,6 +3,7 @@ import iso2ToIso3Json from '../../../../shared/iso2-to-iso3.json';
 import { normalizeCountryToken } from '../../../_shared/country-token';
 import { getCachedJson } from '../../../_shared/redis';
 import { classifyDimensionFreshness, readFreshnessMap } from './_dimension-freshness';
+import { getLanguageCoverageFactor } from './_language-coverage';
 import { failedDimensionsFromDatasets, readFailedDatasets } from './_source-failure';

 export type ResilienceDimensionId =
@@ -1118,10 +1119,14 @@ export async function scoreInformationCognitive(
  const velocity = summarizeSocialVelocity(socialVelocityRaw, countryCode);
  const threatScore = getThreatSummaryScore(threatSummaryRaw, countryCode);

+  const langFactor = getLanguageCoverageFactor(countryCode);
+  const adjustedVelocity = velocity > 0 ? Math.min(velocity / Math.max(langFactor, 0.1), 1000) : 0;
+  const adjustedThreat = threatScore != null ? Math.min(threatScore / Math.max(langFactor, 0.1), 100) : null;
+
  return weightedBlend([
    { score: rsfScore == null ? null : normalizeLowerBetter(rsfScore, 0, 100), weight: 0.55 },
-    { score: velocity > 0 ? normalizeLowerBetter(Math.log10(velocity + 1), 0, 3) : null, weight: 0.15 },
-    { score: threatScore == null ? null : normalizeLowerBetter(threatScore, 0, 20), weight: 0.3 },
+    { score: adjustedVelocity > 0 ? normalizeLowerBetter(Math.log10(adjustedVelocity + 1), 0, 3) : null, weight: 0.15 },
+    { score: adjustedThreat == null ? null : normalizeLowerBetter(adjustedThreat, 0, 20), weight: 0.3 },
  ]);
 }

--- a/server/worldmonitor/resilience/v1/_indicator-registry.ts
+++ b/server/worldmonitor/resilience/v1/_indicator-registry.ts
@@ -597,12 +597,10 @@ export const INDICATOR_REGISTRY: IndicatorSpec[] = [
  },

  // ── informationCognitive (3 sub-metrics) ──────────────────────────────────
-  // The whole informationCognitive dimension is demoted to Enrichment until
-  // Phase 2 T2.9 ships the language / source-density normalization. See the
-  // parent plan, "Signal tiering" section: "Existing 13 dimensions default
-  // to Core, with one exception: informationCognitive is demoted to
-  // Enrichment until the language / source-density normalization lands in
-  // T2.9, at which point it re-enters Core."
+  // Promoted back to Core in T2.9 after language / source-density
+  // normalization landed (getLanguageCoverageFactor in _language-coverage.ts).
+  // Social velocity and news threat scores are now adjusted by the
+  // English-language coverage factor before normalization.
  {
    id: 'rsfPressFreedom',
    dimension: 'informationCognitive',
@@ -613,35 +611,35 @@ export const INDICATOR_REGISTRY: IndicatorSpec[] = [
    sourceKey: 'resilience:static:{ISO2}',
    scope: 'global',
    cadence: 'annual',
-    tier: 'enrichment',
+    tier: 'core',
    coverage: 180,
    license: 'open-attribution',
  },
  {
    id: 'socialVelocity',
    dimension: 'informationCognitive',
-    description: 'Reddit social velocity score (log10(velocity+1)); viral narrative stress',
+    description: 'Reddit social velocity score (log10(velocity+1)); language-normalized viral narrative stress',
    direction: 'lowerBetter',
    goalposts: { worst: 3, best: 0 },
    weight: 0.15,
    sourceKey: 'intelligence:social:reddit:v1',
    scope: 'global',
    cadence: 'realtime',
-    tier: 'enrichment',
+    tier: 'core',
    coverage: 195,
    license: 'open-attribution',
  },
  {
    id: 'newsThreatScore',
    dimension: 'informationCognitive',
-    description: 'AI news threat summary (critical=4x, high=2x, medium=1x, low=0.5x)',
+    description: 'AI news threat summary (critical=4x, high=2x, medium=1x, low=0.5x); language-normalized',
    direction: 'lowerBetter',
    goalposts: { worst: 20, best: 0 },
    weight: 0.3,
    sourceKey: 'news:threat:summary:v1',
    scope: 'global',
    cadence: 'daily',
-    tier: 'enrichment',
+    tier: 'core',
    coverage: 195,
    license: 'open-attribution',
  },
--- a/server/worldmonitor/resilience/v1/_language-coverage.ts
+++ b/server/worldmonitor/resilience/v1/_language-coverage.ts
@@ -0,0 +1,76 @@
+/**
+ * How well a country's media landscape is represented in English-language
+ * sources, ordered from best ('primary') to worst ('minimal').
+ */
+export type LanguageCoverageTier = 'primary' | 'secondary' | 'limited' | 'minimal';
+
+/**
+ * Coverage factor per tier. scoreInformationCognitive divides raw social
+ * velocity and news threat scores by this factor, so a smaller value means a
+ * larger amplification (1.0 leaves the signal unchanged, 0.2 multiplies it by 5).
+ */
+export const LANGUAGE_TIERS: Record<LanguageCoverageTier, number> = {
+  primary: 1.0,
+  secondary: 0.7,
+  limited: 0.4,
+  minimal: 0.2,
+};
+
+/**
+ * Explicit tier assignments keyed by upper-case ISO2 country code. Codes that
+ * are absent from this map resolve to 'minimal' in getLanguageCoverageFactor().
+ */
+export const COUNTRY_LANGUAGE_TIER: Record<string, LanguageCoverageTier> = {
+  // primary: English-dominant media landscape
+  US: 'primary', GB: 'primary', AU: 'primary', NZ: 'primary',
+  CA: 'primary', IE: 'primary', SG: 'primary',
+
+  // secondary: English widely available but not dominant
+  IN: 'secondary', PH: 'secondary', NG: 'secondary', KE: 'secondary',
+  ZA: 'secondary', GH: 'secondary', MY: 'secondary', PK: 'secondary',
+  LK: 'secondary', BD: 'secondary', TZ: 'secondary', UG: 'secondary',
+  RW: 'secondary', ZW: 'secondary', ZM: 'secondary', BW: 'secondary',
+  NA: 'secondary', MW: 'secondary', SL: 'secondary', LR: 'secondary',
+  GM: 'secondary', JM: 'secondary', TT: 'secondary', BB: 'secondary',
+  GY: 'secondary', FJ: 'secondary', PG: 'secondary', WS: 'secondary',
+  MT: 'secondary', CY: 'secondary', IL: 'secondary', AE: 'secondary',
+  QA: 'secondary', BH: 'secondary', KW: 'secondary', JO: 'secondary',
+  HK: 'secondary', NP: 'secondary', MM: 'secondary', KH: 'secondary',
+  ET: 'secondary', CM: 'secondary', MZ: 'secondary', LS: 'secondary',
+  SZ: 'secondary',
+
+  // limited: English available but minority of media
+  CN: 'limited', JP: 'limited', RU: 'limited', BR: 'limited',
+  FR: 'limited', DE: 'limited', ES: 'limited', IT: 'limited',
+  KR: 'limited', TR: 'limited', MX: 'limited', AR: 'limited',
+  CO: 'limited', CL: 'limited', PE: 'limited', VE: 'limited',
+  EC: 'limited', PL: 'limited', UA: 'limited', RO: 'limited',
+  CZ: 'limited', HU: 'limited', GR: 'limited', PT: 'limited',
+  SE: 'limited', NO: 'limited', DK: 'limited', FI: 'limited',
+  NL: 'limited', BE: 'limited', AT: 'limited', CH: 'limited',
+  TH: 'limited', VN: 'limited', ID: 'limited', TW: 'limited',
+  EG: 'limited', SA: 'limited', IQ: 'limited', IR: 'limited',
+  MA: 'limited', TN: 'limited', DZ: 'limited', LB: 'limited',
+  RS: 'limited', BG: 'limited', HR: 'limited', SK: 'limited',
+  SI: 'limited', LT: 'limited', LV: 'limited', EE: 'limited',
+  BY: 'limited', GE: 'limited', AM: 'limited', AZ: 'limited',
+  KZ: 'limited', UZ: 'limited',
+
+  // Unlisted countries default to 'minimal' (0.2)
+};
+
+/**
+ * Returns the English-language coverage factor for a country.
+ *
+ * @param iso2 - ISO2 country code; matched case-insensitively, ignoring
+ *   surrounding whitespace.
+ * @returns The LANGUAGE_TIERS value for the country's tier, or the 'minimal'
+ *   value when the code is not listed in COUNTRY_LANGUAGE_TIER.
+ */
+export function getLanguageCoverageFactor(iso2: string): number {
+  // Normalize before the lookup: a lower-case or padded code would otherwise
+  // miss the map and silently fall through to the 'minimal' tier.
+  const key = iso2.trim().toUpperCase();
+  const tier = COUNTRY_LANGUAGE_TIER[key] ?? 'minimal';
+  return LANGUAGE_TIERS[tier];
+}
--- a/tests/resilience-indicator-tiering.test.mts
+++ b/tests/resilience-indicator-tiering.test.mts
@@ -62,14 +62,14 @@ describe('signal tiering registry (Phase 2 T2.2a)', () => {
    );
  });

-  it('informationCognitive dimension indicators are Enrichment (plan mandate, demoted until T2.9)', () => {
+  it('informationCognitive dimension indicators are Core (promoted in T2.9 after language normalization)', () => {
    const infoCogIndicators = INDICATOR_REGISTRY.filter((e) => e.dimension === 'informationCognitive');
    assert.ok(infoCogIndicators.length > 0, 'expected informationCognitive indicators in registry');
    for (const e of infoCogIndicators) {
      assert.equal(
        e.tier,
-        'enrichment',
-        `${e.id}: informationCognitive indicators must be 'enrichment' until PR 9 / T2.9 lands the language normalization. See parent plan, "Signal tiering" section.`,
+        'core',
+        `${e.id}: informationCognitive indicators must be 'core' now that T2.9 language normalization has landed.`,
      );
    }
  });
--- a/tests/resilience-language-normalization.test.mts
+++ b/tests/resilience-language-normalization.test.mts
@@ -0,0 +1,116 @@
+import assert from 'node:assert/strict';
+import { describe, it } from 'node:test';
+
+import {
+  COUNTRY_LANGUAGE_TIER,
+  LANGUAGE_TIERS,
+  getLanguageCoverageFactor,
+  type LanguageCoverageTier,
+} from '../server/worldmonitor/resilience/v1/_language-coverage.ts';
+import {
+  scoreInformationCognitive,
+  type ResilienceSeedReader,
+} from '../server/worldmonitor/resilience/v1/_dimension-scorers.ts';
+
+/**
+ * Local copy of the adjustment expression used by scoreInformationCognitive:
+ * divide by the coverage factor (floored at 0.1), then clamp to `cap`.
+ * NOTE(review): this duplicates the scorer's formula instead of calling the
+ * scorer, so the arithmetic cases below only pin the formula itself.
+ */
+function adjustForCoverage(rawScore: number, langFactor: number, cap: number): number {
+  return Math.min(rawScore / Math.max(langFactor, 0.1), cap);
+}
+
+/** Asserts that every listed ISO2 code resolves to the expected coverage factor. */
+function assertFactor(codes: readonly string[], expected: number): void {
+  for (const code of codes) {
+    assert.equal(getLanguageCoverageFactor(code), expected, `unexpected factor for '${code}'`);
+  }
+}
+
+describe('language coverage normalization (Phase 2 T2.9)', () => {
+  it('primary tier countries return 1.0', () => {
+    assertFactor(['US', 'GB', 'AU'], 1.0);
+  });
+
+  it('secondary tier countries return 0.7', () => {
+    assertFactor(['IN', 'PH', 'KE'], 0.7);
+  });
+
+  it('limited tier countries return 0.4', () => {
+    assertFactor(['CN', 'JP', 'RU', 'BR'], 0.4);
+  });
+
+  it('lowercase ISO2 codes are case-normalized', () => {
+    assertFactor(['us', 'gb'], 1.0);
+    assertFactor(['cn'], 0.4);
+    assertFactor(['in'], 0.7);
+  });
+
+  it('unknown country codes default to minimal (0.2)', () => {
+    assertFactor(['XX', 'ZZ'], 0.2);
+  });
+
+  it('tier map values match LANGUAGE_TIERS constants', () => {
+    for (const tier of Object.values(COUNTRY_LANGUAGE_TIER)) {
+      assert.ok(tier in LANGUAGE_TIERS, `tier '${tier}' not found in LANGUAGE_TIERS`);
+    }
+  });
+
+  // 'minimal' is the implicit fallback for unlisted countries and is not
+  // assigned to any country in the map, so only the three explicit tiers are checked.
+  it('all explicitly assigned tiers are represented in the country map', () => {
+    const usedTiers = new Set(Object.values(COUNTRY_LANGUAGE_TIER));
+    const explicitTiers: LanguageCoverageTier[] = ['primary', 'secondary', 'limited'];
+    for (const tier of explicitTiers) {
+      assert.ok(usedTiers.has(tier), `tier '${tier}' has no countries assigned`);
+    }
+  });
+
+  it('country map has reasonable coverage (30+ countries assigned)', () => {
+    const assignedCount = Object.keys(COUNTRY_LANGUAGE_TIER).length;
+    assert.ok(
+      assignedCount >= 30,
+      `expected at least 30 countries in the language tier map, got ${assignedCount}`,
+    );
+  });
+
+  describe('normalization arithmetic', () => {
+    it('langFactor=1.0 leaves score unchanged', () => {
+      assert.equal(adjustForCoverage(10, 1.0, 100), 10);
+    });
+
+    it('langFactor=0.4 amplifies score by 2.5x', () => {
+      assert.equal(adjustForCoverage(10, 0.4, 100), 25);
+    });
+
+    it('langFactor=0.2 amplifies score by 5x', () => {
+      assert.equal(adjustForCoverage(10, 0.2, 100), 50);
+    });
+
+    it('adjusted score is capped at 100', () => {
+      assert.equal(adjustForCoverage(30, 0.2, 100), 100);
+    });
+
+    it('langFactor floor at 0.1 prevents division by zero', () => {
+      assert.equal(adjustForCoverage(5, 0, 100), 50);
+    });
+
+    it('velocity cap matches real scorer cap of 1000', () => {
+      assert.equal(adjustForCoverage(500, 0.2, 1000), 1000);
+    });
+
+    it('RSF press freedom score is NOT language-adjusted (exercises scorer)', async () => {
+      // Only the two static keys are seeded; every other key reads as null.
+      const rsfValue = 75;
+      const seededKeys = new Set(['resilience:static:US', 'resilience:static:CN']);
+      const mockReader = async (key: string): Promise<unknown> =>
+        seededKeys.has(key) ? { rsf: { score: rsfValue } } : null;
+      const reader = mockReader as ResilienceSeedReader;
+      const usResult = await scoreInformationCognitive('US', reader);
+      const cnResult = await scoreInformationCognitive('CN', reader);
+      assert.equal(usResult.score, cnResult.score, 'RSF component should be equal regardless of language tier');
+    });
+  });
+});