From 5d68f0ae6bbcf4c4a83553f679c1b92db5918773 Mon Sep 17 00:00:00 2001 From: Elie Habib Date: Fri, 27 Mar 2026 12:21:23 +0400 Subject: [PATCH] fix(intelligence): land news:threat:summary:v1 CII work missed from PR #2096 (#2356) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(intelligence): emit news:threat:summary:v1 from relay classify loop for CII During seedClassifyForVariant(), attribute each title to ISO2 countries while both title and classification result are in scope. At the end of seedClassify(), merge per-country threat counts across all variants and write news:threat:summary:v1 (20min TTL) with { byCountry: { [iso2]: { critical, high, medium, low, info } }, generatedAt }. get-risk-scores.ts reads the new key via fetchAuxiliarySources() and applies weighted scores (critical→4, high→2, medium→1, low→0.5, info→0, capped at 20) per country into the information component of CII eventScore. Closes #2053 * fix(intelligence): register news:threat-summary in health.js and expand tests - Add newsThreatSummary to BOOTSTRAP_KEYS (seed-meta:news:threat-summary, maxStaleMin: 60) so relay classify outages surface in health dashboard - Add 4 tests: boost verification, cap-at-20, unknown-country safety, null-threatSummary zero baseline * fix(classify): de-dup cross-variant titles and attribute to last-mentioned country P1-A: seedClassify() was summing byCountry across all 5 variants (full/tech/ finance/happy/commodity) without de-duplicating. Shared feeds (CNBC, Yahoo Finance, FT, HN, Ars) let a single headline count up to 4x before reaching CII, saturating threatSummaryScore on one story. Fix: pass seenTitles Set into seedClassifyForVariant; skip attribution for titles already counted by an earlier variant. P1-B: matchCountryNamesInText() was attributing every country mentioned in a headline equally. "UK and US launch strikes on Yemen" raised GB, US, and YE with identical weight, inflating actor-country CII. Fix: return only the last country in document order — the grammatical object of the headline, which is the primary affected country in SVO structure. * fix(classify): replace last-position heuristic with preposition-pattern attribution The previous "last-mentioned country" fix still failed for: - "Yemen says UK and US strikes hit Hodeidah" → returned US (wrong) - "US strikes on Yemen condemned by Iran" → returned IR (wrong) Both failures stem from position not conveying grammatical role. Switch to a preposition/verb-pattern approach: only attribute to a country that immediately follows a locative preposition (in/on/against/at/into/targeting/toward) or an attack verb (invades/attacks/bombs/hits/strikes). No pattern match → return [] (skip attribution rather than attribute to the wrong country). * fix(classify): fix regex hitting, gaza/hamas geo mapping, seed-meta always written - hitt?(?:ing|s)? instead of hit(?:s|ting)? so "hitting" is matched - gaza → PS (Palestinian Territories), hamas → PS (was IL) - seed-meta:news:threat-summary written unconditionally so health check does not fire false alerts during no-attribution runs --- api/health.js | 1 + scripts/ais-relay.cjs | 128 +++++++++++++++++- .../intelligence/v1/get-risk-scores.ts | 30 +++- tests/cii-scoring.test.mts | 32 +++++ 4 files changed, 183 insertions(+), 8 deletions(-) diff --git a/api/health.js b/api/health.js index 032b5b15d..cc8ed57d9 100644 --- a/api/health.js +++ b/api/health.js @@ -208,6 +208,7 @@ const SEED_META = { euGasStorage: { key: 'seed-meta:economic:eu-gas-storage', maxStaleMin: 2880 }, // daily seed (T+1); 2880min = 48h = 2x interval euYieldCurve: { key: 'seed-meta:economic:yield-curve-eu', maxStaleMin: 2880 }, // daily seed (weekdays); 2880min = 48h = 2x interval euFsi: { key: 'seed-meta:economic:fsi-eu', maxStaleMin: 20160 }, // weekly seed (Saturday); 20160min = 14d = 2x interval + newsThreatSummary: { key: 'seed-meta:news:threat-summary', maxStaleMin: 60 }, // relay classify every ~20min; 60min = 3x interval }; // Standalone keys that are populated on-demand by RPC handlers (not seeds). diff --git a/scripts/ais-relay.cjs b/scripts/ais-relay.cjs index d3655acd2..0b693e8ba 100644 --- a/scripts/ais-relay.cjs +++ b/scripts/ais-relay.cjs @@ -2802,6 +2802,83 @@ Output: [{"i":0,"l":"high","c":"conflict"}, ...] Focus: geopolitical events, conflicts, disasters, diplomacy. Classify by real-world severity and impact.`; +const NEWS_THREAT_SUMMARY_KEY = 'news:threat:summary:v1'; +const NEWS_THREAT_SUMMARY_TTL = 1200; // 20 min — aligns with relay cadence + +// Country name → ISO2 for threat summary geo-attribution (inline to avoid ESM import) +const THREAT_COUNTRY_NAME_TO_ISO2 = { + 'afghanistan':'AF','albania':'AL','algeria':'DZ','angola':'AO','argentina':'AR', + 'armenia':'AM','australia':'AU','austria':'AT','azerbaijan':'AZ','bahrain':'BH', + 'bangladesh':'BD','belarus':'BY','belgium':'BE','bolivia':'BO','brazil':'BR', + 'burkina faso':'BF','burma':'MM','cambodia':'KH','cameroon':'CM','canada':'CA', + 'chad':'TD','chile':'CL','china':'CN','colombia':'CO','congo':'CG', + 'costa rica':'CR','croatia':'HR','cuba':'CU','cyprus':'CY', + 'czech republic':'CZ','czechia':'CZ', + 'democratic republic of the congo':'CD','dr congo':'CD','drc':'CD', + 'denmark':'DK','djibouti':'DJ','dominican republic':'DO', + 'ecuador':'EC','egypt':'EG','el salvador':'SV','eritrea':'ER', + 'estonia':'EE','ethiopia':'ET','finland':'FI','france':'FR', + 'georgia':'GE','germany':'DE','ghana':'GH','greece':'GR', + 'guatemala':'GT','guinea':'GN','haiti':'HT','honduras':'HN','hungary':'HU', + 'iceland':'IS','india':'IN','indonesia':'ID','iran':'IR','iraq':'IQ', + 'ireland':'IE','israel':'IL','italy':'IT','ivory coast':'CI', + 'jamaica':'JM','japan':'JP','jordan':'JO','kazakhstan':'KZ', + 'kenya':'KE','kosovo':'XK','kuwait':'KW','kyrgyzstan':'KG', + 'laos':'LA','latvia':'LV','lebanon':'LB','libya':'LY','lithuania':'LT', + 'mali':'ML','mauritania':'MR','mexico':'MX','moldova':'MD', + 'mongolia':'MN','montenegro':'ME','morocco':'MA','mozambique':'MZ', + 'myanmar':'MM','namibia':'NA','nepal':'NP','netherlands':'NL', + 'new zealand':'NZ','nicaragua':'NI','niger':'NE','nigeria':'NG', + 'north korea':'KP','north macedonia':'MK','norway':'NO', + 'oman':'OM','pakistan':'PK','palestine':'PS','panama':'PA', + 'paraguay':'PY','peru':'PE','philippines':'PH','poland':'PL', + 'portugal':'PT','qatar':'QA','romania':'RO','russia':'RU','rwanda':'RW', + 'saudi arabia':'SA','senegal':'SN','serbia':'RS','sierra leone':'SL', + 'singapore':'SG','slovakia':'SK','slovenia':'SI','somalia':'SO', + 'south africa':'ZA','south korea':'KR','south sudan':'SS','spain':'ES', + 'sri lanka':'LK','sudan':'SD','sweden':'SE','switzerland':'CH', + 'syria':'SY','taiwan':'TW','tajikistan':'TJ','tanzania':'TZ', + 'thailand':'TH','togo':'TG','tunisia':'TN','turkey':'TR', + 'turkmenistan':'TM','uganda':'UG','ukraine':'UA', + 'united arab emirates':'AE','uae':'AE', + 'united kingdom':'GB','uk':'GB','united states':'US','usa':'US', + 'uruguay':'UY','uzbekistan':'UZ','venezuela':'VE','vietnam':'VN', + 'yemen':'YE','zambia':'ZM','zimbabwe':'ZW', + // Key aliases + 'tehran':'IR','moscow':'RU','beijing':'CN','kyiv':'UA','pyongyang':'KP', + 'tel aviv':'IL','gaza':'PS','damascus':'SY','sanaa':'YE','houthi':'YE', + 'kremlin':'RU','pentagon':'US','nato':'','irgc':'IR','hezbollah':'LB', + 'hamas':'PS','taliban':'AF','riyadh':'SA','ankara':'TR', +}; +// Sort by name length desc so longer multi-word names match first (used for tie-breaking same position) +const THREAT_COUNTRY_NAME_ENTRIES = Object.entries(THREAT_COUNTRY_NAME_TO_ISO2) + .filter(([name, iso2]) => name.length >= 3 && iso2.length === 2) + .sort((a, b) => b[0].length - a[0].length) + .map(([name, iso2]) => ({ name, iso2, regex: new RegExp(`\\b${name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'i') })); + +// Returns the single primary affected country — the country appearing immediately after a +// locative preposition or attack verb, which marks the grammatical object/affected entity. +// Returns [] when no such pattern fires (no attribution is better than wrong attribution). +// "UK and US launch strikes on Yemen" → ['YE'] +// "US strikes on Yemen condemned by Iran" → ['YE'] (Iran is a reactor, not affected) +// "Yemen says UK and US strikes hit Hodeidah" → [] (Hodeidah is a city, skip) +// "Russia invades Ukraine" → ['UA'] +const AFFECTED_PREFIX_RE = /\b(in|on|against|at|into|across|inside|targeting|toward[s]?|invad(?:es?|ed|ing)|attack(?:s|ed|ing)?|bomb(?:s|ed|ing)?|hitt?(?:ing|s)?|strik(?:es?|ing))\s+(?:the\s+)?/gi; +function matchCountryNamesInText(text) { + const lower = text.toLowerCase(); + let match; + AFFECTED_PREFIX_RE.lastIndex = 0; + while ((match = AFFECTED_PREFIX_RE.exec(lower)) !== null) { + const afterPfx = lower.slice(match.index + match[0].length); + for (const { name, iso2 } of THREAT_COUNTRY_NAME_ENTRIES) { + if (afterPfx.startsWith(name) && (afterPfx.length === name.length || /\W/.test(afterPfx[name.length]))) { + return [iso2]; + } + } + } + return []; +} + function classifyCacheKey(title) { const hash = crypto.createHash('sha256').update(title.toLowerCase()).digest('hex').slice(0, 16); return `classify:sebuf:v1:${hash}`; @@ -2907,7 +2984,7 @@ async function classifyFetchLlm(titles) { let classifyInFlight = false; -async function seedClassifyForVariant(variant) { +async function seedClassifyForVariant(variant, seenTitles) { const digestUrl = `https://api.worldmonitor.app/api/news/v1/list-feed-digest?variant=${variant}&lang=en`; let digest; try { @@ -2945,11 +3022,30 @@ async function seedClassifyForVariant(variant) { const cached = await upstashMGet(cacheKeys); const misses = []; + // byCountry accumulates threat counts while title+level are in scope + const byCountry = {}; + const emptyLevel = () => ({ critical: 0, high: 0, medium: 0, low: 0, info: 0 }); + for (let i = 0; i < titleArr.length; i++) { - if (!cached[i]) misses.push(titleArr[i]); + const hit = cached[i]; + if (!hit) { + misses.push(titleArr[i]); + continue; + } + // Attribute cached hits while we still have the title + let parsed = hit; + if (typeof hit === 'string') { try { parsed = JSON.parse(hit); } catch { continue; } } + const level = parsed?.level; + if (!CLASSIFY_VALID_LEVELS.includes(level)) continue; + if (seenTitles.has(titleArr[i])) continue; + seenTitles.add(titleArr[i]); + for (const code of matchCountryNamesInText(titleArr[i])) { + if (!byCountry[code]) byCountry[code] = emptyLevel(); + byCountry[code][level]++; + } } - if (misses.length === 0) return { total: titleArr.length, classified: 0, skipped: 0 }; + if (misses.length === 0) return { total: titleArr.length, classified: 0, skipped: 0, byCountry }; let classified = 0; let skipped = 0; @@ -2977,6 +3073,14 @@ async function seedClassifyForVariant(variant) { classifiedSet.add(idx); await upstashSet(classifyCacheKey(chunk[idx]), { level, category, timestamp: Date.now() }, CLASSIFY_CACHE_TTL); classified++; + // Attribute newly classified title while it's still in scope + if (!seenTitles.has(chunk[idx])) { + seenTitles.add(chunk[idx]); + for (const code of matchCountryNamesInText(chunk[idx])) { + if (!byCountry[code]) byCountry[code] = emptyLevel(); + byCountry[code][level]++; + } + } } for (let i = 0; i < chunk.length; i++) { @@ -2987,7 +3091,7 @@ async function seedClassifyForVariant(variant) { } } - return { total: titleArr.length, classified, skipped }; + return { total: titleArr.length, classified, skipped, byCountry }; } async function seedClassify() { @@ -3003,18 +3107,32 @@ async function seedClassify() { let totalClassified = 0; let totalSkipped = 0; + const mergedByCountry = {}; + const seenTitles = new Set(); for (let v = 0; v < CLASSIFY_VARIANTS.length; v++) { if (v > 0) await new Promise((r) => setTimeout(r, CLASSIFY_VARIANT_STAGGER_MS)); try { - const stats = await seedClassifyForVariant(CLASSIFY_VARIANTS[v]); + const stats = await seedClassifyForVariant(CLASSIFY_VARIANTS[v], seenTitles); totalClassified += stats.classified; totalSkipped += stats.skipped; console.log(`[Classify] ${CLASSIFY_VARIANTS[v]}: ${stats.total} titles, ${stats.classified} classified, ${stats.skipped} skipped`); + for (const [code, counts] of Object.entries(stats.byCountry || {})) { + if (!mergedByCountry[code]) mergedByCountry[code] = { critical: 0, high: 0, medium: 0, low: 0, info: 0 }; + for (const lvl of ['critical', 'high', 'medium', 'low', 'info']) { + mergedByCountry[code][lvl] += counts[lvl] || 0; + } + } } catch (e) { console.warn(`[Classify] ${CLASSIFY_VARIANTS[v]} error:`, e?.message || e); } } + await upstashSet('seed-meta:news:threat-summary', { fetchedAt: Date.now(), recordCount: Object.keys(mergedByCountry).length }, 604800); + if (Object.keys(mergedByCountry).length > 0) { + await upstashSet(NEWS_THREAT_SUMMARY_KEY, { byCountry: mergedByCountry, generatedAt: Date.now() }, NEWS_THREAT_SUMMARY_TTL); + console.log(`[Classify] Threat summary written for ${Object.keys(mergedByCountry).length} countries`); + } + await upstashSet('seed-meta:classify', { fetchedAt: Date.now(), recordCount: totalClassified }, 604800); console.log(`[Classify] Done in ${((Date.now() - t0) / 1000).toFixed(1)}s — ${totalClassified} classified, ${totalSkipped} skipped`); } catch (e) { diff --git a/server/worldmonitor/intelligence/v1/get-risk-scores.ts b/server/worldmonitor/intelligence/v1/get-risk-scores.ts index 61d478ec3..f2ce5be42 100644 --- a/server/worldmonitor/intelligence/v1/get-risk-scores.ts +++ b/server/worldmonitor/intelligence/v1/get-risk-scores.ts @@ -176,6 +176,7 @@ interface CountrySignals { advisoryLevel: 'do-not-travel' | 'reconsider' | 'caution' | null; totalDisplaced: number; newsScore: number; + threatSummaryScore: number; } function emptySignals(): CountrySignals { @@ -191,6 +192,7 @@ function emptySignals(): CountrySignals { advisoryLevel: null, totalDisplaced: 0, newsScore: 0, + threatSummaryScore: 0, }; } @@ -236,11 +238,13 @@ interface AuxiliarySources { // Per-country displaced population by ISO3 code (UNHCR — persists after ceasefires) displacedByIso3: Record; newsTopStories: Array<{ countryCode: string | null; threatLevel: string; primaryTitle: string }>; + // Per-country classified headline counts from relay seedClassify() — written to news:threat:summary:v1 + threatSummaryByCountry: Record | null; } async function fetchAuxiliarySources(): Promise { const currentYear = new Date().getFullYear(); - const [ucdpRaw, outagesRaw, climateRaw, cyberRaw, firesRaw, gpsRaw, iranRaw, orefRaw, advisoriesRaw, displacementRaw, insightsRaw] = await Promise.all([ + const [ucdpRaw, outagesRaw, climateRaw, cyberRaw, firesRaw, gpsRaw, iranRaw, orefRaw, advisoriesRaw, displacementRaw, insightsRaw, threatSummaryRaw] = await Promise.all([ getCachedJson('conflict:ucdp-events:v1', true).catch(() => null), getCachedJson('infra:outages:v1', true).catch(() => null), getCachedJson('climate:anomalies:v1', true).catch(() => null), @@ -255,6 +259,7 @@ async function fetchAuxiliarySources(): Promise { .catch(() => null) .then(d => d ?? getCachedJson(`displacement:summary:v1:${currentYear - 1}`, true).catch(() => null)), getCachedJson('news:insights:v1', true).catch(() => null), + getCachedJson('news:threat:summary:v1', true).catch(() => null), ]); const arr = (v: any, field?: string, maxLen = 10000) => { let a: any[]; @@ -294,6 +299,10 @@ async function fetchAuxiliarySources(): Promise { threatLevel: typeof s.threatLevel === 'string' ? s.threatLevel.toLowerCase() : 'low', primaryTitle: typeof s.primaryTitle === 'string' ? s.primaryTitle : '', })); + const threatSummaryByCountry: AuxiliarySources['threatSummaryByCountry'] = + threatSummaryRaw && typeof threatSummaryRaw === 'object' && (threatSummaryRaw as any).byCountry + ? (threatSummaryRaw as any).byCountry + : null; return { ucdpEvents: arr(ucdpRaw, 'events'), @@ -309,6 +318,7 @@ async function fetchAuxiliarySources(): Promise { : null, displacedByIso3, newsTopStories, + threatSummaryByCountry, }; } @@ -444,6 +454,20 @@ export function computeCIIScores( if (signals) signals.newsScore += weight; } + // --- News threat summary (from relay seedClassify — all classified headlines) --- + if (aux.threatSummaryByCountry) { + const SUMMARY_WEIGHT: Record = { critical: 4, high: 2, medium: 1, low: 0.5, info: 0 }; + for (const [code, counts] of Object.entries(aux.threatSummaryByCountry)) { + const signals = data[code]; + if (!signals) continue; + let score = 0; + for (const [lvl, w] of Object.entries(SUMMARY_WEIGHT)) { + score += (counts[lvl as keyof typeof counts] || 0) * w; + } + signals.threatSummaryScore = Math.min(20, score); + } + } + // --- Scoring --- const scores: CiiScore[] = []; for (const code of Object.keys(TIER1_COUNTRIES)) { @@ -475,7 +499,7 @@ export function computeCIIScores( const gpsJammingScore = Math.min(35, d.gpsHighCount * 5 + d.gpsMediumCount * 2); const security = Math.min(100, Math.round(gpsJammingScore)); - const information = Math.min(20, d.newsScore); + const information = Math.min(20, d.newsScore + d.threatSummaryScore); const eventScore = unrest * 0.25 + conflict * 0.30 + security * 0.20 + information * 0.25; @@ -600,7 +624,7 @@ export async function getRiskScores( const stale = (await getCachedJson(RISK_STALE_CACHE_KEY)) as GetRiskScoresResponse | null; if (stale) return stale; - const emptyAux: AuxiliarySources = { ucdpEvents: [], outages: [], climate: [], cyber: [], fires: [], gpsHexes: [], iranEvents: [], orefData: null, advisories: null, displacedByIso3: {}, newsTopStories: [] }; + const emptyAux: AuxiliarySources = { ucdpEvents: [], outages: [], climate: [], cyber: [], fires: [], gpsHexes: [], iranEvents: [], orefData: null, advisories: null, displacedByIso3: {}, newsTopStories: [], threatSummaryByCountry: null }; const ciiScores = computeCIIScores([], emptyAux); return { ciiScores, strategicRisks: computeStrategicRisks(ciiScores) }; } diff --git a/tests/cii-scoring.test.mts b/tests/cii-scoring.test.mts index e2b9121b7..4cb1e9d41 100644 --- a/tests/cii-scoring.test.mts +++ b/tests/cii-scoring.test.mts @@ -16,6 +16,7 @@ function emptyAux() { advisories: null as { byCountry: Record } | null, displacedByIso3: {} as Record, newsTopStories: [] as Array<{ countryCode: string | null; threatLevel: string; primaryTitle: string }>, + threatSummaryByCountry: null as Record | null, }; } @@ -191,6 +192,16 @@ describe('CII scoring', () => { `RU with critical news (${withNews.combinedScore}) should exceed baseline (${withoutNews.combinedScore})`); }); + it('threatSummaryByCountry boosts newsActivity for target country', () => { + const aux = emptyAux(); + aux.threatSummaryByCountry = { RU: { critical: 3, high: 2, medium: 1, low: 1, info: 0 } }; + const withThreat = scoreFor(computeCIIScores([], aux), 'RU')!; + const withoutThreat = scoreFor(computeCIIScores([], emptyAux()), 'RU')!; + assert.ok(withThreat.components!.newsActivity > 0, 'newsActivity should be > 0 with threat summary'); + assert.ok(withThreat.combinedScore > withoutThreat.combinedScore, + `RU with threat summary (${withThreat.combinedScore}) should exceed baseline (${withoutThreat.combinedScore})`); + }); + it('newsTopStories newsActivity capped at 20', () => { const aux = emptyAux(); aux.newsTopStories = Array.from({ length: 20 }, () => ({ @@ -201,6 +212,14 @@ describe('CII scoring', () => { assert.ok(sy.components!.newsActivity <= 20, `newsActivity ${sy.components!.newsActivity} should be capped at 20`); }); + it('threatSummaryByCountry newsActivity capped at 20', () => { + const aux = emptyAux(); + aux.threatSummaryByCountry = { SY: { critical: 100, high: 100, medium: 100, low: 100, info: 100 } }; + const scores = computeCIIScores([], aux); + const sy = scoreFor(scores, 'SY')!; + assert.ok(sy.components!.newsActivity <= 20, `newsActivity ${sy.components!.newsActivity} should be capped at 20`); + }); + it('newsTopStories moderate threat contributes (not silently dropped)', () => { const aux = emptyAux(); aux.newsTopStories = [ @@ -235,4 +254,17 @@ describe('CII scoring', () => { assert.equal(withInfo.components!.newsActivity, withoutNews.components!.newsActivity, 'info threat level should not affect newsActivity'); }); + + it('threatSummaryByCountry unknown country code is safely ignored', () => { + const aux = emptyAux(); + aux.threatSummaryByCountry = { XX: { critical: 10, high: 5, medium: 2, low: 1, info: 0 } }; + assert.doesNotThrow(() => computeCIIScores([], aux), 'unknown country code should not throw'); + }); + + it('null threatSummaryByCountry produces zero newsActivity', () => { + const scores = computeCIIScores([], emptyAux()); + for (const s of scores) { + assert.equal(s.components!.newsActivity, 0, `${s.region} should have zero newsActivity with null threatSummary`); + } + }); });