Files
worldmonitor/tests/resilience-static-seed.test.mjs
Elie Habib 02555671f2 refactor: consolidate country name/code mappings into single canonical sources (#2676)
* refactor(country-maps): consolidate country name/ISO maps

Expand shared/country-names.json from 265 to 309 entries by merging
geojson names, COUNTRY_ALIAS_MAP, upstream API variants (World Bank,
WHO, UN, FAO), and seed-correlation extras.

Add ISO3 map generator (generate-iso3-maps.cjs) producing
iso3-to-iso2.json (239 entries) and iso2-to-iso3.json (239 entries)
with TWN and XKX supplements.

Add build-country-names.cjs for reproducible expansion from all sources.
Sync scripts/shared/ copies for edge-function test compatibility.

* refactor: consolidate country name/code mappings into single canonical sources

Eliminates fragmented country mapping across the repo. Every feature
(resilience, conflict, correlation, intelligence) was maintaining its
own partial alias map.

Data consolidation:
- Expand shared/country-names.json from 265 to 302 entries covering
  World Bank, WHO, UN, FAO, and correlation script naming variants
- Generate shared/iso3-to-iso2.json (239 entries) and
  shared/iso2-to-iso3.json from countries.geojson + supplements
  (Taiwan TWN, Kosovo XKX)

Consumer migrations:
- _country-resolver.mjs: delete COUNTRY_ALIAS_MAP (37 entries),
  replace 2MB geojson parse with 5KB iso3-to-iso2.json
- conflict/_shared.ts: replace 33-entry ISO2_TO_ISO3 literal
- seed-conflict-intel.mjs: replace 20-entry ISO2_TO_ISO3 literal
- _dimension-scorers.ts: replace geojson-based ISO3 construction
- get-risk-scores.ts: replace 31-entry ISO3_TO_ISO2 literal
- seed-correlation.mjs: replace 102-entry COUNTRY_NAME_TO_ISO2
  and 90-entry ISO3_TO_ISO2, use resolveIso2() from canonical
  resolver, lower short-alias threshold to 2 chars with word
  boundary matching, export matchCountryNamesInText(), add isMain
  guard

Tests:
- New tests/country-resolver.test.mjs with structural validation,
  parity regression for all 37 old aliases, ISO3 bidirectional
  consistency, and Taiwan/Kosovo assertions
- Updated resilience seed test for new resolver signature

Net: -190 lines, 0 hardcoded country maps remaining

* fix: normalize raw text before country name matching

Text matchers (geo-extract, seed-security-advisories, seed-correlation)
were matching normalized keys against raw text containing diacritics
and punctuation. "Curaçao", "Timor-Leste", "Hong Kong S.A.R." all
failed to resolve after country-names.json keys were normalized.

Fix: apply NFKD + diacritic stripping + punctuation normalization to
input text before matching, the same transform used on the keys.

Also add "hong kong" and "sao tome" as short-form keys for bigram
headline matching in geo-extract.

* fix: remove 'u s' alias that caused US/VI misattribution

'u s' in country-names.json matched before 'u s virgin islands' in
geo-extract's bigram scanner, attributing Virgin Islands headlines
to US. Removed since 'usa', 'united states', and the uppercase US
expansion already cover the United States.
2026-04-04 15:38:02 +04:00

194 lines
6.8 KiB
JavaScript

import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { readFileSync } from 'node:fs';
import { dirname, join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import {
RESILIENCE_STATIC_INDEX_KEY,
RESILIENCE_STATIC_META_KEY,
buildFailureRefreshKeys,
buildManifest,
countryRedisKey,
createCountryResolvers,
finalizeCountryPayloads,
parseEurostatEnergyDataset,
parseRsfRanking,
resolveIso2,
shouldSkipSeedYear,
} from '../scripts/seed-resilience-static.mjs';
// ES modules have no built-in __dirname, so derive it from this module's URL.
const testFilePath = fileURLToPath(import.meta.url);
const __dirname = dirname(testFilePath);
// Parent of the directory holding this test file; used to locate sibling folders.
const root = resolve(__dirname, '..');
/**
 * Builds the country resolvers shared by the tests in this file from a small
 * fixed fixture: four lowercase name aliases and four ISO3 codes, each mapped
 * to an ISO2 code.
 *
 * @returns {*} Whatever `createCountryResolvers` returns for the fixture maps.
 */
function makeResolvers() {
  const nameToIso2 = {
    norway: 'NO',
    'united states': 'US',
    yemen: 'YE',
    'cape verde': 'CV',
  };
  const iso3ToIso2 = { NOR: 'NO', USA: 'US', YEM: 'YE', CPV: 'CV' };

  return createCountryResolvers(nameToIso2, iso3ToIso2);
}
// Country normalization: resolveIso2 must map the fixture's ISO3 codes and
// name aliases to ISO2 codes and return null for a name the fixture lacks.
describe('resilience static seed country normalization', () => {
  const resolvers = makeResolvers();

  it('resolves explicit fixture countries from ISO3 and aliases', () => {
    // [lookup input, expected ISO2 (null when the fixture has no match)]
    const expectations = [
      [{ iso3: 'NOR' }, 'NO'],
      [{ iso3: 'USA' }, 'US'],
      [{ iso3: 'YEM' }, 'YE'],
      [{ name: 'Cape Verde' }, 'CV'],
      [{ name: 'OECS' }, null],
    ];

    for (const [lookup, expectedIso2] of expectations) {
      assert.equal(resolveIso2(lookup, resolvers), expectedIso2);
    }
  });
});
// Parser coverage: the RSF ranking table scraper and the Eurostat
// energy-dependency dataset reader.
describe('resilience static seed parsers', () => {
  it('parses RSF ranking rows and skips aggregate entries', () => {
    // Pipe-delimited table with decimal commas; the OECS row is expected to be dropped.
    const html = `
<div class="field__item">|Rank|Country|Note|Differential|
|3|Norway|6,52|-2 (1)|
|32|United States|18,22|+15 (47)|
|34|OECS|19,72|-9 (25)|
|169|Yemen|69,22|+2 (171)|</div>
`;
    const expectedNorway = {
      source: 'rsf-ranking',
      rank: 3,
      score: 6.52,
      differential: '-2 (1)',
      year: null,
    };

    const rows = parseRsfRanking(html);
    const parsedCodes = Array.from(rows.keys()).sort();

    assert.deepEqual(parsedCodes, ['NO', 'US', 'YE']);
    assert.deepEqual(rows.get('NO'), expectedNorway);
    assert.equal(rows.get('US').rank, 32);
    assert.equal(rows.get('YE').score, 69.22);
  });

  it('parses Eurostat energy dependency and keeps the latest TOTAL series value', () => {
    // Dimension layout of the fixture dataset, listed in the same order as `id`/`size`.
    const dimension = {
      freq: { category: { index: { A: 0 } } },
      siec: { category: { index: { TOTAL: 0, C0110: 1 } } },
      unit: { category: { index: { PC: 0 } } },
      geo: { category: { index: { NO: 0, US: 1 } } },
      time: { category: { index: { 2023: 0, 2024: 1 } } },
    };
    // Sparse flat values. NOTE(review): index 5 presumably belongs to the
    // non-TOTAL (C0110) series and therefore must not surface in the result —
    // confirm against the parser's flattening order.
    const value = {
      0: -15.2,
      1: -13.3,
      2: 7.9,
      3: 8.5,
      5: 999.0,
    };
    const dataset = {
      id: ['freq', 'siec', 'unit', 'geo', 'time'],
      size: [1, 2, 1, 2, 2],
      dimension,
      value,
    };
    const expectedNorway = {
      source: 'eurostat-nrg_ind_id',
      energyImportDependency: {
        value: -13.3,
        year: 2024,
        source: 'eurostat',
      },
    };

    const parsed = parseEurostatEnergyDataset(dataset);

    assert.deepEqual(parsed.get('NO'), expectedNorway);
    assert.equal(parsed.get('US').energyImportDependency.value, 8.5);
  });
});
// Payload assembly: merging per-dataset maps into per-country payloads,
// building the manifest and failure refresh keys, and the rerun-skip rule.
describe('resilience static seed payload assembly', () => {
  it('merges sparse datasets into the canonical per-country shape with coverage', () => {
    const seedYear = 2026;
    const seededAt = '2026-04-03T12:00:00.000Z';

    // Factories hand out fresh objects so inputs and expectations never share references.
    const wgiRecord = (value) => ({
      source: 'worldbank-wgi',
      indicators: { 'GE.EST': { value, year: 2024 } },
    });
    const norwayInfrastructure = () => ({
      source: 'worldbank-infrastructure',
      indicators: { 'EG.ELC.ACCS.ZS': { value: 100, year: 2024 } },
    });
    const norwayEnergy = () => ({
      source: 'eurostat-nrg_ind_id',
      energyImportDependency: { value: -13.3, year: 2024, source: 'eurostat' },
    });

    const datasets = {
      wgi: new Map([
        ['NO', wgiRecord(1.8)],
        ['US', wgiRecord(1.1)],
      ]),
      infrastructure: new Map([['NO', norwayInfrastructure()]]),
      gpi: new Map(),
      rsf: new Map([
        ['YE', { source: 'rsf-ranking', rank: 169, score: 69.22, differential: '+2 (171)', year: null }],
      ]),
      who: new Map([
        ['US', { source: 'who-gho', indicators: { uhcIndex: { indicator: 'UHC_INDEX_REPORTED', value: 81, year: 2021 } } }],
      ]),
      fao: new Map(),
      aquastat: new Map(),
      iea: new Map([['NO', norwayEnergy()]]),
    };

    const payloads = finalizeCountryPayloads(datasets, seedYear, seededAt);

    assert.deepEqual(Array.from(payloads.keys()).sort(), ['NO', 'US', 'YE']);

    // Norway appears in 3 of the 8 datasets; the rest must be explicit nulls.
    const expectedNorway = {
      wgi: wgiRecord(1.8),
      infrastructure: norwayInfrastructure(),
      gpi: null,
      rsf: null,
      who: null,
      fao: null,
      aquastat: null,
      iea: norwayEnergy(),
      coverage: { availableDatasets: 3, totalDatasets: 8, ratio: 0.375 },
      seedYear: 2026,
      seededAt: '2026-04-03T12:00:00.000Z',
    };
    assert.deepEqual(payloads.get('NO'), expectedNorway);

    const availableFor = (iso2) => payloads.get(iso2).coverage.availableDatasets;
    assert.equal(availableFor('US'), 2);
    assert.equal(availableFor('YE'), 1);
  });

  it('builds a manifest and the failure refresh key set from the country list', () => {
    // Deliberately unsorted input: the expected manifest lists countries alphabetically.
    const countryPayloads = new Map();
    countryPayloads.set('US', { coverage: { availableDatasets: 2 } });
    countryPayloads.set('NO', { coverage: { availableDatasets: 3 } });
    countryPayloads.set('YE', { coverage: { availableDatasets: 1 } });

    const manifest = buildManifest(
      countryPayloads,
      ['aquastat', 'gpi'],
      2026,
      '2026-04-03T12:00:00.000Z',
    );

    const expectedManifest = {
      countries: ['NO', 'US', 'YE'],
      recordCount: 3,
      failedDatasets: ['aquastat', 'gpi'],
      seedYear: 2026,
      seededAt: '2026-04-03T12:00:00.000Z',
      sourceVersion: 'resilience-static-v1',
    };
    assert.deepEqual(manifest, expectedManifest);

    // Index key and meta key first, followed by one key per manifest country.
    const expectedRefreshKeys = [
      RESILIENCE_STATIC_INDEX_KEY,
      RESILIENCE_STATIC_META_KEY,
      ...['NO', 'US', 'YE'].map((iso2) => countryRedisKey(iso2)),
    ];
    assert.deepEqual(buildFailureRefreshKeys(manifest), expectedRefreshKeys);
  });

  it('skips reruns only after a successful snapshot for the same seed year', () => {
    const currentSeedYear = 2026;
    // [previous seed meta, whether the run for currentSeedYear should be skipped]
    const scenarios = [
      [{ status: 'ok', seedYear: 2026, recordCount: 150 }, true],
      [{ status: 'error', seedYear: 2026, recordCount: 150 }, false],
      [{ status: 'ok', seedYear: 2025, recordCount: 150 }, false],
    ];

    for (const [previousMeta, shouldSkip] of scenarios) {
      assert.equal(shouldSkipSeedYear(previousMeta, currentSeedYear), shouldSkip);
    }
  });
});
// Health registrations: source-level checks that the api/ health modules
// mention the resilience static keys. The files are read as text, not imported.
describe('resilience static health registrations', () => {
  const readApiSource = (fileName) => readFileSync(join(root, 'api', fileName), 'utf8');
  const healthSrc = readApiSource('health.js');
  const seedHealthSrc = readApiSource('seed-health.js');

  it('registers the manifest key and seed-meta in health.js', () => {
    const requiredPatterns = [
      /resilienceStaticIndex:\s+'resilience:static:index:v1'/,
      /seed-meta:resilience:static/,
    ];

    for (const pattern of requiredPatterns) {
      assert.match(healthSrc, pattern);
    }
  });

  it('registers annual seed-health monitoring for resilience static', () => {
    const registrationPattern = /'resilience:static':\s+\{ key: 'seed-meta:resilience:static',\s+intervalMin: 288000 \}/;

    assert.match(seedHealthSrc, registrationPattern);
  });
});