Files
worldmonitor/tests/country-resolver.test.mjs
Elie Habib 02555671f2 refactor: consolidate country name/code mappings into single canonical sources (#2676)
* refactor(country-maps): consolidate country name/ISO maps

Expand shared/country-names.json from 265 to 309 entries by merging
geojson names, COUNTRY_ALIAS_MAP, upstream API variants (World Bank,
WHO, UN, FAO), and seed-correlation extras.

Add ISO3 map generator (generate-iso3-maps.cjs) producing
iso3-to-iso2.json (239 entries) and iso2-to-iso3.json (239 entries)
with TWN and XKX supplements.

Add build-country-names.cjs for reproducible expansion from all sources.
Sync scripts/shared/ copies for edge-function test compatibility.

* refactor: consolidate country name/code mappings into single canonical sources

Eliminates fragmented country mapping across the repo. Every feature
(resilience, conflict, correlation, intelligence) was maintaining its
own partial alias map.

Data consolidation:
- Expand shared/country-names.json from 265 to 302 entries covering
  World Bank, WHO, UN, FAO, and correlation script naming variants
- Generate shared/iso3-to-iso2.json (239 entries) and
  shared/iso2-to-iso3.json from countries.geojson + supplements
  (Taiwan TWN, Kosovo XKX)

Consumer migrations:
- _country-resolver.mjs: delete COUNTRY_ALIAS_MAP (37 entries),
  replace 2MB geojson parse with 5KB iso3-to-iso2.json
- conflict/_shared.ts: replace 33-entry ISO2_TO_ISO3 literal
- seed-conflict-intel.mjs: replace 20-entry ISO2_TO_ISO3 literal
- _dimension-scorers.ts: replace geojson-based ISO3 construction
- get-risk-scores.ts: replace 31-entry ISO3_TO_ISO2 literal
- seed-correlation.mjs: replace 102-entry COUNTRY_NAME_TO_ISO2
  and 90-entry ISO3_TO_ISO2, use resolveIso2() from canonical
  resolver, lower short-alias threshold to 2 chars with word
  boundary matching, export matchCountryNamesInText(), add isMain
  guard

Tests:
- New tests/country-resolver.test.mjs with structural validation,
  parity regression for all 37 old aliases, ISO3 bidirectional
  consistency, and Taiwan/Kosovo assertions
- Updated resilience seed test for new resolver signature

Net: -190 lines, 0 hardcoded country maps remaining

* fix: normalize raw text before country name matching

Text matchers (geo-extract, seed-security-advisories, seed-correlation)
were matching normalized keys against raw text containing diacritics
and punctuation. "Curaçao", "Timor-Leste", "Hong Kong S.A.R." all
failed to resolve after country-names.json keys were normalized.

Fix: apply NFKD + diacritic stripping + punctuation normalization to
input text before matching, same transform used on the keys.

Also add "hong kong" and "sao tome" as short-form keys for bigram
headline matching in geo-extract.

* fix: remove 'u s' alias that caused US/VI misattribution

'u s' in country-names.json matched before 'u s virgin islands' in
geo-extract's bigram scanner, attributing Virgin Islands headlines
to US. Removed since 'usa', 'united states', and the uppercase US
expansion already cover the United States.
2026-04-04 15:38:02 +04:00

84 lines
3.5 KiB
JavaScript

import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { readFileSync } from 'node:fs';
import { resolve } from 'node:path';
import { normalizeCountryToken, createCountryResolvers, resolveIso2, isIso2, isIso3 } from '../scripts/_country-resolver.mjs';
// Parent of the directory that contains this test file; the shared JSON
// fixtures are resolved relative to it.
const root = resolve(import.meta.dirname, '..');

/**
 * Read a UTF-8 JSON file addressed relative to `root` and return the parsed value.
 * @param {string} relativePath - path below `root`, e.g. 'shared/foo.json'
 * @returns {any} the parsed JSON document
 */
const loadJson = (relativePath) => {
  const absolutePath = resolve(root, relativePath);
  return JSON.parse(readFileSync(absolutePath, 'utf8'));
};

// Canonical mapping files exercised by the suites below.
const countryNames = loadJson('shared/country-names.json');
const iso3ToIso2 = loadJson('shared/iso3-to-iso2.json');
const iso2ToIso3 = loadJson('shared/iso2-to-iso3.json');
// Shape checks for shared/country-names.json: keys must already be in
// normalized form, values must pass isIso2, and the table must not shrink
// below 300 entries.
describe('country-names.json structural validation', () => {
  it('every key equals normalizeCountryToken(key)', () => {
    Object.keys(countryNames).forEach((name) => {
      // A key that changes under normalization could never be hit by a
      // normalized lookup, so it is reported by name.
      const normalized = normalizeCountryToken(name);
      assert.equal(name, normalized, `key "${name}" is not normalized`);
    });
  });

  it('every value is a valid ISO2 code', () => {
    Object.entries(countryNames).forEach(([name, code]) => {
      assert.ok(isIso2(code), `"${name}" → "${code}" is not valid ISO2`);
    });
  });

  it('has at least 300 entries', () => {
    const entryCount = Object.keys(countryNames).length;
    assert.ok(entryCount >= 300);
  });
});
// Validates shared/iso3-to-iso2.json on its own (size, key/value shape) and
// against shared/iso2-to-iso3.json (the two tables must be exact inverses).
describe('iso3-to-iso2.json validation', () => {
  it('has at least 238 entries', () => {
    const entryCount = Object.keys(iso3ToIso2).length;
    assert.ok(entryCount >= 238, `expected at least 238 entries, got ${entryCount}`);
  });

  it('every key is valid ISO3, every value is valid ISO2', () => {
    for (const [k, v] of Object.entries(iso3ToIso2)) {
      assert.ok(isIso3(k), `key "${k}" not valid ISO3`);
      assert.ok(isIso2(v), `value "${v}" not valid ISO2`);
    }
  });

  it('bidirectional consistency with iso2-to-iso3', () => {
    // Direction 1: every iso2 -> iso3 pair must map back to the same iso2.
    for (const [iso2, iso3] of Object.entries(iso2ToIso3)) {
      assert.equal(iso3ToIso2[iso3], iso2, `iso3ToIso2[${iso3}] !== ${iso2}`);
    }
    // Direction 2: every iso3 -> iso2 pair must map back to the same iso3.
    // Without this pass, entries present only in iso3-to-iso2.json (or two
    // ISO3 codes sharing one ISO2) would go unnoticed.
    for (const [iso3, iso2] of Object.entries(iso3ToIso2)) {
      assert.equal(iso2ToIso3[iso2], iso3, `iso2ToIso3[${iso2}] !== ${iso3}`);
    }
  });

  it('resolves Taiwan and Kosovo', () => {
    assert.equal(iso3ToIso2['TWN'], 'TW');
    assert.equal(iso3ToIso2['XKX'], 'XK');
  });
});
// Regression suite: names that the removed COUNTRY_ALIAS_MAP used to handle
// must still resolve through resolveIso2, and ISO3 inputs must resolve too.
describe('resolver parity', () => {
  const resolvers = createCountryResolvers();

  // [alias, expected ISO2] pairs, in the order they are checked.
  // NOTE(review): this table has 35 pairs, while the change that introduced
  // the test describes 37 old aliases — confirm none were dropped.
  const legacyAliases = [
    ['bahamas the', 'BS'],
    ['cape verde', 'CV'],
    ['congo brazzaville', 'CG'],
    ['congo kinshasa', 'CD'],
    ['congo rep', 'CG'],
    ['congo dem rep', 'CD'],
    ['czech republic', 'CZ'],
    ['egypt arab rep', 'EG'],
    ['gambia the', 'GM'],
    ['hong kong sar china', 'HK'],
    ['iran islamic rep', 'IR'],
    ['korea dem peoples rep', 'KP'],
    ['korea rep', 'KR'],
    ['lao pdr', 'LA'],
    ['macao sar china', 'MO'],
    ['micronesia fed sts', 'FM'],
    ['morocco western sahara', 'MA'],
    ['north macedonia', 'MK'],
    ['occupied palestinian territory', 'PS'],
    ['palestinian territories', 'PS'],
    ['palestine state of', 'PS'],
    ['russian federation', 'RU'],
    ['slovak republic', 'SK'],
    ['st kitts and nevis', 'KN'],
    ['st lucia', 'LC'],
    ['st vincent and the grenadines', 'VC'],
    ['syrian arab republic', 'SY'],
    ['the bahamas', 'BS'],
    ['timor leste', 'TL'],
    ['turkiye', 'TR'],
    ['united states of america', 'US'],
    ['venezuela rb', 'VE'],
    ['viet nam', 'VN'],
    ['west bank and gaza', 'PS'],
    ['yemen rep', 'YE'],
  ];

  it('resolves all old COUNTRY_ALIAS_MAP entries', () => {
    for (const [alias, iso2] of legacyAliases) {
      const actual = resolveIso2({ name: alias }, resolvers);
      assert.equal(actual, iso2, `"${alias}" → ${actual}, expected ${iso2}`);
    }
  });

  it('resolves ISO3 codes', () => {
    // Two ordinary codes plus the Taiwan/Kosovo codes.
    const iso3Cases = [
      ['USA', 'US'],
      ['GBR', 'GB'],
      ['TWN', 'TW'],
      ['XKX', 'XK'],
    ];
    for (const [iso3, iso2] of iso3Cases) {
      assert.equal(resolveIso2({ iso3 }, resolvers), iso2);
    }
  });
});