refactor: consolidate country name/code mappings into single canonical sources (#2676)

* refactor(country-maps): consolidate country name/ISO maps

Expand shared/country-names.json from 265 to 309 entries by merging
geojson names, COUNTRY_ALIAS_MAP, upstream API variants (World Bank,
WHO, UN, FAO), and seed-correlation extras.

Add ISO3 map generator (generate-iso3-maps.cjs) producing
iso3-to-iso2.json (239 entries) and iso2-to-iso3.json (239 entries)
with TWN and XKX supplements.

Add build-country-names.cjs for reproducible expansion from all sources.
Sync scripts/shared/ copies for edge-function test compatibility.

* refactor: consolidate country name/code mappings into single canonical sources

Eliminates fragmented country mapping across the repo. Every feature
(resilience, conflict, correlation, intelligence) was maintaining its
own partial alias map.

Data consolidation:
- Expand shared/country-names.json from 265 to 302 entries covering
  World Bank, WHO, UN, FAO, and correlation script naming variants
- Generate shared/iso3-to-iso2.json (239 entries) and
  shared/iso2-to-iso3.json from countries.geojson + supplements
  (Taiwan TWN, Kosovo XKX)

Consumer migrations:
- _country-resolver.mjs: delete COUNTRY_ALIAS_MAP (37 entries),
  replace 2MB geojson parse with 5KB iso3-to-iso2.json
- conflict/_shared.ts: replace 33-entry ISO2_TO_ISO3 literal
- seed-conflict-intel.mjs: replace 20-entry ISO2_TO_ISO3 literal
- _dimension-scorers.ts: replace geojson-based ISO3 construction
- get-risk-scores.ts: replace 31-entry ISO3_TO_ISO2 literal
- seed-correlation.mjs: replace 102-entry COUNTRY_NAME_TO_ISO2
  and 90-entry ISO3_TO_ISO2, use resolveIso2() from canonical
  resolver, lower short-alias threshold to 2 chars with word
  boundary matching, export matchCountryNamesInText(), add isMain
  guard

Tests:
- New tests/country-resolver.test.mjs with structural validation,
  parity regression for all 37 old aliases, ISO3 bidirectional
  consistency, and Taiwan/Kosovo assertions
- Updated resilience seed test for new resolver signature

Net: -190 lines, 0 hardcoded country maps remaining

* fix: normalize raw text before country name matching

Text matchers (geo-extract, seed-security-advisories, seed-correlation)
were matching normalized keys against raw text containing diacritics
and punctuation. "Curaçao", "Timor-Leste", "Hong Kong S.A.R." all
failed to resolve after country-names.json keys were normalized.

Fix: apply NFKD + diacritic stripping + punctuation normalization to
input text before matching, same transform used on the keys.

Also add "hong kong" and "sao tome" as short-form keys for bigram
headline matching in geo-extract.

* fix: remove 'u s' alias that caused US/VI misattribution

'u s' in country-names.json matched before 'u s virgin islands' in
geo-extract's bigram scanner, attributing Virgin Islands headlines
to US. Removed since 'usa', 'united states', and the uppercase US
expansion already cover the United States.
This commit is contained in:
Elie Habib
2026-04-04 15:38:02 +04:00
committed by GitHub
parent 39f1e1e309
commit 02555671f2
19 changed files with 1443 additions and 244 deletions

View File

@@ -1,53 +1,7 @@
import { readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import { loadSharedConfig } from './_seed-utils.mjs';
const __dirname = dirname(fileURLToPath(import.meta.url));
const DEFAULT_COUNTRY_NAMES = loadSharedConfig('country-names.json');
const DEFAULT_COUNTRIES_GEOJSON = JSON.parse(
readFileSync(join(__dirname, '..', 'public', 'data', 'countries.geojson'), 'utf8'),
);
export const COUNTRY_ALIAS_MAP = {
'bahamas the': 'BS',
'cape verde': 'CV',
'congo brazzaville': 'CG',
'congo kinshasa': 'CD',
'congo rep': 'CG',
'congo dem rep': 'CD',
'czech republic': 'CZ',
'egypt arab rep': 'EG',
'gambia the': 'GM',
'hong kong sar china': 'HK',
'iran islamic rep': 'IR',
'korea dem peoples rep': 'KP',
'korea rep': 'KR',
'lao pdr': 'LA',
'macao sar china': 'MO',
'micronesia fed sts': 'FM',
'morocco western sahara': 'MA',
'north macedonia': 'MK',
'occupied palestinian territory': 'PS',
'palestinian territories': 'PS',
'palestine state of': 'PS',
'russian federation': 'RU',
'slovak republic': 'SK',
'st kitts and nevis': 'KN',
'st lucia': 'LC',
'st vincent and the grenadines': 'VC',
'syrian arab republic': 'SY',
'the bahamas': 'BS',
'timor leste': 'TL',
'turkiye': 'TR',
'u s': 'US',
'united states of america': 'US',
'venezuela rb': 'VE',
'viet nam': 'VN',
'west bank and gaza': 'PS',
'yemen rep': 'YE',
};
const DEFAULT_ISO3_MAP = loadSharedConfig('iso3-to-iso2.json');
export function normalizeCountryToken(value) {
return String(value || '')
@@ -68,7 +22,7 @@ export function isIso3(value) {
return /^[A-Z]{3}$/.test(String(value || '').trim());
}
export function createCountryResolvers(countryNames = DEFAULT_COUNTRY_NAMES, geojson = DEFAULT_COUNTRIES_GEOJSON) {
export function createCountryResolvers(countryNames = DEFAULT_COUNTRY_NAMES, iso3Map = DEFAULT_ISO3_MAP) {
const nameToIso2 = new Map();
const iso3ToIso2 = new Map();
@@ -76,21 +30,8 @@ export function createCountryResolvers(countryNames = DEFAULT_COUNTRY_NAMES, geo
if (isIso2(iso2)) nameToIso2.set(normalizeCountryToken(name), iso2.toUpperCase());
}
for (const [alias, iso2] of Object.entries(COUNTRY_ALIAS_MAP)) {
if (isIso2(iso2)) nameToIso2.set(normalizeCountryToken(alias), iso2.toUpperCase());
}
for (const feature of geojson?.features || []) {
const properties = feature?.properties || {};
const iso2 = String(properties['ISO3166-1-Alpha-2'] || '').toUpperCase();
const iso3 = String(properties['ISO3166-1-Alpha-3'] || '').toUpperCase();
const name = properties.name;
if (isIso2(iso2)) {
if (typeof name === 'string' && name.trim()) {
nameToIso2.set(normalizeCountryToken(name), iso2);
}
if (isIso3(iso3)) iso3ToIso2.set(iso3, iso2);
}
for (const [iso3, iso2] of Object.entries(iso3Map)) {
if (isIso3(iso3) && isIso2(iso2)) iso3ToIso2.set(iso3, iso2.toUpperCase());
}
return { nameToIso2, iso3ToIso2 };

View File

@@ -0,0 +1,186 @@
'use strict';
const fs = require('fs');
const path = require('path');
// Repository root: the directory one level above this script.
const root = path.resolve(__dirname, '..');

// Input: country features whose `name` / ISO properties are harvested in section A.
const geojsonPath = path.join(root, 'public', 'data', 'countries.geojson');

// Input AND output: the canonical name -> ISO2 map, rewritten in place at the end.
const existingPath = path.join(root, 'shared', 'country-names.json');
const existing = JSON.parse(fs.readFileSync(existingPath, 'utf8'));

// Work on a shallow copy so `existing` keeps the original entry count for the summary line.
const result = { ...existing };

// Number of keys contributed by this run (incremented by add()).
let added = 0;
/**
 * Convert a country name into the lookup-key form used for country-names.json.
 *
 * Steps: decompose accented characters (NFKD) and drop the diacritic marks,
 * lower-case, spell out `&` as " and ", turn apostrophes / dots / parentheses /
 * commas / slashes / hyphens into spaces, then collapse whitespace runs and trim.
 *
 * @param {*} value - Raw name; falsy values are treated as the empty string.
 * @returns {string} Normalized key (may be empty).
 */
function normalize(value) {
  const raw = String(value || '');
  // NFKD splits "ç" into "c" + combining cedilla so the mark can be removed on its own.
  const withoutMarks = raw.normalize('NFKD').replace(/\p{Diacritic}/gu, '');
  const spacedOut = withoutMarks
    .toLowerCase()
    .replace(/&/g, ' and ')
    .replace(/[''.(),/-]/g, ' ');
  return spacedOut.replace(/\s+/g, ' ').trim();
}
/**
 * Register a name -> ISO2 pair in `result` unless the normalized name is
 * already present. Existing entries always win; nothing is overwritten.
 *
 * @param {string} key - Raw country name or alias; normalized before use.
 * @param {string} iso2 - Must be exactly two upper-case ASCII letters, otherwise the pair is ignored.
 * @param {string} source - Label of the contributing data set (currently unused by the body).
 * @returns {void}
 */
function add(key, iso2, source) {
  const normalizedKey = normalize(key);
  const isUsable = Boolean(normalizedKey) && /^[A-Z]{2}$/.test(iso2);
  if (isUsable && !result[normalizedKey]) {
    result[normalizedKey] = iso2;
    added += 1;
  }
}
// A. Country names taken from the geojson feature properties.
// A feature contributes only when it carries both a well-formed two-letter
// upper-case code and a non-blank string name.
const geojson = JSON.parse(fs.readFileSync(geojsonPath, 'utf8'));
geojson.features.forEach((feature) => {
  const properties = feature.properties || {};
  const { name } = properties;
  const alpha2 = String(properties['ISO3166-1-Alpha-2'] || '').trim();
  const hasUsableCode = /^[A-Z]{2}$/.test(alpha2);
  const hasUsableName = typeof name === 'string' && name.trim() !== '';
  if (hasUsableCode && hasUsableName) {
    add(name, alpha2, 'geojson');
  }
});
// B. Legacy COUNTRY_ALIAS_MAP, formerly hardcoded in _country-resolver.mjs.
//
// The 'u s' alias is deliberately NOT carried over: as a key in
// country-names.json it was matched ahead of 'u s virgin islands' by the
// bigram headline scanner and attributed Virgin Islands text to US.
// 'united states of america' (below) and the other United States keys
// still cover that country. Re-adding 'u s' here would silently restore
// the key on the next run, because add() inserts any key that is missing.
const COUNTRY_ALIAS_MAP = {
  'bahamas the': 'BS',
  'cape verde': 'CV',
  'congo brazzaville': 'CG',
  'congo kinshasa': 'CD',
  'congo rep': 'CG',
  'congo dem rep': 'CD',
  'czech republic': 'CZ',
  'egypt arab rep': 'EG',
  'gambia the': 'GM',
  'hong kong sar china': 'HK',
  'iran islamic rep': 'IR',
  'korea dem peoples rep': 'KP',
  'korea rep': 'KR',
  'lao pdr': 'LA',
  'macao sar china': 'MO',
  'micronesia fed sts': 'FM',
  'morocco western sahara': 'MA',
  'north macedonia': 'MK',
  'occupied palestinian territory': 'PS',
  'palestinian territories': 'PS',
  'palestine state of': 'PS',
  'russian federation': 'RU',
  'slovak republic': 'SK',
  'st kitts and nevis': 'KN',
  'st lucia': 'LC',
  'st vincent and the grenadines': 'VC',
  'syrian arab republic': 'SY',
  'the bahamas': 'BS',
  'timor leste': 'TL',
  'turkiye': 'TR',
  'united states of america': 'US',
  'venezuela rb': 'VE',
  'viet nam': 'VN',
  'west bank and gaza': 'PS',
  'yemen rep': 'YE',
};
// Existing keys win inside add(), so this only fills gaps.
for (const [alias, iso2] of Object.entries(COUNTRY_ALIAS_MAP)) {
  add(alias, iso2, 'alias_map');
}
// C. Name variants as spelled by upstream APIs.
// Several keys repeat section B; add() ignores keys that are already present,
// so the overlap is harmless.
const upstream = {
  'egypt arab rep': 'EG',
  'korea rep': 'KR',
  'iran islamic rep': 'IR',
  'congo dem rep': 'CD',
  'congo rep': 'CG',
  'venezuela rb': 'VE',
  'yemen rep': 'YE',
  'bahamas the': 'BS',
  'gambia the': 'GM',
  'hong kong sar china': 'HK',
  'macao sar china': 'MO',
  'micronesia fed sts': 'FM',
  'lao pdr': 'LA',
  'slovak republic': 'SK',
  'syrian arab republic': 'SY',
  'viet nam': 'VN',
  'turkiye': 'TR',
  'timor leste': 'TL',
  'occupied palestinian territory': 'PS',
  'palestine state of': 'PS',
  'west bank and gaza': 'PS',
  'bolivarian republic of venezuela': 'VE',
  'plurinational state of bolivia': 'BO',
  'united republic of tanzania': 'TZ',
  'democratic peoples republic of korea': 'KP',
  'republic of korea': 'KR',
  'ivory coast': 'CI',
  'swaziland': 'SZ',
  'north macedonia': 'MK',
};
Object.entries(upstream).forEach(([variant, code]) => {
  add(variant, code, 'upstream');
});
// D. Names previously hardcoded in seed-correlation.mjs.
// Keys are passed through add(), which normalizes them (so "cote d'ivoire"
// lands as "cote d ivoire") and skips anything already registered.
const COUNTRY_NAME_TO_ISO2 = {
  'afghanistan': 'AF', 'albania': 'AL', 'algeria': 'DZ', 'angola': 'AO',
  'argentina': 'AR', 'armenia': 'AM', 'australia': 'AU', 'austria': 'AT',
  'azerbaijan': 'AZ', 'bahrain': 'BH', 'bangladesh': 'BD', 'belarus': 'BY',
  'belgium': 'BE', 'bolivia': 'BO', 'bosnia and herzegovina': 'BA',
  'brazil': 'BR', 'bulgaria': 'BG', 'burkina faso': 'BF', 'burma': 'MM',
  'cambodia': 'KH', 'cameroon': 'CM', 'canada': 'CA', 'chad': 'TD',
  'chile': 'CL', 'china': 'CN', 'colombia': 'CO', 'congo': 'CG',
  'costa rica': 'CR', 'croatia': 'HR', 'cuba': 'CU', 'cyprus': 'CY',
  'czech republic': 'CZ', 'czechia': 'CZ',
  'democratic republic of the congo': 'CD', 'dr congo': 'CD', 'drc': 'CD',
  'denmark': 'DK', 'djibouti': 'DJ', 'dominican republic': 'DO',
  'ecuador': 'EC', 'egypt': 'EG', 'el salvador': 'SV', 'eritrea': 'ER',
  'estonia': 'EE', 'ethiopia': 'ET', 'finland': 'FI', 'france': 'FR',
  'gabon': 'GA', 'georgia': 'GE', 'germany': 'DE', 'ghana': 'GH',
  'greece': 'GR', 'guatemala': 'GT', 'guinea': 'GN', 'haiti': 'HT',
  'honduras': 'HN', 'hungary': 'HU', 'iceland': 'IS', 'india': 'IN',
  'indonesia': 'ID', 'iran': 'IR', 'iraq': 'IQ', 'ireland': 'IE',
  'israel': 'IL', 'italy': 'IT', 'ivory coast': 'CI', "cote d'ivoire": 'CI',
  'jamaica': 'JM', 'japan': 'JP', 'jordan': 'JO', 'kazakhstan': 'KZ',
  'kenya': 'KE', 'kosovo': 'XK', 'kuwait': 'KW', 'kyrgyzstan': 'KG',
  'laos': 'LA', 'latvia': 'LV', 'lebanon': 'LB', 'libya': 'LY',
  'lithuania': 'LT', 'madagascar': 'MG', 'malawi': 'MW', 'malaysia': 'MY',
  'mali': 'ML', 'mauritania': 'MR', 'mexico': 'MX', 'moldova': 'MD',
  'mongolia': 'MN', 'montenegro': 'ME', 'morocco': 'MA', 'mozambique': 'MZ',
  'myanmar': 'MM', 'namibia': 'NA', 'nepal': 'NP', 'netherlands': 'NL',
  'new zealand': 'NZ', 'nicaragua': 'NI', 'niger': 'NE', 'nigeria': 'NG',
  'north korea': 'KP', 'north macedonia': 'MK', 'norway': 'NO',
  'oman': 'OM', 'pakistan': 'PK', 'palestine': 'PS', 'panama': 'PA',
  'papua new guinea': 'PG', 'paraguay': 'PY', 'peru': 'PE',
  'philippines': 'PH', 'poland': 'PL', 'portugal': 'PT', 'qatar': 'QA',
  'romania': 'RO', 'russia': 'RU', 'rwanda': 'RW', 'saudi arabia': 'SA',
  'senegal': 'SN', 'serbia': 'RS', 'sierra leone': 'SL', 'singapore': 'SG',
  'slovakia': 'SK', 'slovenia': 'SI', 'somalia': 'SO', 'south africa': 'ZA',
  'south korea': 'KR', 'south sudan': 'SS', 'spain': 'ES',
  'sri lanka': 'LK', 'sudan': 'SD', 'sweden': 'SE', 'switzerland': 'CH',
  'syria': 'SY', 'taiwan': 'TW', 'tajikistan': 'TJ', 'tanzania': 'TZ',
  'thailand': 'TH', 'togo': 'TG', 'trinidad and tobago': 'TT',
  'tunisia': 'TN', 'turkey': 'TR', 'turkmenistan': 'TM', 'uganda': 'UG',
  'ukraine': 'UA', 'united arab emirates': 'AE', 'uae': 'AE',
  'united kingdom': 'GB', 'uk': 'GB', 'united states': 'US', 'usa': 'US',
  'uruguay': 'UY', 'uzbekistan': 'UZ', 'venezuela': 'VE', 'vietnam': 'VN',
  'yemen': 'YE', 'zambia': 'ZM', 'zimbabwe': 'ZW',
  'east timor': 'TL', 'cape verde': 'CV', 'swaziland': 'SZ',
  'republic of the congo': 'CG',
};
Object.entries(COUNTRY_NAME_TO_ISO2).forEach(([countryName, code]) => {
  add(countryName, code, 'correlation');
});
// Sort keys alphabetically so the generated file diffs cleanly between runs.
const sorted = Object.fromEntries(
  Object.entries(result).sort(([a], [b]) => a.localeCompare(b))
);

// Overwrite the canonical map in place (pretty-printed, trailing newline).
fs.writeFileSync(existingPath, JSON.stringify(sorted, null, 2) + '\n');
console.log(`Existing: ${Object.keys(existing).length}, Added: ${added}, Total: ${Object.keys(sorted).length}`);

// Post-write sanity report: problems are logged to stderr only; the file has
// already been written and the exit status is not affected.
for (const [k, v] of Object.entries(sorted)) {
  // Key and value are separated so the offending pair is readable in the log.
  if (!/^[A-Z]{2}$/.test(v)) console.error(`INVALID VALUE: ${k} → ${v}`);
  if (k !== k.toLowerCase()) console.error(`NON-LOWERCASE KEY: ${k}`);
}

View File

@@ -0,0 +1,67 @@
'use strict';
const fs = require('fs');
const path = require('path');
// Repository root: the directory one level above this script.
const root = path.resolve(__dirname, '..');
const geojson = JSON.parse(fs.readFileSync(path.join(root, 'public', 'data', 'countries.geojson'), 'utf8'));

// ISO3 -> ISO2 pairs harvested from features carrying two well-formed codes.
const iso3ToIso2 = {};
// Human-readable notes about features that had to be skipped.
const discrepancies = [];

geojson.features.forEach((feature) => {
  const props = feature.properties || {};
  const iso2 = String(props['ISO3166-1-Alpha-2'] || '').trim();
  const iso3 = String(props['ISO3166-1-Alpha-3'] || '').trim();
  const iso2Valid = /^[A-Z]{2}$/.test(iso2);
  const iso3Valid = /^[A-Z]{3}$/.test(iso3);

  if (iso2Valid && iso3Valid) {
    iso3ToIso2[iso3] = iso2;
  } else if (iso2Valid) {
    // Good ISO2 but unusable ISO3.
    discrepancies.push(`Skipped ${props.name} (${iso2}): invalid ISO3 "${props['ISO3166-1-Alpha-3']}"`);
  } else if (iso3Valid) {
    // Good ISO3 but unusable ISO2. Features with neither code are dropped silently.
    discrepancies.push(`Skipped ${iso3} (${props.name}): invalid ISO2 "${props['ISO3166-1-Alpha-2']}"`);
  }
});
// Supplements for codes the geojson pass could not provide.
// Each row: ISO3, ISO2, and the message logged when the pair is injected.
// A pair is injected only if the geojson pass left that ISO3 unmapped.
const supplements = [
  ['TWN', 'TW', 'Added supplement: TWN → TW (Taiwan has CN-TW in geojson)'],
  ['XKX', 'XK', 'Added supplement: XKX → XK (Kosovo absent from geojson)'],
];
for (const [alpha3, alpha2, notice] of supplements) {
  if (iso3ToIso2[alpha3]) continue;
  iso3ToIso2[alpha3] = alpha2;
  console.log(notice);
}
// Order entries by key so both generated files are stable between runs.
const compareByKey = ([left], [right]) => left.localeCompare(right);

const sorted3to2 = Object.fromEntries(Object.entries(iso3ToIso2).sort(compareByKey));

// Reverse lookup (ISO2 -> ISO3). Iteration follows the sorted ISO3 order, and the
// first ISO3 seen for a given ISO2 is kept; later ones are ignored.
const iso2ToIso3 = {};
Object.entries(sorted3to2).forEach(([alpha3, alpha2]) => {
  if (iso2ToIso3[alpha2]) return;
  iso2ToIso3[alpha2] = alpha3;
});

const sorted2to3 = Object.fromEntries(Object.entries(iso2ToIso3).sort(compareByKey));
// Write both maps into shared/ as pretty-printed JSON with a trailing newline,
// logging the entry count and destination after each write.
const writeSharedMap = (fileName, map) => {
  const destination = path.join(root, 'shared', fileName);
  fs.writeFileSync(destination, JSON.stringify(map, null, 2) + '\n');
  console.log(`Wrote ${Object.keys(map).length} entries to ${destination}`);
  return destination;
};
const out3to2 = writeSharedMap('iso3-to-iso2.json', sorted3to2);
const out2to3 = writeSharedMap('iso2-to-iso3.json', sorted2to3);

// Finish with the list of skipped features, if there were any.
if (discrepancies.length) {
  console.log(`\nDiscrepancies (${discrepancies.length}):`);
  discrepancies.forEach((note) => {
    console.log(` ${note}`);
  });
}

View File

@@ -16,7 +16,7 @@
* - searchGdeltDocuments: per-query GDELT search
*/
import { loadEnvFile, CHROME_UA, runSeed, writeExtraKeyWithMeta, sleep } from './_seed-utils.mjs';
import { loadEnvFile, CHROME_UA, runSeed, writeExtraKeyWithMeta, sleep, loadSharedConfig } from './_seed-utils.mjs';
loadEnvFile(import.meta.url);
@@ -26,18 +26,12 @@ const HAPI_CACHE_KEY_PREFIX = 'conflict:humanitarian:v1';
const HAPI_TTL = 21600;
const PIZZINT_TTL = 600;
// Top conflict countries (ISO2) for humanitarian pre-seeding
const CONFLICT_COUNTRIES = [
'AF', 'SY', 'UA', 'SD', 'SS', 'SO', 'CD', 'MM', 'YE', 'ET',
'IQ', 'PS', 'LY', 'ML', 'BF', 'NE', 'NG', 'CM', 'MZ', 'HT',
];
const ISO2_TO_ISO3 = {
AF: 'AFG', SY: 'SYR', UA: 'UKR', SD: 'SDN', SS: 'SSD', SO: 'SOM',
CD: 'COD', MM: 'MMR', YE: 'YEM', ET: 'ETH', IQ: 'IRQ', PS: 'PSE',
LY: 'LBY', ML: 'MLI', BF: 'BFA', NE: 'NER', NG: 'NGA', CM: 'CMR',
MZ: 'MOZ', HT: 'HTI',
};
const ISO2_TO_ISO3 = loadSharedConfig('iso2-to-iso3.json');
// ─── ACLED Events ───

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env node
import { loadEnvFile, runSeed, getRedisCredentials } from './_seed-utils.mjs';
import { loadEnvFile, runSeed, getRedisCredentials, loadSharedConfig } from './_seed-utils.mjs';
import { resolveIso2, normalizeCountryToken } from './_country-resolver.mjs';
loadEnvFile(import.meta.url);
@@ -53,86 +54,8 @@ function haversineKm(lat1, lon1, lat2, lon2) {
return R * 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a));
}
// ── Country Name Resolution ─────────────────────────────────
const COUNTRY_NAME_TO_ISO2 = {
'afghanistan': 'AF', 'albania': 'AL', 'algeria': 'DZ', 'angola': 'AO',
'argentina': 'AR', 'armenia': 'AM', 'australia': 'AU', 'austria': 'AT',
'azerbaijan': 'AZ', 'bahrain': 'BH', 'bangladesh': 'BD', 'belarus': 'BY',
'belgium': 'BE', 'bolivia': 'BO', 'bosnia and herzegovina': 'BA',
'brazil': 'BR', 'bulgaria': 'BG', 'burkina faso': 'BF', 'burma': 'MM',
'cambodia': 'KH', 'cameroon': 'CM', 'canada': 'CA', 'chad': 'TD',
'chile': 'CL', 'china': 'CN', 'colombia': 'CO', 'congo': 'CG',
'costa rica': 'CR', 'croatia': 'HR', 'cuba': 'CU', 'cyprus': 'CY',
'czech republic': 'CZ', 'czechia': 'CZ',
'democratic republic of the congo': 'CD', 'dr congo': 'CD', 'drc': 'CD',
'denmark': 'DK', 'djibouti': 'DJ', 'dominican republic': 'DO',
'ecuador': 'EC', 'egypt': 'EG', 'el salvador': 'SV', 'eritrea': 'ER',
'estonia': 'EE', 'ethiopia': 'ET', 'finland': 'FI', 'france': 'FR',
'gabon': 'GA', 'georgia': 'GE', 'germany': 'DE', 'ghana': 'GH',
'greece': 'GR', 'guatemala': 'GT', 'guinea': 'GN', 'haiti': 'HT',
'honduras': 'HN', 'hungary': 'HU', 'iceland': 'IS', 'india': 'IN',
'indonesia': 'ID', 'iran': 'IR', 'iraq': 'IQ', 'ireland': 'IE',
'israel': 'IL', 'italy': 'IT', 'ivory coast': 'CI', "cote d'ivoire": 'CI',
'jamaica': 'JM', 'japan': 'JP', 'jordan': 'JO', 'kazakhstan': 'KZ',
'kenya': 'KE', 'kosovo': 'XK', 'kuwait': 'KW', 'kyrgyzstan': 'KG',
'laos': 'LA', 'latvia': 'LV', 'lebanon': 'LB', 'libya': 'LY',
'lithuania': 'LT', 'madagascar': 'MG', 'malawi': 'MW', 'malaysia': 'MY',
'mali': 'ML', 'mauritania': 'MR', 'mexico': 'MX', 'moldova': 'MD',
'mongolia': 'MN', 'montenegro': 'ME', 'morocco': 'MA', 'mozambique': 'MZ',
'myanmar': 'MM', 'namibia': 'NA', 'nepal': 'NP', 'netherlands': 'NL',
'new zealand': 'NZ', 'nicaragua': 'NI', 'niger': 'NE', 'nigeria': 'NG',
'north korea': 'KP', 'north macedonia': 'MK', 'norway': 'NO',
'oman': 'OM', 'pakistan': 'PK', 'palestine': 'PS', 'panama': 'PA',
'papua new guinea': 'PG', 'paraguay': 'PY', 'peru': 'PE',
'philippines': 'PH', 'poland': 'PL', 'portugal': 'PT', 'qatar': 'QA',
'romania': 'RO', 'russia': 'RU', 'rwanda': 'RW', 'saudi arabia': 'SA',
'senegal': 'SN', 'serbia': 'RS', 'sierra leone': 'SL', 'singapore': 'SG',
'slovakia': 'SK', 'slovenia': 'SI', 'somalia': 'SO', 'south africa': 'ZA',
'south korea': 'KR', 'south sudan': 'SS', 'spain': 'ES',
'sri lanka': 'LK', 'sudan': 'SD', 'sweden': 'SE', 'switzerland': 'CH',
'syria': 'SY', 'taiwan': 'TW', 'tajikistan': 'TJ', 'tanzania': 'TZ',
'thailand': 'TH', 'togo': 'TG', 'trinidad and tobago': 'TT',
'tunisia': 'TN', 'turkey': 'TR', 'turkmenistan': 'TM', 'uganda': 'UG',
'ukraine': 'UA', 'united arab emirates': 'AE', 'uae': 'AE',
'united kingdom': 'GB', 'uk': 'GB', 'united states': 'US', 'usa': 'US',
'uruguay': 'UY', 'uzbekistan': 'UZ', 'venezuela': 'VE', 'vietnam': 'VN',
'yemen': 'YE', 'zambia': 'ZM', 'zimbabwe': 'ZW',
'east timor': 'TL', 'cape verde': 'CV', 'swaziland': 'SZ',
'republic of the congo': 'CG',
};
const ISO3_TO_ISO2 = {
'AFG': 'AF', 'ALB': 'AL', 'DZA': 'DZ', 'AGO': 'AO', 'ARG': 'AR',
'ARM': 'AM', 'AUS': 'AU', 'AUT': 'AT', 'AZE': 'AZ', 'BHR': 'BH',
'BGD': 'BD', 'BLR': 'BY', 'BEL': 'BE', 'BOL': 'BO', 'BIH': 'BA',
'BRA': 'BR', 'BGR': 'BG', 'BFA': 'BF', 'KHM': 'KH', 'CMR': 'CM',
'CAN': 'CA', 'TCD': 'TD', 'CHL': 'CL', 'CHN': 'CN', 'COL': 'CO',
'COG': 'CG', 'CRI': 'CR', 'HRV': 'HR', 'CUB': 'CU', 'CYP': 'CY',
'CZE': 'CZ', 'COD': 'CD', 'DNK': 'DK', 'DJI': 'DJ', 'DOM': 'DO',
'ECU': 'EC', 'EGY': 'EG', 'SLV': 'SV', 'ERI': 'ER', 'EST': 'EE',
'ETH': 'ET', 'FIN': 'FI', 'FRA': 'FR', 'GAB': 'GA', 'GEO': 'GE',
'DEU': 'DE', 'GHA': 'GH', 'GRC': 'GR', 'GTM': 'GT', 'GIN': 'GN',
'HTI': 'HT', 'HND': 'HN', 'HUN': 'HU', 'ISL': 'IS', 'IND': 'IN',
'IDN': 'ID', 'IRN': 'IR', 'IRQ': 'IQ', 'IRL': 'IE', 'ISR': 'IL',
'ITA': 'IT', 'CIV': 'CI', 'JAM': 'JM', 'JPN': 'JP', 'JOR': 'JO',
'KAZ': 'KZ', 'KEN': 'KE', 'XKX': 'XK', 'KWT': 'KW', 'KGZ': 'KG',
'LAO': 'LA', 'LVA': 'LV', 'LBN': 'LB', 'LBY': 'LY', 'LTU': 'LT',
'MDG': 'MG', 'MWI': 'MW', 'MYS': 'MY', 'MLI': 'ML', 'MRT': 'MR',
'MEX': 'MX', 'MDA': 'MD', 'MNG': 'MN', 'MNE': 'ME', 'MAR': 'MA',
'MOZ': 'MZ', 'MMR': 'MM', 'NAM': 'NA', 'NPL': 'NP', 'NLD': 'NL',
'NZL': 'NZ', 'NIC': 'NI', 'NER': 'NE', 'NGA': 'NG', 'PRK': 'KP',
'MKD': 'MK', 'NOR': 'NO', 'OMN': 'OM', 'PAK': 'PK', 'PSE': 'PS',
'PAN': 'PA', 'PNG': 'PG', 'PRY': 'PY', 'PER': 'PE', 'PHL': 'PH',
'POL': 'PL', 'PRT': 'PT', 'QAT': 'QA', 'ROU': 'RO', 'RUS': 'RU',
'RWA': 'RW', 'SAU': 'SA', 'SEN': 'SN', 'SRB': 'RS', 'SLE': 'SL',
'SGP': 'SG', 'SVK': 'SK', 'SVN': 'SI', 'SOM': 'SO', 'ZAF': 'ZA',
'KOR': 'KR', 'SSD': 'SS', 'ESP': 'ES', 'LKA': 'LK', 'SDN': 'SD',
'SWE': 'SE', 'CHE': 'CH', 'SYR': 'SY', 'TWN': 'TW', 'TJK': 'TJ',
'TZA': 'TZ', 'THA': 'TH', 'TGO': 'TG', 'TTO': 'TT', 'TUN': 'TN',
'TUR': 'TR', 'TKM': 'TM', 'UGA': 'UG', 'UKR': 'UA', 'ARE': 'AE',
'GBR': 'GB', 'USA': 'US', 'URY': 'UY', 'UZB': 'UZ', 'VEN': 'VE',
'VNM': 'VN', 'YEM': 'YE', 'ZMB': 'ZM', 'ZWE': 'ZW',
};
const COUNTRY_NAME_TO_ISO2 = loadSharedConfig('country-names.json');
const ISO3_TO_ISO2 = loadSharedConfig('iso3-to-iso2.json');
const COUNTRY_CENTROIDS = {
'AF':[33.9,67.7],'AL':[41.2,20.2],'DZ':[28.0,1.7],'AO':[-11.2,17.9],'AR':[-38.4,-63.6],
@@ -181,23 +104,21 @@ function nearestCountryByCoords(lat, lon) {
function normalizeToCode(country, lat, lon) {
if (country) {
const t = country.trim();
if (t.length === 2) return t.toUpperCase();
if (t.length === 3) return ISO3_TO_ISO2[t.toUpperCase()] ?? undefined;
const fromName = COUNTRY_NAME_TO_ISO2[t.toLowerCase()];
if (fromName) return fromName;
const resolved = resolveIso2({ iso2: country, iso3: country, name: country });
if (resolved) return resolved;
}
return nearestCountryByCoords(lat, lon);
}
const COUNTRY_NAME_ENTRIES = Object.entries(COUNTRY_NAME_TO_ISO2)
.filter(([name]) => name.length >= 4)
.filter(([name]) => name.length >= 2)
.sort((a, b) => b[0].length - a[0].length)
.map(([name, code]) => ({ name, code, regex: new RegExp(`\\b${name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'i') }));
function matchCountryNamesInText(text) {
export function matchCountryNamesInText(text) {
const matched = [];
let remaining = text.toLowerCase();
let remaining = text.normalize('NFKD').replace(/\p{Diacritic}/gu, '').toLowerCase()
.replace(/['.(),/-]/g, ' ').replace(/\s+/g, ' ');
for (const { code, regex } of COUNTRY_NAME_ENTRIES) {
if (regex.test(remaining)) {
matched.push(code);
@@ -773,6 +694,7 @@ async function computeCorrelation() {
return result;
}
if (process.argv[1]?.endsWith('seed-correlation.mjs')) {
runSeed('correlation', 'cards', CANONICAL_KEY, computeCorrelation, {
ttlSeconds: CACHE_TTL,
sourceVersion: 'correlation-engine-v1',
@@ -791,3 +713,4 @@ runSeed('correlation', 'cards', CANONICAL_KEY, computeCorrelation, {
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : ''; console.error('FATAL:', (err.message || err) + _cause);
process.exit(1);
});
}

View File

@@ -12,7 +12,6 @@ import {
withRetry,
} from './_seed-utils.mjs';
import {
COUNTRY_ALIAS_MAP,
createCountryResolvers,
isIso2,
isIso3,

View File

@@ -71,9 +71,10 @@ const BY_COUNTRY_NAME = Object.fromEntries(
function extractCountry(title, feed) {
if (feed.targetCountry) return feed.targetCountry;
if (feed.sourceCountry === 'EU' || feed.sourceCountry === 'INT') return undefined;
const lower = title.toLowerCase();
const normalized = title.normalize('NFKD').replace(/\p{Diacritic}/gu, '').toLowerCase()
.replace(/['.(),/-]/g, ' ').replace(/\s+/g, ' ');
for (const [name, code] of SORTED_COUNTRY_ENTRIES) {
if (lower.includes(name)) return code;
if (normalized.includes(name)) return code;
}
return undefined;
}

View File

@@ -16,6 +16,7 @@
"austria": "AT",
"azerbaijan": "AZ",
"bahamas": "BS",
"bahamas the": "BS",
"bahrain": "BH",
"bangladesh": "BD",
"barbados": "BB",
@@ -25,6 +26,7 @@
"benin": "BJ",
"bermuda": "BM",
"bhutan": "BT",
"bolivarian republic of venezuela": "VE",
"bolivia": "BO",
"bosnia and herzegovina": "BA",
"botswana": "BW",
@@ -49,25 +51,32 @@
"china": "CN",
"colombia": "CO",
"comoros": "KM",
"congo": "CG",
"congo brazzaville": "CG",
"congo dem rep": "CD",
"congo kinshasa": "CD",
"congo rep": "CG",
"cook islands": "CK",
"costa rica": "CR",
"cote d ivoire": "CI",
"cote d'ivoire": "CI",
"croatia": "HR",
"cuba": "CU",
"curaçao": "CW",
"curacao": "CW",
"cyprus": "CY",
"czech republic": "CZ",
"czechia": "CZ",
"democratic peoples republic of korea": "KP",
"democratic republic of the congo": "CD",
"denmark": "DK",
"djibouti": "DJ",
"dominica": "DM",
"dominican republic": "DO",
"dr congo": "CD",
"drc": "CD",
"east timor": "TL",
"ecuador": "EC",
"egypt": "EG",
"egypt arab rep": "EG",
"el salvador": "SV",
"equatorial guinea": "GQ",
"eritrea": "ER",
@@ -84,6 +93,7 @@
"french southern and antarctic lands": "TF",
"gabon": "GA",
"gambia": "GM",
"gambia the": "GM",
"gaza": "PS",
"georgia": "GE",
"germany": "DE",
@@ -96,17 +106,20 @@
"guatemala": "GT",
"guernsey": "GG",
"guinea": "GN",
"guinea-bissau": "GW",
"guinea bissau": "GW",
"guyana": "GY",
"haiti": "HT",
"heard island and mcdonald islands": "HM",
"honduras": "HN",
"hong kong s.a.r.": "HK",
"hong kong": "HK",
"hong kong s a r": "HK",
"hong kong sar china": "HK",
"hungary": "HU",
"iceland": "IS",
"india": "IN",
"indonesia": "ID",
"iran": "IR",
"iran islamic rep": "IR",
"iraq": "IQ",
"ireland": "IE",
"isle of man": "IM",
@@ -120,6 +133,8 @@
"kazakhstan": "KZ",
"kenya": "KE",
"kiribati": "KI",
"korea dem peoples rep": "KP",
"korea rep": "KR",
"kosovo": "XK",
"kuwait": "KW",
"kyrgyz republic": "KG",
@@ -134,7 +149,8 @@
"liechtenstein": "LI",
"lithuania": "LT",
"luxembourg": "LU",
"macao s.a.r": "MO",
"macao s a r": "MO",
"macao sar china": "MO",
"madagascar": "MG",
"malawi": "MW",
"malaysia": "MY",
@@ -146,12 +162,14 @@
"mauritius": "MU",
"mexico": "MX",
"micronesia": "FM",
"micronesia fed sts": "FM",
"moldova": "MD",
"monaco": "MC",
"mongolia": "MN",
"montenegro": "ME",
"montserrat": "MS",
"morocco": "MA",
"morocco western sahara": "MA",
"mozambique": "MZ",
"myanmar": "MM",
"namibia": "NA",
@@ -169,24 +187,30 @@
"north macedonia": "MK",
"northern mariana islands": "MP",
"norway": "NO",
"occupied palestinian territory": "PS",
"oman": "OM",
"pakistan": "PK",
"palau": "PW",
"palestine": "PS",
"palestine state of": "PS",
"palestinian territories": "PS",
"panama": "PA",
"papua new guinea": "PG",
"paraguay": "PY",
"peru": "PE",
"philippines": "PH",
"pitcairn islands": "PN",
"plurinational state of bolivia": "BO",
"poland": "PL",
"portugal": "PT",
"puerto rico": "PR",
"qatar": "QA",
"republic of korea": "KR",
"republic of serbia": "RS",
"republic of the congo": "CG",
"romania": "RO",
"russia": "RU",
"russian federation": "RU",
"rwanda": "RW",
"saint barthelemy": "BL",
"saint helena": "SH",
@@ -197,13 +221,16 @@
"saint vincent and the grenadines": "VC",
"samoa": "WS",
"san marino": "SM",
"são tomé and principe": "ST",
"sao tome": "ST",
"sao tome and principe": "ST",
"saudi arabia": "SA",
"senegal": "SN",
"serbia": "RS",
"seychelles": "SC",
"sierra leone": "SL",
"singapore": "SG",
"sint maarten": "SX",
"slovak republic": "SK",
"slovakia": "SK",
"slovenia": "SI",
"solomon islands": "SB",
@@ -214,13 +241,19 @@
"south sudan": "SS",
"spain": "ES",
"sri lanka": "LK",
"st kitts and nevis": "KN",
"st lucia": "LC",
"st vincent and the grenadines": "VC",
"sudan": "SD",
"suriname": "SR",
"swaziland": "SZ",
"sweden": "SE",
"switzerland": "CH",
"syria": "SY",
"syrian arab republic": "SY",
"taiwan": "TW",
"tajikistan": "TJ",
"tanzania": "TZ",
"thailand": "TH",
"the bahamas": "BS",
"the comoros": "KM",
@@ -229,17 +262,18 @@
"the netherlands": "NL",
"the philippines": "PH",
"the seychelles": "SC",
"timor-leste": "TL",
"timor leste": "TL",
"togo": "TG",
"tonga": "TO",
"trinidad and tobago": "TT",
"tunisia": "TN",
"turkey": "TR",
"turkiye": "TR",
"turkmenistan": "TM",
"turks and caicos": "TC",
"turks and caicos islands": "TC",
"tuvalu": "TV",
"u.s. virgin islands": "VI",
"u s virgin islands": "VI",
"uae": "AE",
"uganda": "UG",
"uk": "GB",
@@ -257,11 +291,15 @@
"vanuatu": "VU",
"vatican": "VA",
"venezuela": "VE",
"venezuela rb": "VE",
"viet nam": "VN",
"vietnam": "VN",
"wallis and futuna": "WF",
"west bank": "PS",
"west bank and gaza": "PS",
"western sahara": "EH",
"yemen": "YE",
"yemen rep": "YE",
"zambia": "ZM",
"zimbabwe": "ZW"
}

View File

@@ -102,14 +102,11 @@ export function extractCountryCode(text) {
// Normalize uppercase `US` (country abbreviation) to `united states` before lowercasing,
// so it survives the stopword pass. Lowercase `us` (pronoun) has no equivalent expansion
// and is stopped by UNIGRAM_STOPWORDS. `\b` avoids matching inside words like "plus".
const normalized = text.replace(/\bUS\b/g, 'United States');
const lower = normalized.toLowerCase();
const normalized = text.replace(/\bUS\b/g, 'United States')
.normalize('NFKD').replace(/\p{Diacritic}/gu, '').toLowerCase()
.replace(/['.(),/-]/g, ' ');
// Single left-to-right scan with local longest-match priority:
// at each position try bigram first (strips punctuation so "West Bank," works),
// then fall back to unigram. This preserves document order so the first
// country mentioned in the headline wins regardless of alias length.
const words = lower.split(/\s+/);
const words = normalized.split(/\s+/).filter(Boolean);
for (let i = 0; i < words.length; i++) {
if (i < words.length - 1) {
const left = words[i].replace(/[^a-z]/g, '');

View File

@@ -0,0 +1,241 @@
{
"AD": "AND",
"AE": "ARE",
"AF": "AFG",
"AG": "ATG",
"AI": "AIA",
"AL": "ALB",
"AM": "ARM",
"AO": "AGO",
"AQ": "ATA",
"AR": "ARG",
"AS": "ASM",
"AT": "AUT",
"AU": "AUS",
"AW": "ABW",
"AX": "ALA",
"AZ": "AZE",
"BA": "BIH",
"BB": "BRB",
"BD": "BGD",
"BE": "BEL",
"BF": "BFA",
"BG": "BGR",
"BH": "BHR",
"BI": "BDI",
"BJ": "BEN",
"BL": "BLM",
"BM": "BMU",
"BN": "BRN",
"BO": "BOL",
"BR": "BRA",
"BS": "BHS",
"BT": "BTN",
"BW": "BWA",
"BY": "BLR",
"BZ": "BLZ",
"CA": "CAN",
"CD": "COD",
"CF": "CAF",
"CG": "COG",
"CH": "CHE",
"CI": "CIV",
"CK": "COK",
"CL": "CHL",
"CM": "CMR",
"CN": "CHN",
"CO": "COL",
"CR": "CRI",
"CU": "CUB",
"CV": "CPV",
"CW": "CUW",
"CY": "CYP",
"CZ": "CZE",
"DE": "DEU",
"DJ": "DJI",
"DK": "DNK",
"DM": "DMA",
"DO": "DOM",
"DZ": "DZA",
"EC": "ECU",
"EE": "EST",
"EG": "EGY",
"EH": "ESH",
"ER": "ERI",
"ES": "ESP",
"ET": "ETH",
"FI": "FIN",
"FJ": "FJI",
"FK": "FLK",
"FM": "FSM",
"FO": "FRO",
"FR": "FRA",
"GA": "GAB",
"GB": "GBR",
"GD": "GRD",
"GE": "GEO",
"GG": "GGY",
"GH": "GHA",
"GI": "GIB",
"GL": "GRL",
"GM": "GMB",
"GN": "GIN",
"GQ": "GNQ",
"GR": "GRC",
"GS": "SGS",
"GT": "GTM",
"GU": "GUM",
"GW": "GNB",
"GY": "GUY",
"HK": "HKG",
"HM": "HMD",
"HN": "HND",
"HR": "HRV",
"HT": "HTI",
"HU": "HUN",
"ID": "IDN",
"IE": "IRL",
"IL": "ISR",
"IM": "IMN",
"IN": "IND",
"IO": "IOT",
"IQ": "IRQ",
"IR": "IRN",
"IS": "ISL",
"IT": "ITA",
"JE": "JEY",
"JM": "JAM",
"JO": "JOR",
"JP": "JPN",
"KE": "KEN",
"KG": "KGZ",
"KH": "KHM",
"KI": "KIR",
"KM": "COM",
"KN": "KNA",
"KP": "PRK",
"KR": "KOR",
"KW": "KWT",
"KY": "CYM",
"KZ": "KAZ",
"LA": "LAO",
"LB": "LBN",
"LC": "LCA",
"LI": "LIE",
"LK": "LKA",
"LR": "LBR",
"LS": "LSO",
"LT": "LTU",
"LU": "LUX",
"LV": "LVA",
"LY": "LBY",
"MA": "MAR",
"MC": "MCO",
"MD": "MDA",
"ME": "MNE",
"MF": "MAF",
"MG": "MDG",
"MH": "MHL",
"MK": "MKD",
"ML": "MLI",
"MM": "MMR",
"MN": "MNG",
"MO": "MAC",
"MP": "MNP",
"MR": "MRT",
"MS": "MSR",
"MT": "MLT",
"MU": "MUS",
"MV": "MDV",
"MW": "MWI",
"MX": "MEX",
"MY": "MYS",
"MZ": "MOZ",
"NA": "NAM",
"NC": "NCL",
"NE": "NER",
"NF": "NFK",
"NG": "NGA",
"NI": "NIC",
"NL": "NLD",
"NO": "NOR",
"NP": "NPL",
"NR": "NRU",
"NU": "NIU",
"NZ": "NZL",
"OM": "OMN",
"PA": "PAN",
"PE": "PER",
"PF": "PYF",
"PG": "PNG",
"PH": "PHL",
"PK": "PAK",
"PL": "POL",
"PM": "SPM",
"PN": "PCN",
"PR": "PRI",
"PS": "PSE",
"PT": "PRT",
"PW": "PLW",
"PY": "PRY",
"QA": "QAT",
"RO": "ROU",
"RS": "SRB",
"RU": "RUS",
"RW": "RWA",
"SA": "SAU",
"SB": "SLB",
"SC": "SYC",
"SD": "SDN",
"SE": "SWE",
"SG": "SGP",
"SH": "SHN",
"SI": "SVN",
"SK": "SVK",
"SL": "SLE",
"SM": "SMR",
"SN": "SEN",
"SO": "SOM",
"SR": "SUR",
"SS": "SSD",
"ST": "STP",
"SV": "SLV",
"SX": "SXM",
"SY": "SYR",
"SZ": "SWZ",
"TC": "TCA",
"TD": "TCD",
"TF": "ATF",
"TG": "TGO",
"TH": "THA",
"TJ": "TJK",
"TL": "TLS",
"TM": "TKM",
"TN": "TUN",
"TO": "TON",
"TR": "TUR",
"TT": "TTO",
"TV": "TUV",
"TW": "TWN",
"TZ": "TZA",
"UA": "UKR",
"UG": "UGA",
"UM": "UMI",
"US": "USA",
"UY": "URY",
"UZ": "UZB",
"VA": "VAT",
"VC": "VCT",
"VE": "VEN",
"VG": "VGB",
"VI": "VIR",
"VN": "VNM",
"VU": "VUT",
"WF": "WLF",
"WS": "WSM",
"XK": "XKX",
"YE": "YEM",
"ZA": "ZAF",
"ZM": "ZMB",
"ZW": "ZWE"
}

View File

@@ -0,0 +1,241 @@
{
"ABW": "AW",
"AFG": "AF",
"AGO": "AO",
"AIA": "AI",
"ALA": "AX",
"ALB": "AL",
"AND": "AD",
"ARE": "AE",
"ARG": "AR",
"ARM": "AM",
"ASM": "AS",
"ATA": "AQ",
"ATF": "TF",
"ATG": "AG",
"AUS": "AU",
"AUT": "AT",
"AZE": "AZ",
"BDI": "BI",
"BEL": "BE",
"BEN": "BJ",
"BFA": "BF",
"BGD": "BD",
"BGR": "BG",
"BHR": "BH",
"BHS": "BS",
"BIH": "BA",
"BLM": "BL",
"BLR": "BY",
"BLZ": "BZ",
"BMU": "BM",
"BOL": "BO",
"BRA": "BR",
"BRB": "BB",
"BRN": "BN",
"BTN": "BT",
"BWA": "BW",
"CAF": "CF",
"CAN": "CA",
"CHE": "CH",
"CHL": "CL",
"CHN": "CN",
"CIV": "CI",
"CMR": "CM",
"COD": "CD",
"COG": "CG",
"COK": "CK",
"COL": "CO",
"COM": "KM",
"CPV": "CV",
"CRI": "CR",
"CUB": "CU",
"CUW": "CW",
"CYM": "KY",
"CYP": "CY",
"CZE": "CZ",
"DEU": "DE",
"DJI": "DJ",
"DMA": "DM",
"DNK": "DK",
"DOM": "DO",
"DZA": "DZ",
"ECU": "EC",
"EGY": "EG",
"ERI": "ER",
"ESH": "EH",
"ESP": "ES",
"EST": "EE",
"ETH": "ET",
"FIN": "FI",
"FJI": "FJ",
"FLK": "FK",
"FRA": "FR",
"FRO": "FO",
"FSM": "FM",
"GAB": "GA",
"GBR": "GB",
"GEO": "GE",
"GGY": "GG",
"GHA": "GH",
"GIB": "GI",
"GIN": "GN",
"GMB": "GM",
"GNB": "GW",
"GNQ": "GQ",
"GRC": "GR",
"GRD": "GD",
"GRL": "GL",
"GTM": "GT",
"GUM": "GU",
"GUY": "GY",
"HKG": "HK",
"HMD": "HM",
"HND": "HN",
"HRV": "HR",
"HTI": "HT",
"HUN": "HU",
"IDN": "ID",
"IMN": "IM",
"IND": "IN",
"IOT": "IO",
"IRL": "IE",
"IRN": "IR",
"IRQ": "IQ",
"ISL": "IS",
"ISR": "IL",
"ITA": "IT",
"JAM": "JM",
"JEY": "JE",
"JOR": "JO",
"JPN": "JP",
"KAZ": "KZ",
"KEN": "KE",
"KGZ": "KG",
"KHM": "KH",
"KIR": "KI",
"KNA": "KN",
"KOR": "KR",
"KWT": "KW",
"LAO": "LA",
"LBN": "LB",
"LBR": "LR",
"LBY": "LY",
"LCA": "LC",
"LIE": "LI",
"LKA": "LK",
"LSO": "LS",
"LTU": "LT",
"LUX": "LU",
"LVA": "LV",
"MAC": "MO",
"MAF": "MF",
"MAR": "MA",
"MCO": "MC",
"MDA": "MD",
"MDG": "MG",
"MDV": "MV",
"MEX": "MX",
"MHL": "MH",
"MKD": "MK",
"MLI": "ML",
"MLT": "MT",
"MMR": "MM",
"MNE": "ME",
"MNG": "MN",
"MNP": "MP",
"MOZ": "MZ",
"MRT": "MR",
"MSR": "MS",
"MUS": "MU",
"MWI": "MW",
"MYS": "MY",
"NAM": "NA",
"NCL": "NC",
"NER": "NE",
"NFK": "NF",
"NGA": "NG",
"NIC": "NI",
"NIU": "NU",
"NLD": "NL",
"NOR": "NO",
"NPL": "NP",
"NRU": "NR",
"NZL": "NZ",
"OMN": "OM",
"PAK": "PK",
"PAN": "PA",
"PCN": "PN",
"PER": "PE",
"PHL": "PH",
"PLW": "PW",
"PNG": "PG",
"POL": "PL",
"PRI": "PR",
"PRK": "KP",
"PRT": "PT",
"PRY": "PY",
"PSE": "PS",
"PYF": "PF",
"QAT": "QA",
"ROU": "RO",
"RUS": "RU",
"RWA": "RW",
"SAU": "SA",
"SDN": "SD",
"SEN": "SN",
"SGP": "SG",
"SGS": "GS",
"SHN": "SH",
"SLB": "SB",
"SLE": "SL",
"SLV": "SV",
"SMR": "SM",
"SOM": "SO",
"SPM": "PM",
"SRB": "RS",
"SSD": "SS",
"STP": "ST",
"SUR": "SR",
"SVK": "SK",
"SVN": "SI",
"SWE": "SE",
"SWZ": "SZ",
"SXM": "SX",
"SYC": "SC",
"SYR": "SY",
"TCA": "TC",
"TCD": "TD",
"TGO": "TG",
"THA": "TH",
"TJK": "TJ",
"TKM": "TM",
"TLS": "TL",
"TON": "TO",
"TTO": "TT",
"TUN": "TN",
"TUR": "TR",
"TUV": "TV",
"TWN": "TW",
"TZA": "TZ",
"UGA": "UG",
"UKR": "UA",
"UMI": "UM",
"URY": "UY",
"USA": "US",
"UZB": "UZ",
"VAT": "VA",
"VCT": "VC",
"VEN": "VE",
"VGB": "VG",
"VIR": "VI",
"VNM": "VN",
"VUT": "VU",
"WLF": "WF",
"WSM": "WS",
"XKX": "XK",
"YEM": "YE",
"ZAF": "ZA",
"ZMB": "ZM",
"ZWE": "ZW"
}

View File

@@ -1,11 +1,5 @@
import iso2ToIso3Json from '../../../../shared/iso2-to-iso3.json';
export const UPSTREAM_TIMEOUT_MS = 15_000;
export const ISO2_TO_ISO3: Record<string, string> = {
US: 'USA', RU: 'RUS', CN: 'CHN', UA: 'UKR', IR: 'IRN',
IL: 'ISR', TW: 'TWN', KP: 'PRK', SA: 'SAU', TR: 'TUR',
PL: 'POL', DE: 'DEU', FR: 'FRA', GB: 'GBR', IN: 'IND',
PK: 'PAK', SY: 'SYR', YE: 'YEM', MM: 'MMR', VE: 'VEN',
AF: 'AFG', SD: 'SDN', SS: 'SSD', SO: 'SOM', CD: 'COD',
ET: 'ETH', IQ: 'IRQ', CO: 'COL', NG: 'NGA', PS: 'PSE',
BR: 'BRA', AE: 'ARE',
};
export const ISO2_TO_ISO3: Record<string, string> = iso2ToIso3Json;

View File

@@ -8,6 +8,7 @@ import type {
SeverityLevel,
} from '../../../../src/generated/server/worldmonitor/intelligence/v1/service_server';
import iso3ToIso2Json from '../../../../shared/iso3-to-iso2.json';
import { getCachedJson, setCachedJson, cachedFetchJsonWithMeta } from '../../../_shared/redis';
import { CLIMATE_ANOMALIES_KEY } from '../../../_shared/cache-keys';
import { TIER1_COUNTRIES } from './_shared';
@@ -141,15 +142,7 @@ function safeNum(v: unknown): number {
return Number.isFinite(n) ? n : 0;
}
// ISO3 → ISO2 mapping for displacement data (UNHCR uses ISO3)
const ISO3_TO_ISO2: Record<string, string> = {
USA: 'US', RUS: 'RU', CHN: 'CN', UKR: 'UA', IRN: 'IR', ISR: 'IL',
TWN: 'TW', PRK: 'KP', SAU: 'SA', TUR: 'TR', POL: 'PL', DEU: 'DE',
FRA: 'FR', GBR: 'GB', IND: 'IN', PAK: 'PK', SYR: 'SY', YEM: 'YE',
MMR: 'MM', VEN: 'VE', CUB: 'CU', MEX: 'MX', BRA: 'BR', ARE: 'AE',
KOR: 'KR', IRQ: 'IQ', AFG: 'AF', LBN: 'LB', EGY: 'EG', JPN: 'JP',
QAT: 'QA',
};
const ISO3_TO_ISO2: Record<string, string> = iso3ToIso2Json;
interface CountrySignals {
protests: number;

View File

@@ -1,8 +1,5 @@
import { readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
import countryNames from '../../../../shared/country-names.json';
import iso2ToIso3Json from '../../../../shared/iso2-to-iso3.json';
import { normalizeCountryToken } from '../../../_shared/country-token';
import { getCachedJson } from '../../../_shared/redis';
@@ -164,16 +161,7 @@ for (const [name, iso2] of Object.entries(countryNames as Record<string, string>
COUNTRY_NAME_ALIASES.set(code, current);
}
const ISO2_TO_ISO3: Record<string, string> = {};
{
const __dirname = dirname(fileURLToPath(import.meta.url));
const geojson = JSON.parse(readFileSync(join(__dirname, '../../../../public/data/countries.geojson'), 'utf8'));
for (const feature of geojson?.features ?? []) {
const iso2 = String(feature?.properties?.['ISO3166-1-Alpha-2'] ?? '').toUpperCase();
const iso3 = String(feature?.properties?.['ISO3166-1-Alpha-3'] ?? '').toUpperCase();
if (/^[A-Z]{2}$/.test(iso2) && /^[A-Z]{3}$/.test(iso3)) ISO2_TO_ISO3[iso2] = iso3;
}
}
const ISO2_TO_ISO3: Record<string, string> = iso2ToIso3Json;
const RESILIENCE_DOMAIN_WEIGHTS: Record<ResilienceDomainId, number> = {
economic: 0.22,

View File

@@ -16,6 +16,7 @@
"austria": "AT",
"azerbaijan": "AZ",
"bahamas": "BS",
"bahamas the": "BS",
"bahrain": "BH",
"bangladesh": "BD",
"barbados": "BB",
@@ -25,6 +26,7 @@
"benin": "BJ",
"bermuda": "BM",
"bhutan": "BT",
"bolivarian republic of venezuela": "VE",
"bolivia": "BO",
"bosnia and herzegovina": "BA",
"botswana": "BW",
@@ -49,25 +51,32 @@
"china": "CN",
"colombia": "CO",
"comoros": "KM",
"congo": "CG",
"congo brazzaville": "CG",
"congo dem rep": "CD",
"congo kinshasa": "CD",
"congo rep": "CG",
"cook islands": "CK",
"costa rica": "CR",
"cote d ivoire": "CI",
"cote d'ivoire": "CI",
"croatia": "HR",
"cuba": "CU",
"curaçao": "CW",
"curacao": "CW",
"cyprus": "CY",
"czech republic": "CZ",
"czechia": "CZ",
"democratic peoples republic of korea": "KP",
"democratic republic of the congo": "CD",
"denmark": "DK",
"djibouti": "DJ",
"dominica": "DM",
"dominican republic": "DO",
"dr congo": "CD",
"drc": "CD",
"east timor": "TL",
"ecuador": "EC",
"egypt": "EG",
"egypt arab rep": "EG",
"el salvador": "SV",
"equatorial guinea": "GQ",
"eritrea": "ER",
@@ -84,6 +93,7 @@
"french southern and antarctic lands": "TF",
"gabon": "GA",
"gambia": "GM",
"gambia the": "GM",
"gaza": "PS",
"georgia": "GE",
"germany": "DE",
@@ -96,17 +106,20 @@
"guatemala": "GT",
"guernsey": "GG",
"guinea": "GN",
"guinea-bissau": "GW",
"guinea bissau": "GW",
"guyana": "GY",
"haiti": "HT",
"heard island and mcdonald islands": "HM",
"honduras": "HN",
"hong kong s.a.r.": "HK",
"hong kong": "HK",
"hong kong s a r": "HK",
"hong kong sar china": "HK",
"hungary": "HU",
"iceland": "IS",
"india": "IN",
"indonesia": "ID",
"iran": "IR",
"iran islamic rep": "IR",
"iraq": "IQ",
"ireland": "IE",
"isle of man": "IM",
@@ -120,6 +133,8 @@
"kazakhstan": "KZ",
"kenya": "KE",
"kiribati": "KI",
"korea dem peoples rep": "KP",
"korea rep": "KR",
"kosovo": "XK",
"kuwait": "KW",
"kyrgyz republic": "KG",
@@ -134,7 +149,8 @@
"liechtenstein": "LI",
"lithuania": "LT",
"luxembourg": "LU",
"macao s.a.r": "MO",
"macao s a r": "MO",
"macao sar china": "MO",
"madagascar": "MG",
"malawi": "MW",
"malaysia": "MY",
@@ -146,12 +162,14 @@
"mauritius": "MU",
"mexico": "MX",
"micronesia": "FM",
"micronesia fed sts": "FM",
"moldova": "MD",
"monaco": "MC",
"mongolia": "MN",
"montenegro": "ME",
"montserrat": "MS",
"morocco": "MA",
"morocco western sahara": "MA",
"mozambique": "MZ",
"myanmar": "MM",
"namibia": "NA",
@@ -169,24 +187,30 @@
"north macedonia": "MK",
"northern mariana islands": "MP",
"norway": "NO",
"occupied palestinian territory": "PS",
"oman": "OM",
"pakistan": "PK",
"palau": "PW",
"palestine": "PS",
"palestine state of": "PS",
"palestinian territories": "PS",
"panama": "PA",
"papua new guinea": "PG",
"paraguay": "PY",
"peru": "PE",
"philippines": "PH",
"pitcairn islands": "PN",
"plurinational state of bolivia": "BO",
"poland": "PL",
"portugal": "PT",
"puerto rico": "PR",
"qatar": "QA",
"republic of korea": "KR",
"republic of serbia": "RS",
"republic of the congo": "CG",
"romania": "RO",
"russia": "RU",
"russian federation": "RU",
"rwanda": "RW",
"saint barthelemy": "BL",
"saint helena": "SH",
@@ -197,13 +221,16 @@
"saint vincent and the grenadines": "VC",
"samoa": "WS",
"san marino": "SM",
"são tomé and principe": "ST",
"sao tome": "ST",
"sao tome and principe": "ST",
"saudi arabia": "SA",
"senegal": "SN",
"serbia": "RS",
"seychelles": "SC",
"sierra leone": "SL",
"singapore": "SG",
"sint maarten": "SX",
"slovak republic": "SK",
"slovakia": "SK",
"slovenia": "SI",
"solomon islands": "SB",
@@ -214,13 +241,19 @@
"south sudan": "SS",
"spain": "ES",
"sri lanka": "LK",
"st kitts and nevis": "KN",
"st lucia": "LC",
"st vincent and the grenadines": "VC",
"sudan": "SD",
"suriname": "SR",
"swaziland": "SZ",
"sweden": "SE",
"switzerland": "CH",
"syria": "SY",
"syrian arab republic": "SY",
"taiwan": "TW",
"tajikistan": "TJ",
"tanzania": "TZ",
"thailand": "TH",
"the bahamas": "BS",
"the comoros": "KM",
@@ -229,17 +262,18 @@
"the netherlands": "NL",
"the philippines": "PH",
"the seychelles": "SC",
"timor-leste": "TL",
"timor leste": "TL",
"togo": "TG",
"tonga": "TO",
"trinidad and tobago": "TT",
"tunisia": "TN",
"turkey": "TR",
"turkiye": "TR",
"turkmenistan": "TM",
"turks and caicos": "TC",
"turks and caicos islands": "TC",
"tuvalu": "TV",
"u.s. virgin islands": "VI",
"u s virgin islands": "VI",
"uae": "AE",
"uganda": "UG",
"uk": "GB",
@@ -257,11 +291,15 @@
"vanuatu": "VU",
"vatican": "VA",
"venezuela": "VE",
"venezuela rb": "VE",
"viet nam": "VN",
"vietnam": "VN",
"wallis and futuna": "WF",
"west bank": "PS",
"west bank and gaza": "PS",
"western sahara": "EH",
"yemen": "YE",
"yemen rep": "YE",
"zambia": "ZM",
"zimbabwe": "ZW"
}

241
shared/iso2-to-iso3.json Normal file
View File

@@ -0,0 +1,241 @@
{
"AD": "AND",
"AE": "ARE",
"AF": "AFG",
"AG": "ATG",
"AI": "AIA",
"AL": "ALB",
"AM": "ARM",
"AO": "AGO",
"AQ": "ATA",
"AR": "ARG",
"AS": "ASM",
"AT": "AUT",
"AU": "AUS",
"AW": "ABW",
"AX": "ALA",
"AZ": "AZE",
"BA": "BIH",
"BB": "BRB",
"BD": "BGD",
"BE": "BEL",
"BF": "BFA",
"BG": "BGR",
"BH": "BHR",
"BI": "BDI",
"BJ": "BEN",
"BL": "BLM",
"BM": "BMU",
"BN": "BRN",
"BO": "BOL",
"BR": "BRA",
"BS": "BHS",
"BT": "BTN",
"BW": "BWA",
"BY": "BLR",
"BZ": "BLZ",
"CA": "CAN",
"CD": "COD",
"CF": "CAF",
"CG": "COG",
"CH": "CHE",
"CI": "CIV",
"CK": "COK",
"CL": "CHL",
"CM": "CMR",
"CN": "CHN",
"CO": "COL",
"CR": "CRI",
"CU": "CUB",
"CV": "CPV",
"CW": "CUW",
"CY": "CYP",
"CZ": "CZE",
"DE": "DEU",
"DJ": "DJI",
"DK": "DNK",
"DM": "DMA",
"DO": "DOM",
"DZ": "DZA",
"EC": "ECU",
"EE": "EST",
"EG": "EGY",
"EH": "ESH",
"ER": "ERI",
"ES": "ESP",
"ET": "ETH",
"FI": "FIN",
"FJ": "FJI",
"FK": "FLK",
"FM": "FSM",
"FO": "FRO",
"FR": "FRA",
"GA": "GAB",
"GB": "GBR",
"GD": "GRD",
"GE": "GEO",
"GG": "GGY",
"GH": "GHA",
"GI": "GIB",
"GL": "GRL",
"GM": "GMB",
"GN": "GIN",
"GQ": "GNQ",
"GR": "GRC",
"GS": "SGS",
"GT": "GTM",
"GU": "GUM",
"GW": "GNB",
"GY": "GUY",
"HK": "HKG",
"HM": "HMD",
"HN": "HND",
"HR": "HRV",
"HT": "HTI",
"HU": "HUN",
"ID": "IDN",
"IE": "IRL",
"IL": "ISR",
"IM": "IMN",
"IN": "IND",
"IO": "IOT",
"IQ": "IRQ",
"IR": "IRN",
"IS": "ISL",
"IT": "ITA",
"JE": "JEY",
"JM": "JAM",
"JO": "JOR",
"JP": "JPN",
"KE": "KEN",
"KG": "KGZ",
"KH": "KHM",
"KI": "KIR",
"KM": "COM",
"KN": "KNA",
"KP": "PRK",
"KR": "KOR",
"KW": "KWT",
"KY": "CYM",
"KZ": "KAZ",
"LA": "LAO",
"LB": "LBN",
"LC": "LCA",
"LI": "LIE",
"LK": "LKA",
"LR": "LBR",
"LS": "LSO",
"LT": "LTU",
"LU": "LUX",
"LV": "LVA",
"LY": "LBY",
"MA": "MAR",
"MC": "MCO",
"MD": "MDA",
"ME": "MNE",
"MF": "MAF",
"MG": "MDG",
"MH": "MHL",
"MK": "MKD",
"ML": "MLI",
"MM": "MMR",
"MN": "MNG",
"MO": "MAC",
"MP": "MNP",
"MR": "MRT",
"MS": "MSR",
"MT": "MLT",
"MU": "MUS",
"MV": "MDV",
"MW": "MWI",
"MX": "MEX",
"MY": "MYS",
"MZ": "MOZ",
"NA": "NAM",
"NC": "NCL",
"NE": "NER",
"NF": "NFK",
"NG": "NGA",
"NI": "NIC",
"NL": "NLD",
"NO": "NOR",
"NP": "NPL",
"NR": "NRU",
"NU": "NIU",
"NZ": "NZL",
"OM": "OMN",
"PA": "PAN",
"PE": "PER",
"PF": "PYF",
"PG": "PNG",
"PH": "PHL",
"PK": "PAK",
"PL": "POL",
"PM": "SPM",
"PN": "PCN",
"PR": "PRI",
"PS": "PSE",
"PT": "PRT",
"PW": "PLW",
"PY": "PRY",
"QA": "QAT",
"RO": "ROU",
"RS": "SRB",
"RU": "RUS",
"RW": "RWA",
"SA": "SAU",
"SB": "SLB",
"SC": "SYC",
"SD": "SDN",
"SE": "SWE",
"SG": "SGP",
"SH": "SHN",
"SI": "SVN",
"SK": "SVK",
"SL": "SLE",
"SM": "SMR",
"SN": "SEN",
"SO": "SOM",
"SR": "SUR",
"SS": "SSD",
"ST": "STP",
"SV": "SLV",
"SX": "SXM",
"SY": "SYR",
"SZ": "SWZ",
"TC": "TCA",
"TD": "TCD",
"TF": "ATF",
"TG": "TGO",
"TH": "THA",
"TJ": "TJK",
"TL": "TLS",
"TM": "TKM",
"TN": "TUN",
"TO": "TON",
"TR": "TUR",
"TT": "TTO",
"TV": "TUV",
"TW": "TWN",
"TZ": "TZA",
"UA": "UKR",
"UG": "UGA",
"UM": "UMI",
"US": "USA",
"UY": "URY",
"UZ": "UZB",
"VA": "VAT",
"VC": "VCT",
"VE": "VEN",
"VG": "VGB",
"VI": "VIR",
"VN": "VNM",
"VU": "VUT",
"WF": "WLF",
"WS": "WSM",
"XK": "XKX",
"YE": "YEM",
"ZA": "ZAF",
"ZM": "ZMB",
"ZW": "ZWE"
}

241
shared/iso3-to-iso2.json Normal file
View File

@@ -0,0 +1,241 @@
{
"ABW": "AW",
"AFG": "AF",
"AGO": "AO",
"AIA": "AI",
"ALA": "AX",
"ALB": "AL",
"AND": "AD",
"ARE": "AE",
"ARG": "AR",
"ARM": "AM",
"ASM": "AS",
"ATA": "AQ",
"ATF": "TF",
"ATG": "AG",
"AUS": "AU",
"AUT": "AT",
"AZE": "AZ",
"BDI": "BI",
"BEL": "BE",
"BEN": "BJ",
"BFA": "BF",
"BGD": "BD",
"BGR": "BG",
"BHR": "BH",
"BHS": "BS",
"BIH": "BA",
"BLM": "BL",
"BLR": "BY",
"BLZ": "BZ",
"BMU": "BM",
"BOL": "BO",
"BRA": "BR",
"BRB": "BB",
"BRN": "BN",
"BTN": "BT",
"BWA": "BW",
"CAF": "CF",
"CAN": "CA",
"CHE": "CH",
"CHL": "CL",
"CHN": "CN",
"CIV": "CI",
"CMR": "CM",
"COD": "CD",
"COG": "CG",
"COK": "CK",
"COL": "CO",
"COM": "KM",
"CPV": "CV",
"CRI": "CR",
"CUB": "CU",
"CUW": "CW",
"CYM": "KY",
"CYP": "CY",
"CZE": "CZ",
"DEU": "DE",
"DJI": "DJ",
"DMA": "DM",
"DNK": "DK",
"DOM": "DO",
"DZA": "DZ",
"ECU": "EC",
"EGY": "EG",
"ERI": "ER",
"ESH": "EH",
"ESP": "ES",
"EST": "EE",
"ETH": "ET",
"FIN": "FI",
"FJI": "FJ",
"FLK": "FK",
"FRA": "FR",
"FRO": "FO",
"FSM": "FM",
"GAB": "GA",
"GBR": "GB",
"GEO": "GE",
"GGY": "GG",
"GHA": "GH",
"GIB": "GI",
"GIN": "GN",
"GMB": "GM",
"GNB": "GW",
"GNQ": "GQ",
"GRC": "GR",
"GRD": "GD",
"GRL": "GL",
"GTM": "GT",
"GUM": "GU",
"GUY": "GY",
"HKG": "HK",
"HMD": "HM",
"HND": "HN",
"HRV": "HR",
"HTI": "HT",
"HUN": "HU",
"IDN": "ID",
"IMN": "IM",
"IND": "IN",
"IOT": "IO",
"IRL": "IE",
"IRN": "IR",
"IRQ": "IQ",
"ISL": "IS",
"ISR": "IL",
"ITA": "IT",
"JAM": "JM",
"JEY": "JE",
"JOR": "JO",
"JPN": "JP",
"KAZ": "KZ",
"KEN": "KE",
"KGZ": "KG",
"KHM": "KH",
"KIR": "KI",
"KNA": "KN",
"KOR": "KR",
"KWT": "KW",
"LAO": "LA",
"LBN": "LB",
"LBR": "LR",
"LBY": "LY",
"LCA": "LC",
"LIE": "LI",
"LKA": "LK",
"LSO": "LS",
"LTU": "LT",
"LUX": "LU",
"LVA": "LV",
"MAC": "MO",
"MAF": "MF",
"MAR": "MA",
"MCO": "MC",
"MDA": "MD",
"MDG": "MG",
"MDV": "MV",
"MEX": "MX",
"MHL": "MH",
"MKD": "MK",
"MLI": "ML",
"MLT": "MT",
"MMR": "MM",
"MNE": "ME",
"MNG": "MN",
"MNP": "MP",
"MOZ": "MZ",
"MRT": "MR",
"MSR": "MS",
"MUS": "MU",
"MWI": "MW",
"MYS": "MY",
"NAM": "NA",
"NCL": "NC",
"NER": "NE",
"NFK": "NF",
"NGA": "NG",
"NIC": "NI",
"NIU": "NU",
"NLD": "NL",
"NOR": "NO",
"NPL": "NP",
"NRU": "NR",
"NZL": "NZ",
"OMN": "OM",
"PAK": "PK",
"PAN": "PA",
"PCN": "PN",
"PER": "PE",
"PHL": "PH",
"PLW": "PW",
"PNG": "PG",
"POL": "PL",
"PRI": "PR",
"PRK": "KP",
"PRT": "PT",
"PRY": "PY",
"PSE": "PS",
"PYF": "PF",
"QAT": "QA",
"ROU": "RO",
"RUS": "RU",
"RWA": "RW",
"SAU": "SA",
"SDN": "SD",
"SEN": "SN",
"SGP": "SG",
"SGS": "GS",
"SHN": "SH",
"SLB": "SB",
"SLE": "SL",
"SLV": "SV",
"SMR": "SM",
"SOM": "SO",
"SPM": "PM",
"SRB": "RS",
"SSD": "SS",
"STP": "ST",
"SUR": "SR",
"SVK": "SK",
"SVN": "SI",
"SWE": "SE",
"SWZ": "SZ",
"SXM": "SX",
"SYC": "SC",
"SYR": "SY",
"TCA": "TC",
"TCD": "TD",
"TGO": "TG",
"THA": "TH",
"TJK": "TJ",
"TKM": "TM",
"TLS": "TL",
"TON": "TO",
"TTO": "TT",
"TUN": "TN",
"TUR": "TR",
"TUV": "TV",
"TWN": "TW",
"TZA": "TZ",
"UGA": "UG",
"UKR": "UA",
"UMI": "UM",
"URY": "UY",
"USA": "US",
"UZB": "UZ",
"VAT": "VA",
"VCT": "VC",
"VEN": "VE",
"VGB": "VG",
"VIR": "VI",
"VNM": "VN",
"VUT": "VU",
"WLF": "WF",
"WSM": "WS",
"XKX": "XK",
"YEM": "YE",
"ZAF": "ZA",
"ZMB": "ZM",
"ZWE": "ZW"
}

View File

@@ -0,0 +1,83 @@
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { readFileSync } from 'node:fs';
import { resolve } from 'node:path';
import { normalizeCountryToken, createCountryResolvers, resolveIso2, isIso2, isIso3 } from '../scripts/_country-resolver.mjs';
// Parent of the directory that contains this test file.
const root = resolve(import.meta.dirname, '..');

/** Reads `shared/<fileName>` under `root` and returns the parsed JSON. */
const readSharedJson = (fileName) => {
  const absolutePath = resolve(root, `shared/${fileName}`);
  return JSON.parse(readFileSync(absolutePath, 'utf8'));
};

// Fixtures are read from disk at module load so every suite sees the committed data.
const countryNames = readSharedJson('country-names.json');
const iso3ToIso2 = readSharedJson('iso3-to-iso2.json');
const iso2ToIso3 = readSharedJson('iso2-to-iso3.json');
// Shape checks for shared/country-names.json: keys must already be in
// normalized form, values must pass isIso2, and the map must not shrink
// below 300 entries.
describe('country-names.json structural validation', () => {
  it('every key equals normalizeCountryToken(key)', () => {
    Object.keys(countryNames).forEach((name) => {
      assert.equal(name, normalizeCountryToken(name), `key "${name}" is not normalized`);
    });
  });

  it('every value is a valid ISO2 code', () => {
    Object.entries(countryNames).forEach(([name, code]) => {
      assert.ok(isIso2(code), `"${name}" → "${code}" is not valid ISO2`);
    });
  });

  it('has at least 300 entries', () => {
    assert.ok(Object.keys(countryNames).length >= 300);
  });
});
// Validates shared/iso3-to-iso2.json on its own (size, key/value format) and
// against shared/iso2-to-iso3.json, which must be its exact inverse.
describe('iso3-to-iso2.json validation', () => {
  it('has at least 238 entries', () => {
    assert.ok(Object.keys(iso3ToIso2).length >= 238);
  });

  it('every key is valid ISO3, every value is valid ISO2', () => {
    for (const [k, v] of Object.entries(iso3ToIso2)) {
      assert.ok(isIso3(k), `key "${k}" not valid ISO3`);
      assert.ok(isIso2(v), `value "${v}" not valid ISO2`);
    }
  });

  it('bidirectional consistency with iso2-to-iso3', () => {
    // Forward: every ISO2 → ISO3 pair must map back to the same ISO2.
    for (const [iso2, iso3] of Object.entries(iso2ToIso3)) {
      assert.equal(iso3ToIso2[iso3], iso2, `iso3ToIso2[${iso3}] !== ${iso2}`);
    }
    // Reverse: every ISO3 → ISO2 pair must also exist in the other file.
    // Without this pass, an entry present only in iso3-to-iso2.json would go
    // undetected and the two maps could silently drift apart.
    for (const [iso3, iso2] of Object.entries(iso3ToIso2)) {
      assert.equal(iso2ToIso3[iso2], iso3, `iso2ToIso3[${iso2}] !== ${iso3}`);
    }
  });

  it('resolves Taiwan and Kosovo', () => {
    assert.equal(iso3ToIso2['TWN'], 'TW');
    assert.equal(iso3ToIso2['XKX'], 'XK');
  });
});
// Regression suite: name variants listed here (labelled as the old
// COUNTRY_ALIAS_MAP entries) and a few ISO3 codes must resolve to the
// expected ISO2 code through resolveIso2.
describe('resolver parity', () => {
  const resolvers = createCountryResolvers();

  // Alias → expected ISO2. Order is kept stable so the first failure reported
  // does not change between runs.
  const legacyAliasExpectations = {
    'bahamas the': 'BS',
    'cape verde': 'CV',
    'congo brazzaville': 'CG',
    'congo kinshasa': 'CD',
    'congo rep': 'CG',
    'congo dem rep': 'CD',
    'czech republic': 'CZ',
    'egypt arab rep': 'EG',
    'gambia the': 'GM',
    'hong kong sar china': 'HK',
    'iran islamic rep': 'IR',
    'korea dem peoples rep': 'KP',
    'korea rep': 'KR',
    'lao pdr': 'LA',
    'macao sar china': 'MO',
    'micronesia fed sts': 'FM',
    'morocco western sahara': 'MA',
    'north macedonia': 'MK',
    'occupied palestinian territory': 'PS',
    'palestinian territories': 'PS',
    'palestine state of': 'PS',
    'russian federation': 'RU',
    'slovak republic': 'SK',
    'st kitts and nevis': 'KN',
    'st lucia': 'LC',
    'st vincent and the grenadines': 'VC',
    'syrian arab republic': 'SY',
    'the bahamas': 'BS',
    'timor leste': 'TL',
    'turkiye': 'TR',
    'united states of america': 'US',
    'venezuela rb': 'VE',
    'viet nam': 'VN',
    'west bank and gaza': 'PS',
    'yemen rep': 'YE',
  };

  it('resolves all old COUNTRY_ALIAS_MAP entries', () => {
    Object.entries(legacyAliasExpectations).forEach(([alias, iso2]) => {
      const resolved = resolveIso2({ name: alias }, resolvers);
      assert.equal(resolved, iso2, `"${alias}" → ${resolved}, expected ${iso2}`);
    });
  });

  it('resolves ISO3 codes', () => {
    // [input ISO3, expected ISO2]
    const iso3Cases = [
      ['USA', 'US'],
      ['GBR', 'GB'],
      ['TWN', 'TW'],
      ['XKX', 'XK'],
    ];
    for (const [iso3, iso2] of iso3Cases) {
      assert.equal(resolveIso2({ iso3 }, resolvers), iso2);
    }
  });
});

View File

@@ -29,14 +29,7 @@ function makeResolvers() {
yemen: 'YE',
'cape verde': 'CV',
},
{
features: [
{ properties: { name: 'Norway', 'ISO3166-1-Alpha-2': 'NO', 'ISO3166-1-Alpha-3': 'NOR' } },
{ properties: { name: 'United States', 'ISO3166-1-Alpha-2': 'US', 'ISO3166-1-Alpha-3': 'USA' } },
{ properties: { name: 'Yemen', 'ISO3166-1-Alpha-2': 'YE', 'ISO3166-1-Alpha-3': 'YEM' } },
{ properties: { name: 'Cape Verde', 'ISO3166-1-Alpha-2': 'CV', 'ISO3166-1-Alpha-3': 'CPV' } },
],
},
{ NOR: 'NO', USA: 'US', YEM: 'YE', CPV: 'CV' },
);
}