diff --git a/scripts/_country-resolver.mjs b/scripts/_country-resolver.mjs index c2e08181b..a371d6c6c 100644 --- a/scripts/_country-resolver.mjs +++ b/scripts/_country-resolver.mjs @@ -1,53 +1,7 @@ -import { readFileSync } from 'node:fs'; -import { dirname, join } from 'node:path'; -import { fileURLToPath } from 'node:url'; import { loadSharedConfig } from './_seed-utils.mjs'; -const __dirname = dirname(fileURLToPath(import.meta.url)); - const DEFAULT_COUNTRY_NAMES = loadSharedConfig('country-names.json'); -const DEFAULT_COUNTRIES_GEOJSON = JSON.parse( - readFileSync(join(__dirname, '..', 'public', 'data', 'countries.geojson'), 'utf8'), -); - -export const COUNTRY_ALIAS_MAP = { - 'bahamas the': 'BS', - 'cape verde': 'CV', - 'congo brazzaville': 'CG', - 'congo kinshasa': 'CD', - 'congo rep': 'CG', - 'congo dem rep': 'CD', - 'czech republic': 'CZ', - 'egypt arab rep': 'EG', - 'gambia the': 'GM', - 'hong kong sar china': 'HK', - 'iran islamic rep': 'IR', - 'korea dem peoples rep': 'KP', - 'korea rep': 'KR', - 'lao pdr': 'LA', - 'macao sar china': 'MO', - 'micronesia fed sts': 'FM', - 'morocco western sahara': 'MA', - 'north macedonia': 'MK', - 'occupied palestinian territory': 'PS', - 'palestinian territories': 'PS', - 'palestine state of': 'PS', - 'russian federation': 'RU', - 'slovak republic': 'SK', - 'st kitts and nevis': 'KN', - 'st lucia': 'LC', - 'st vincent and the grenadines': 'VC', - 'syrian arab republic': 'SY', - 'the bahamas': 'BS', - 'timor leste': 'TL', - 'turkiye': 'TR', - 'u s': 'US', - 'united states of america': 'US', - 'venezuela rb': 'VE', - 'viet nam': 'VN', - 'west bank and gaza': 'PS', - 'yemen rep': 'YE', -}; +const DEFAULT_ISO3_MAP = loadSharedConfig('iso3-to-iso2.json'); export function normalizeCountryToken(value) { return String(value || '') @@ -68,7 +22,7 @@ export function isIso3(value) { return /^[A-Z]{3}$/.test(String(value || '').trim()); } -export function createCountryResolvers(countryNames = DEFAULT_COUNTRY_NAMES, geojson = DEFAULT_COUNTRIES_GEOJSON) { +export function createCountryResolvers(countryNames = DEFAULT_COUNTRY_NAMES, iso3Map = DEFAULT_ISO3_MAP) { const nameToIso2 = new Map(); const iso3ToIso2 = new Map(); @@ -76,21 +30,8 @@ export function createCountryResolvers(countryNames = DEFAULT_COUNTRY_NAMES, geo if (isIso2(iso2)) nameToIso2.set(normalizeCountryToken(name), iso2.toUpperCase()); } - for (const [alias, iso2] of Object.entries(COUNTRY_ALIAS_MAP)) { - if (isIso2(iso2)) nameToIso2.set(normalizeCountryToken(alias), iso2.toUpperCase()); - } - - for (const feature of geojson?.features || []) { - const properties = feature?.properties || {}; - const iso2 = String(properties['ISO3166-1-Alpha-2'] || '').toUpperCase(); - const iso3 = String(properties['ISO3166-1-Alpha-3'] || '').toUpperCase(); - const name = properties.name; - if (isIso2(iso2)) { - if (typeof name === 'string' && name.trim()) { - nameToIso2.set(normalizeCountryToken(name), iso2); - } - if (isIso3(iso3)) iso3ToIso2.set(iso3, iso2); - } + for (const [iso3, iso2] of Object.entries(iso3Map)) { + if (isIso3(iso3) && isIso2(iso2)) iso3ToIso2.set(iso3, iso2.toUpperCase()); } return { nameToIso2, iso3ToIso2 }; diff --git a/scripts/build-country-names.cjs b/scripts/build-country-names.cjs new file mode 100644 index 000000000..11e54d390 --- /dev/null +++ b/scripts/build-country-names.cjs @@ -0,0 +1,186 @@ +'use strict'; +const fs = require('fs'); +const path = require('path'); + +const root = path.resolve(__dirname, '..'); +const geojsonPath = path.join(root, 'public', 'data', 'countries.geojson'); +const existingPath = path.join(root, 'shared', 'country-names.json'); + +const existing = JSON.parse(fs.readFileSync(existingPath, 'utf8')); +const result = Object.assign({}, existing); +let added = 0; + +function normalize(value) { + return String(value || '') + .normalize('NFKD') + .replace(/\p{Diacritic}/gu, '') + .toLowerCase() + .replace(/&/g, ' and ') + .replace(/[''.(),/-]/g, ' ') + .replace(/\s+/g, ' ') + .trim(); +} + +function add(key, iso2, source) { + const k = normalize(key); + if (!k || !/^[A-Z]{2}$/.test(iso2)) return; + if (result[k]) return; + result[k] = iso2; + added++; +} + +// A. Geojson country names +const geojson = JSON.parse(fs.readFileSync(geojsonPath, 'utf8')); +for (const f of geojson.features) { + const props = f.properties || {}; + const iso2 = String(props['ISO3166-1-Alpha-2'] || '').trim(); + const name = props.name; + if (!/^[A-Z]{2}$/.test(iso2)) continue; + if (typeof name === 'string' && name.trim()) { + add(name, iso2, 'geojson'); + } +} + +// B. COUNTRY_ALIAS_MAP from _country-resolver.mjs (37 entries, hardcoded) +const COUNTRY_ALIAS_MAP = { + 'bahamas the': 'BS', + 'cape verde': 'CV', + 'congo brazzaville': 'CG', + 'congo kinshasa': 'CD', + 'congo rep': 'CG', + 'congo dem rep': 'CD', + 'czech republic': 'CZ', + 'egypt arab rep': 'EG', + 'gambia the': 'GM', + 'hong kong sar china': 'HK', + 'iran islamic rep': 'IR', + 'korea dem peoples rep': 'KP', + 'korea rep': 'KR', + 'lao pdr': 'LA', + 'macao sar china': 'MO', + 'micronesia fed sts': 'FM', + 'morocco western sahara': 'MA', + 'north macedonia': 'MK', + 'occupied palestinian territory': 'PS', + 'palestinian territories': 'PS', + 'palestine state of': 'PS', + 'russian federation': 'RU', + 'slovak republic': 'SK', + 'st kitts and nevis': 'KN', + 'st lucia': 'LC', + 'st vincent and the grenadines': 'VC', + 'syrian arab republic': 'SY', + 'the bahamas': 'BS', + 'timor leste': 'TL', + 'turkiye': 'TR', + 'u s': 'US', + 'united states of america': 'US', + 'venezuela rb': 'VE', + 'viet nam': 'VN', + 'west bank and gaza': 'PS', + 'yemen rep': 'YE', +}; +for (const [alias, iso2] of Object.entries(COUNTRY_ALIAS_MAP)) { + add(alias, iso2, 'alias_map'); +} + +// C. Additional upstream API variants +const upstream = { + 'egypt arab rep': 'EG', + 'korea rep': 'KR', + 'iran islamic rep': 'IR', + 'congo dem rep': 'CD', + 'congo rep': 'CG', + 'venezuela rb': 'VE', + 'yemen rep': 'YE', + 'bahamas the': 'BS', + 'gambia the': 'GM', + 'hong kong sar china': 'HK', + 'macao sar china': 'MO', + 'micronesia fed sts': 'FM', + 'lao pdr': 'LA', + 'slovak republic': 'SK', + 'syrian arab republic': 'SY', + 'viet nam': 'VN', + 'turkiye': 'TR', + 'timor leste': 'TL', + 'occupied palestinian territory': 'PS', + 'palestine state of': 'PS', + 'west bank and gaza': 'PS', + 'bolivarian republic of venezuela': 'VE', + 'plurinational state of bolivia': 'BO', + 'united republic of tanzania': 'TZ', + 'democratic peoples republic of korea': 'KP', + 'republic of korea': 'KR', + 'ivory coast': 'CI', + 'swaziland': 'SZ', + 'north macedonia': 'MK', +}; +for (const [name, iso2] of Object.entries(upstream)) { + add(name, iso2, 'upstream'); +} + +// D. Correlation extras from seed-correlation.mjs (hardcoded) +const COUNTRY_NAME_TO_ISO2 = { + 'afghanistan': 'AF', 'albania': 'AL', 'algeria': 'DZ', 'angola': 'AO', + 'argentina': 'AR', 'armenia': 'AM', 'australia': 'AU', 'austria': 'AT', + 'azerbaijan': 'AZ', 'bahrain': 'BH', 'bangladesh': 'BD', 'belarus': 'BY', + 'belgium': 'BE', 'bolivia': 'BO', 'bosnia and herzegovina': 'BA', + 'brazil': 'BR', 'bulgaria': 'BG', 'burkina faso': 'BF', 'burma': 'MM', + 'cambodia': 'KH', 'cameroon': 'CM', 'canada': 'CA', 'chad': 'TD', + 'chile': 'CL', 'china': 'CN', 'colombia': 'CO', 'congo': 'CG', + 'costa rica': 'CR', 'croatia': 'HR', 'cuba': 'CU', 'cyprus': 'CY', + 'czech republic': 'CZ', 'czechia': 'CZ', + 'democratic republic of the congo': 'CD', 'dr congo': 'CD', 'drc': 'CD', + 'denmark': 'DK', 'djibouti': 'DJ', 'dominican republic': 'DO', + 'ecuador': 'EC', 'egypt': 'EG', 'el salvador': 'SV', 'eritrea': 'ER', + 'estonia': 'EE', 'ethiopia': 'ET', 'finland': 'FI', 'france': 'FR', + 'gabon': 'GA', 'georgia': 'GE', 'germany': 'DE', 'ghana': 'GH', + 'greece': 'GR', 'guatemala': 'GT', 'guinea': 'GN', 'haiti': 'HT', + 'honduras': 'HN', 'hungary': 'HU', 'iceland': 'IS', 'india': 'IN', + 'indonesia': 'ID', 'iran': 'IR', 'iraq': 'IQ', 'ireland': 'IE', + 'israel': 'IL', 'italy': 'IT', 'ivory coast': 'CI', "cote d'ivoire": 'CI', + 'jamaica': 'JM', 'japan': 'JP', 'jordan': 'JO', 'kazakhstan': 'KZ', + 'kenya': 'KE', 'kosovo': 'XK', 'kuwait': 'KW', 'kyrgyzstan': 'KG', + 'laos': 'LA', 'latvia': 'LV', 'lebanon': 'LB', 'libya': 'LY', + 'lithuania': 'LT', 'madagascar': 'MG', 'malawi': 'MW', 'malaysia': 'MY', + 'mali': 'ML', 'mauritania': 'MR', 'mexico': 'MX', 'moldova': 'MD', + 'mongolia': 'MN', 'montenegro': 'ME', 'morocco': 'MA', 'mozambique': 'MZ', + 'myanmar': 'MM', 'namibia': 'NA', 'nepal': 'NP', 'netherlands': 'NL', + 'new zealand': 'NZ', 'nicaragua': 'NI', 'niger': 'NE', 'nigeria': 'NG', + 'north korea': 'KP', 'north macedonia': 'MK', 'norway': 'NO', + 'oman': 'OM', 'pakistan': 'PK', 'palestine': 'PS', 'panama': 'PA', + 'papua new guinea': 'PG', 'paraguay': 'PY', 'peru': 'PE', + 'philippines': 'PH', 'poland': 'PL', 'portugal': 'PT', 'qatar': 'QA', + 'romania': 'RO', 'russia': 'RU', 'rwanda': 'RW', 'saudi arabia': 'SA', + 'senegal': 'SN', 'serbia': 'RS', 'sierra leone': 'SL', 'singapore': 'SG', + 'slovakia': 'SK', 'slovenia': 'SI', 'somalia': 'SO', 'south africa': 'ZA', + 'south korea': 'KR', 'south sudan': 'SS', 'spain': 'ES', + 'sri lanka': 'LK', 'sudan': 'SD', 'sweden': 'SE', 'switzerland': 'CH', + 'syria': 'SY', 'taiwan': 'TW', 'tajikistan': 'TJ', 'tanzania': 'TZ', + 'thailand': 'TH', 'togo': 'TG', 'trinidad and tobago': 'TT', + 'tunisia': 'TN', 'turkey': 'TR', 'turkmenistan': 'TM', 'uganda': 'UG', + 'ukraine': 'UA', 'united arab emirates': 'AE', 'uae': 'AE', + 'united kingdom': 'GB', 'uk': 'GB', 'united states': 'US', 'usa': 'US', + 'uruguay': 'UY', 'uzbekistan': 'UZ', 'venezuela': 'VE', 'vietnam': 'VN', + 'yemen': 'YE', 'zambia': 'ZM', 'zimbabwe': 'ZW', + 'east timor': 'TL', 'cape verde': 'CV', 'swaziland': 'SZ', + 'republic of the congo': 'CG', +}; +for (const [name, iso2] of Object.entries(COUNTRY_NAME_TO_ISO2)) { + add(name, iso2, 'correlation'); +} + +// Sort keys alphabetically +const sorted = Object.fromEntries( + Object.entries(result).sort(([a], [b]) => a.localeCompare(b)) +); + +fs.writeFileSync(existingPath, JSON.stringify(sorted, null, 2) + '\n'); +console.log(`Existing: ${Object.keys(existing).length}, Added: ${added}, Total: ${Object.keys(sorted).length}`); + +// Validate all values are ISO2 +for (const [k, v] of Object.entries(sorted)) { + if (!/^[A-Z]{2}$/.test(v)) console.error(`INVALID VALUE: ${k} → ${v}`); + if (k !== k.toLowerCase()) console.error(`NON-LOWERCASE KEY: ${k}`); +} diff --git a/scripts/generate-iso3-maps.cjs b/scripts/generate-iso3-maps.cjs new file mode 100644 index 000000000..de682e27e --- /dev/null +++ b/scripts/generate-iso3-maps.cjs @@ -0,0 +1,67 @@ +'use strict'; +const fs = require('fs'); +const path = require('path'); + +const root = path.resolve(__dirname, '..'); +const geojson = JSON.parse(fs.readFileSync(path.join(root, 'public', 'data', 'countries.geojson'), 'utf8')); + +const iso3ToIso2 = {}; +const discrepancies = []; + +for (const f of geojson.features) { + const props = f.properties || {}; + const iso2 = String(props['ISO3166-1-Alpha-2'] || '').trim(); + const iso3 = String(props['ISO3166-1-Alpha-3'] || '').trim(); + + if (!/^[A-Z]{2}$/.test(iso2)) { + if (/^[A-Z]{3}$/.test(iso3)) { + discrepancies.push(`Skipped ${iso3} (${props.name}): invalid ISO2 "${props['ISO3166-1-Alpha-2']}"`); + } + continue; + } + if (!/^[A-Z]{3}$/.test(iso3)) { + discrepancies.push(`Skipped ${props.name} (${iso2}): invalid ISO3 "${props['ISO3166-1-Alpha-3']}"`); + continue; + } + iso3ToIso2[iso3] = iso2; +} + +// Supplements for missing/invalid entries +if (!iso3ToIso2['TWN']) { + iso3ToIso2['TWN'] = 'TW'; + console.log('Added supplement: TWN → TW (Taiwan has CN-TW in geojson)'); +} +if (!iso3ToIso2['XKX']) { + iso3ToIso2['XKX'] = 'XK'; + console.log('Added supplement: XKX → XK (Kosovo absent from geojson)'); +} + +// Sort by key +const sorted3to2 = Object.fromEntries( + Object.entries(iso3ToIso2).sort(([a], [b]) => a.localeCompare(b)) +); + +// Invert: ISO2 → ISO3 +const iso2ToIso3 = {}; +for (const [iso3, iso2] of Object.entries(sorted3to2)) { + if (!iso2ToIso3[iso2]) { + iso2ToIso3[iso2] = iso3; + } +} +const sorted2to3 = Object.fromEntries( + Object.entries(iso2ToIso3).sort(([a], [b]) => a.localeCompare(b)) +); + +// Write files +const out3to2 = path.join(root, 'shared', 'iso3-to-iso2.json'); +fs.writeFileSync(out3to2, JSON.stringify(sorted3to2, null, 2) + '\n'); +console.log(`Wrote ${Object.keys(sorted3to2).length} entries to ${out3to2}`); + +const out2to3 = path.join(root, 'shared', 'iso2-to-iso3.json'); +fs.writeFileSync(out2to3, JSON.stringify(sorted2to3, null, 2) + '\n'); +console.log(`Wrote ${Object.keys(sorted2to3).length} entries to ${out2to3}`); + +if (discrepancies.length) { + console.log(`\nDiscrepancies (${discrepancies.length}):`); + for (const d of discrepancies) console.log(` ${d}`); +} diff --git a/scripts/seed-conflict-intel.mjs b/scripts/seed-conflict-intel.mjs index 5af504c0d..39ffc3954 100755 --- a/scripts/seed-conflict-intel.mjs +++ b/scripts/seed-conflict-intel.mjs @@ -16,7 +16,7 @@ * - searchGdeltDocuments: per-query GDELT search */ -import { loadEnvFile, CHROME_UA, runSeed, writeExtraKeyWithMeta, sleep } from './_seed-utils.mjs'; +import { loadEnvFile, CHROME_UA, runSeed, writeExtraKeyWithMeta, sleep, loadSharedConfig } from './_seed-utils.mjs'; loadEnvFile(import.meta.url); @@ -26,18 +26,12 @@ const HAPI_CACHE_KEY_PREFIX = 'conflict:humanitarian:v1'; const HAPI_TTL = 21600; const PIZZINT_TTL = 600; -// Top conflict countries (ISO2) for humanitarian pre-seeding const CONFLICT_COUNTRIES = [ 'AF', 'SY', 'UA', 'SD', 'SS', 'SO', 'CD', 'MM', 'YE', 'ET', 'IQ', 'PS', 'LY', 'ML', 'BF', 'NE', 'NG', 'CM', 'MZ', 'HT', ]; -const ISO2_TO_ISO3 = { - AF: 'AFG', SY: 'SYR', UA: 'UKR', SD: 'SDN', SS: 'SSD', SO: 'SOM', - CD: 'COD', MM: 'MMR', YE: 'YEM', ET: 'ETH', IQ: 'IRQ', PS: 'PSE', - LY: 'LBY', ML: 'MLI', BF: 'BFA', NE: 'NER', NG: 'NGA', CM: 'CMR', - MZ: 'MOZ', HT: 'HTI', -}; +const ISO2_TO_ISO3 = loadSharedConfig('iso2-to-iso3.json'); // ─── ACLED Events ─── diff --git a/scripts/seed-correlation.mjs b/scripts/seed-correlation.mjs index 1485546e9..2df76b821 100644 --- a/scripts/seed-correlation.mjs +++ b/scripts/seed-correlation.mjs @@ -1,6 +1,7 @@ #!/usr/bin/env node -import { loadEnvFile, runSeed, getRedisCredentials } from './_seed-utils.mjs'; +import { loadEnvFile, runSeed, getRedisCredentials, loadSharedConfig } from './_seed-utils.mjs'; +import { resolveIso2, normalizeCountryToken } from './_country-resolver.mjs'; loadEnvFile(import.meta.url); @@ -53,86 +54,8 @@ function haversineKm(lat1, lon1, lat2, lon2) { return R * 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a)); } -// ── Country Name Resolution ───────────────────────────────── -const COUNTRY_NAME_TO_ISO2 = { - 'afghanistan': 'AF', 'albania': 'AL', 'algeria': 'DZ', 'angola': 'AO', - 'argentina': 'AR', 'armenia': 'AM', 'australia': 'AU', 'austria': 'AT', - 'azerbaijan': 'AZ', 'bahrain': 'BH', 'bangladesh': 'BD', 'belarus': 'BY', - 'belgium': 'BE', 'bolivia': 'BO', 'bosnia and herzegovina': 'BA', - 'brazil': 'BR', 'bulgaria': 'BG', 'burkina faso': 'BF', 'burma': 'MM', - 'cambodia': 'KH', 'cameroon': 'CM', 'canada': 'CA', 'chad': 'TD', - 'chile': 'CL', 'china': 'CN', 'colombia': 'CO', 'congo': 'CG', - 'costa rica': 'CR', 'croatia': 'HR', 'cuba': 'CU', 'cyprus': 'CY', - 'czech republic': 'CZ', 'czechia': 'CZ', - 'democratic republic of the congo': 'CD', 'dr congo': 'CD', 'drc': 'CD', - 'denmark': 'DK', 'djibouti': 'DJ', 'dominican republic': 'DO', - 'ecuador': 'EC', 'egypt': 'EG', 'el salvador': 'SV', 'eritrea': 'ER', - 'estonia': 'EE', 'ethiopia': 'ET', 'finland': 'FI', 'france': 'FR', - 'gabon': 'GA', 'georgia': 'GE', 'germany': 'DE', 'ghana': 'GH', - 'greece': 'GR', 'guatemala': 'GT', 'guinea': 'GN', 'haiti': 'HT', - 'honduras': 'HN', 'hungary': 'HU', 'iceland': 'IS', 'india': 'IN', - 'indonesia': 'ID', 'iran': 'IR', 'iraq': 'IQ', 'ireland': 'IE', - 'israel': 'IL', 'italy': 'IT', 'ivory coast': 'CI', "cote d'ivoire": 'CI', - 'jamaica': 'JM', 'japan': 'JP', 'jordan': 'JO', 'kazakhstan': 'KZ', - 'kenya': 'KE', 'kosovo': 'XK', 'kuwait': 'KW', 'kyrgyzstan': 'KG', - 'laos': 'LA', 'latvia': 'LV', 'lebanon': 'LB', 'libya': 'LY', - 'lithuania': 'LT', 'madagascar': 'MG', 'malawi': 'MW', 'malaysia': 'MY', - 'mali': 'ML', 'mauritania': 'MR', 'mexico': 'MX', 'moldova': 'MD', - 'mongolia': 'MN', 'montenegro': 'ME', 'morocco': 'MA', 'mozambique': 'MZ', - 'myanmar': 'MM', 'namibia': 'NA', 'nepal': 'NP', 'netherlands': 'NL', - 'new zealand': 'NZ', 'nicaragua': 'NI', 'niger': 'NE', 'nigeria': 'NG', - 'north korea': 'KP', 'north macedonia': 'MK', 'norway': 'NO', - 'oman': 'OM', 'pakistan': 'PK', 'palestine': 'PS', 'panama': 'PA', - 'papua new guinea': 'PG', 'paraguay': 'PY', 'peru': 'PE', - 'philippines': 'PH', 'poland': 'PL', 'portugal': 'PT', 'qatar': 'QA', - 'romania': 'RO', 'russia': 'RU', 'rwanda': 'RW', 'saudi arabia': 'SA', - 'senegal': 'SN', 'serbia': 'RS', 'sierra leone': 'SL', 'singapore': 'SG', - 'slovakia': 'SK', 'slovenia': 'SI', 'somalia': 'SO', 'south africa': 'ZA', - 'south korea': 'KR', 'south sudan': 'SS', 'spain': 'ES', - 'sri lanka': 'LK', 'sudan': 'SD', 'sweden': 'SE', 'switzerland': 'CH', - 'syria': 'SY', 'taiwan': 'TW', 'tajikistan': 'TJ', 'tanzania': 'TZ', - 'thailand': 'TH', 'togo': 'TG', 'trinidad and tobago': 'TT', - 'tunisia': 'TN', 'turkey': 'TR', 'turkmenistan': 'TM', 'uganda': 'UG', - 'ukraine': 'UA', 'united arab emirates': 'AE', 'uae': 'AE', - 'united kingdom': 'GB', 'uk': 'GB', 'united states': 'US', 'usa': 'US', - 'uruguay': 'UY', 'uzbekistan': 'UZ', 'venezuela': 'VE', 'vietnam': 'VN', - 'yemen': 'YE', 'zambia': 'ZM', 'zimbabwe': 'ZW', - 'east timor': 'TL', 'cape verde': 'CV', 'swaziland': 'SZ', - 'republic of the congo': 'CG', -}; - -const ISO3_TO_ISO2 = { - 'AFG': 'AF', 'ALB': 'AL', 'DZA': 'DZ', 'AGO': 'AO', 'ARG': 'AR', - 'ARM': 'AM', 'AUS': 'AU', 'AUT': 'AT', 'AZE': 'AZ', 'BHR': 'BH', - 'BGD': 'BD', 'BLR': 'BY', 'BEL': 'BE', 'BOL': 'BO', 'BIH': 'BA', - 'BRA': 'BR', 'BGR': 'BG', 'BFA': 'BF', 'KHM': 'KH', 'CMR': 'CM', - 'CAN': 'CA', 'TCD': 'TD', 'CHL': 'CL', 'CHN': 'CN', 'COL': 'CO', - 'COG': 'CG', 'CRI': 'CR', 'HRV': 'HR', 'CUB': 'CU', 'CYP': 'CY', - 'CZE': 'CZ', 'COD': 'CD', 'DNK': 'DK', 'DJI': 'DJ', 'DOM': 'DO', - 'ECU': 'EC', 'EGY': 'EG', 'SLV': 'SV', 'ERI': 'ER', 'EST': 'EE', - 'ETH': 'ET', 'FIN': 'FI', 'FRA': 'FR', 'GAB': 'GA', 'GEO': 'GE', - 'DEU': 'DE', 'GHA': 'GH', 'GRC': 'GR', 'GTM': 'GT', 'GIN': 'GN', - 'HTI': 'HT', 'HND': 'HN', 'HUN': 'HU', 'ISL': 'IS', 'IND': 'IN', - 'IDN': 'ID', 'IRN': 'IR', 'IRQ': 'IQ', 'IRL': 'IE', 'ISR': 'IL', - 'ITA': 'IT', 'CIV': 'CI', 'JAM': 'JM', 'JPN': 'JP', 'JOR': 'JO', - 'KAZ': 'KZ', 'KEN': 'KE', 'XKX': 'XK', 'KWT': 'KW', 'KGZ': 'KG', - 'LAO': 'LA', 'LVA': 'LV', 'LBN': 'LB', 'LBY': 'LY', 'LTU': 'LT', - 'MDG': 'MG', 'MWI': 'MW', 'MYS': 'MY', 'MLI': 'ML', 'MRT': 'MR', - 'MEX': 'MX', 'MDA': 'MD', 'MNG': 'MN', 'MNE': 'ME', 'MAR': 'MA', - 'MOZ': 'MZ', 'MMR': 'MM', 'NAM': 'NA', 'NPL': 'NP', 'NLD': 'NL', - 'NZL': 'NZ', 'NIC': 'NI', 'NER': 'NE', 'NGA': 'NG', 'PRK': 'KP', - 'MKD': 'MK', 'NOR': 'NO', 'OMN': 'OM', 'PAK': 'PK', 'PSE': 'PS', - 'PAN': 'PA', 'PNG': 'PG', 'PRY': 'PY', 'PER': 'PE', 'PHL': 'PH', - 'POL': 'PL', 'PRT': 'PT', 'QAT': 'QA', 'ROU': 'RO', 'RUS': 'RU', - 'RWA': 'RW', 'SAU': 'SA', 'SEN': 'SN', 'SRB': 'RS', 'SLE': 'SL', - 'SGP': 'SG', 'SVK': 'SK', 'SVN': 'SI', 'SOM': 'SO', 'ZAF': 'ZA', - 'KOR': 'KR', 'SSD': 'SS', 'ESP': 'ES', 'LKA': 'LK', 'SDN': 'SD', - 'SWE': 'SE', 'CHE': 'CH', 'SYR': 'SY', 'TWN': 'TW', 'TJK': 'TJ', - 'TZA': 'TZ', 'THA': 'TH', 'TGO': 'TG', 'TTO': 'TT', 'TUN': 'TN', - 'TUR': 'TR', 'TKM': 'TM', 'UGA': 'UG', 'UKR': 'UA', 'ARE': 'AE', - 'GBR': 'GB', 'USA': 'US', 'URY': 'UY', 'UZB': 'UZ', 'VEN': 'VE', - 'VNM': 'VN', 'YEM': 'YE', 'ZMB': 'ZM', 'ZWE': 'ZW', -}; +const COUNTRY_NAME_TO_ISO2 = loadSharedConfig('country-names.json'); +const ISO3_TO_ISO2 = loadSharedConfig('iso3-to-iso2.json'); const COUNTRY_CENTROIDS = { 'AF':[33.9,67.7],'AL':[41.2,20.2],'DZ':[28.0,1.7],'AO':[-11.2,17.9],'AR':[-38.4,-63.6], @@ -181,23 +104,21 @@ function nearestCountryByCoords(lat, lon) { function normalizeToCode(country, lat, lon) { if (country) { - const t = country.trim(); - if (t.length === 2) return t.toUpperCase(); - if (t.length === 3) return ISO3_TO_ISO2[t.toUpperCase()] ?? undefined; - const fromName = COUNTRY_NAME_TO_ISO2[t.toLowerCase()]; - if (fromName) return fromName; + const resolved = resolveIso2({ iso2: country, iso3: country, name: country }); + if (resolved) return resolved; } return nearestCountryByCoords(lat, lon); } const COUNTRY_NAME_ENTRIES = Object.entries(COUNTRY_NAME_TO_ISO2) - .filter(([name]) => name.length >= 4) + .filter(([name]) => name.length >= 2) .sort((a, b) => b[0].length - a[0].length) .map(([name, code]) => ({ name, code, regex: new RegExp(`\\b${name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')}\\b`, 'i') })); -function matchCountryNamesInText(text) { +export function matchCountryNamesInText(text) { const matched = []; - let remaining = text.toLowerCase(); + let remaining = text.normalize('NFKD').replace(/\p{Diacritic}/gu, '').toLowerCase() + .replace(/['.(),/-]/g, ' ').replace(/\s+/g, ' '); for (const { code, regex } of COUNTRY_NAME_ENTRIES) { if (regex.test(remaining)) { matched.push(code); @@ -773,21 +694,23 @@ async function computeCorrelation() { return result; } -runSeed('correlation', 'cards', CANONICAL_KEY, computeCorrelation, { - ttlSeconds: CACHE_TTL, - sourceVersion: 'correlation-engine-v1', - recordCount: (data) => (data.military?.length ?? 0) + (data.escalation?.length ?? 0) + (data.economic?.length ?? 0) + (data.disaster?.length ?? 0), - extraKeys: [ - { key: 'correlation:military:v1', ttl: CACHE_TTL }, - { key: 'correlation:escalation:v1', ttl: CACHE_TTL }, - { key: 'correlation:economic:v1', ttl: CACHE_TTL }, - { key: 'correlation:disaster:v1', ttl: CACHE_TTL }, - ].map(ek => ({ - key: ek.key, - ttl: ek.ttl, - transform: (data) => data[ek.key.split(':')[1]], - })), -}).catch((err) => { - const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : ''; console.error('FATAL:', (err.message || err) + _cause); - process.exit(1); -}); +if (process.argv[1]?.endsWith('seed-correlation.mjs')) { + runSeed('correlation', 'cards', CANONICAL_KEY, computeCorrelation, { + ttlSeconds: CACHE_TTL, + sourceVersion: 'correlation-engine-v1', + recordCount: (data) => (data.military?.length ?? 0) + (data.escalation?.length ?? 0) + (data.economic?.length ?? 0) + (data.disaster?.length ?? 0), + extraKeys: [ + { key: 'correlation:military:v1', ttl: CACHE_TTL }, + { key: 'correlation:escalation:v1', ttl: CACHE_TTL }, + { key: 'correlation:economic:v1', ttl: CACHE_TTL }, + { key: 'correlation:disaster:v1', ttl: CACHE_TTL }, + ].map(ek => ({ + key: ek.key, + ttl: ek.ttl, + transform: (data) => data[ek.key.split(':')[1]], + })), + }).catch((err) => { + const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : ''; console.error('FATAL:', (err.message || err) + _cause); + process.exit(1); + }); +} diff --git a/scripts/seed-resilience-static.mjs b/scripts/seed-resilience-static.mjs index 861e31e5a..690c65dd4 100644 --- a/scripts/seed-resilience-static.mjs +++ b/scripts/seed-resilience-static.mjs @@ -12,7 +12,6 @@ import { withRetry, } from './_seed-utils.mjs'; import { - COUNTRY_ALIAS_MAP, createCountryResolvers, isIso2, isIso3, diff --git a/scripts/seed-security-advisories.mjs b/scripts/seed-security-advisories.mjs index 78d636fd2..f0f186500 100644 --- a/scripts/seed-security-advisories.mjs +++ b/scripts/seed-security-advisories.mjs @@ -71,9 +71,10 @@ const BY_COUNTRY_NAME = Object.fromEntries( function extractCountry(title, feed) { if (feed.targetCountry) return feed.targetCountry; if (feed.sourceCountry === 'EU' || feed.sourceCountry === 'INT') return undefined; - const lower = title.toLowerCase(); + const normalized = title.normalize('NFKD').replace(/\p{Diacritic}/gu, '').toLowerCase() + .replace(/['.(),/-]/g, ' ').replace(/\s+/g, ' '); for (const [name, code] of SORTED_COUNTRY_ENTRIES) { - if (lower.includes(name)) return code; + if (normalized.includes(name)) return code; } return undefined; } diff --git a/scripts/shared/country-names.json b/scripts/shared/country-names.json index eb7586ea1..1c9b7a173 100644 --- a/scripts/shared/country-names.json +++ b/scripts/shared/country-names.json @@ -16,6 +16,7 @@ "austria": "AT", "azerbaijan": "AZ", "bahamas": "BS", + "bahamas the": "BS", "bahrain": "BH", "bangladesh": "BD", "barbados": "BB", @@ -25,6 +26,7 @@ "benin": "BJ", "bermuda": "BM", "bhutan": "BT", + "bolivarian republic of venezuela": "VE", "bolivia": "BO", "bosnia and herzegovina": "BA", "botswana": "BW", @@ -49,25 +51,32 @@ "china": "CN", "colombia": "CO", "comoros": "KM", + "congo": "CG", + "congo brazzaville": "CG", + "congo dem rep": "CD", + "congo kinshasa": "CD", + "congo rep": "CG", "cook islands": "CK", "costa rica": "CR", "cote d ivoire": "CI", - "cote d'ivoire": "CI", "croatia": "HR", "cuba": "CU", - "curaçao": "CW", + "curacao": "CW", "cyprus": "CY", "czech republic": "CZ", "czechia": "CZ", + "democratic peoples republic of korea": "KP", "democratic republic of the congo": "CD", "denmark": "DK", "djibouti": "DJ", "dominica": "DM", "dominican republic": "DO", "dr congo": "CD", + "drc": "CD", "east timor": "TL", "ecuador": "EC", "egypt": "EG", + "egypt arab rep": "EG", "el salvador": "SV", "equatorial guinea": "GQ", "eritrea": "ER", @@ -84,6 +93,7 @@ "french southern and antarctic lands": "TF", "gabon": "GA", "gambia": "GM", + "gambia the": "GM", "gaza": "PS", "georgia": "GE", "germany": "DE", @@ -96,17 +106,20 @@ "guatemala": "GT", "guernsey": "GG", "guinea": "GN", - "guinea-bissau": "GW", + "guinea bissau": "GW", "guyana": "GY", "haiti": "HT", "heard island and mcdonald islands": "HM", "honduras": "HN", - "hong kong s.a.r.": "HK", + "hong kong": "HK", + "hong kong s a r": "HK", + "hong kong sar china": "HK", "hungary": "HU", "iceland": "IS", "india": "IN", "indonesia": "ID", "iran": "IR", + "iran islamic rep": "IR", "iraq": "IQ", "ireland": "IE", "isle of man": "IM", @@ -120,6 +133,8 @@ "kazakhstan": "KZ", "kenya": "KE", "kiribati": "KI", + "korea dem peoples rep": "KP", + "korea rep": "KR", "kosovo": "XK", "kuwait": "KW", "kyrgyz republic": "KG", @@ -134,7 +149,8 @@ "liechtenstein": "LI", "lithuania": "LT", "luxembourg": "LU", - "macao s.a.r": "MO", + "macao s a r": "MO", + "macao sar china": "MO", "madagascar": "MG", "malawi": "MW", "malaysia": "MY", @@ -146,12 +162,14 @@ "mauritius": "MU", "mexico": "MX", "micronesia": "FM", + "micronesia fed sts": "FM", "moldova": "MD", "monaco": "MC", "mongolia": "MN", "montenegro": "ME", "montserrat": "MS", "morocco": "MA", + "morocco western sahara": "MA", "mozambique": "MZ", "myanmar": "MM", "namibia": "NA", @@ -169,24 +187,30 @@ "north macedonia": "MK", "northern mariana islands": "MP", "norway": "NO", + "occupied palestinian territory": "PS", "oman": "OM", "pakistan": "PK", "palau": "PW", "palestine": "PS", + "palestine state of": "PS", + "palestinian territories": "PS", "panama": "PA", "papua new guinea": "PG", "paraguay": "PY", "peru": "PE", "philippines": "PH", "pitcairn islands": "PN", + "plurinational state of bolivia": "BO", "poland": "PL", "portugal": "PT", "puerto rico": "PR", "qatar": "QA", + "republic of korea": "KR", "republic of serbia": "RS", "republic of the congo": "CG", "romania": "RO", "russia": "RU", + "russian federation": "RU", "rwanda": "RW", "saint barthelemy": "BL", "saint helena": "SH", @@ -197,13 +221,16 @@ "saint vincent and the grenadines": "VC", "samoa": "WS", "san marino": "SM", - "são tomé and principe": "ST", + "sao tome": "ST", + "sao tome and principe": "ST", "saudi arabia": "SA", "senegal": "SN", + "serbia": "RS", "seychelles": "SC", "sierra leone": "SL", "singapore": "SG", "sint maarten": "SX", + "slovak republic": "SK", "slovakia": "SK", "slovenia": "SI", "solomon islands": "SB", @@ -214,13 +241,19 @@ "south sudan": "SS", "spain": "ES", "sri lanka": "LK", + "st kitts and nevis": "KN", + "st lucia": "LC", + "st vincent and the grenadines": "VC", "sudan": "SD", "suriname": "SR", "swaziland": "SZ", "sweden": "SE", "switzerland": "CH", "syria": "SY", + "syrian arab republic": "SY", + "taiwan": "TW", "tajikistan": "TJ", + "tanzania": "TZ", "thailand": "TH", "the bahamas": "BS", "the comoros": "KM", @@ -229,17 +262,18 @@ "the netherlands": "NL", "the philippines": "PH", "the seychelles": "SC", - "timor-leste": "TL", + "timor leste": "TL", "togo": "TG", "tonga": "TO", "trinidad and tobago": "TT", "tunisia": "TN", "turkey": "TR", + "turkiye": "TR", "turkmenistan": "TM", "turks and caicos": "TC", "turks and caicos islands": "TC", "tuvalu": "TV", - "u.s. virgin islands": "VI", + "u s virgin islands": "VI", "uae": "AE", "uganda": "UG", "uk": "GB", @@ -257,11 +291,15 @@ "vanuatu": "VU", "vatican": "VA", "venezuela": "VE", + "venezuela rb": "VE", + "viet nam": "VN", "vietnam": "VN", "wallis and futuna": "WF", "west bank": "PS", + "west bank and gaza": "PS", "western sahara": "EH", "yemen": "YE", + "yemen rep": "YE", "zambia": "ZM", "zimbabwe": "ZW" } diff --git a/scripts/shared/geo-extract.mjs b/scripts/shared/geo-extract.mjs index 81ec30a7b..db8a9c305 100644 --- a/scripts/shared/geo-extract.mjs +++ b/scripts/shared/geo-extract.mjs @@ -102,14 +102,11 @@ export function extractCountryCode(text) { // Normalize uppercase `US` (country abbreviation) to `united states` before lowercasing, // so it survives the stopword pass. Lowercase `us` (pronoun) has no equivalent expansion // and is stopped by UNIGRAM_STOPWORDS. `\b` avoids matching inside words like "plus". - const normalized = text.replace(/\bUS\b/g, 'United States'); - const lower = normalized.toLowerCase(); + const normalized = text.replace(/\bUS\b/g, 'United States') + .normalize('NFKD').replace(/\p{Diacritic}/gu, '').toLowerCase() + .replace(/['.(),/-]/g, ' '); - // Single left-to-right scan with local longest-match priority: - // at each position try bigram first (strips punctuation so "West Bank," works), - // then fall back to unigram. This preserves document order so the first - // country mentioned in the headline wins regardless of alias length. - const words = lower.split(/\s+/); + const words = normalized.split(/\s+/).filter(Boolean); for (let i = 0; i < words.length; i++) { if (i < words.length - 1) { const left = words[i].replace(/[^a-z]/g, ''); diff --git a/scripts/shared/iso2-to-iso3.json b/scripts/shared/iso2-to-iso3.json new file mode 100644 index 000000000..92a213357 --- /dev/null +++ b/scripts/shared/iso2-to-iso3.json @@ -0,0 +1,241 @@ +{ + "AD": "AND", + "AE": "ARE", + "AF": "AFG", + "AG": "ATG", + "AI": "AIA", + "AL": "ALB", + "AM": "ARM", + "AO": "AGO", + "AQ": "ATA", + "AR": "ARG", + "AS": "ASM", + "AT": "AUT", + "AU": "AUS", + "AW": "ABW", + "AX": "ALA", + "AZ": "AZE", + "BA": "BIH", + "BB": "BRB", + "BD": "BGD", + "BE": "BEL", + "BF": "BFA", + "BG": "BGR", + "BH": "BHR", + "BI": "BDI", + "BJ": "BEN", + "BL": "BLM", + "BM": "BMU", + "BN": "BRN", + "BO": "BOL", + "BR": "BRA", + "BS": "BHS", + "BT": "BTN", + "BW": "BWA", + "BY": "BLR", + "BZ": "BLZ", + "CA": "CAN", + "CD": "COD", + "CF": "CAF", + "CG": "COG", + "CH": "CHE", + "CI": "CIV", + "CK": "COK", + "CL": "CHL", + "CM": "CMR", + "CN": "CHN", + "CO": "COL", + "CR": "CRI", + "CU": "CUB", + "CV": "CPV", + "CW": "CUW", + "CY": "CYP", + "CZ": "CZE", + "DE": "DEU", + "DJ": "DJI", + "DK": "DNK", + "DM": "DMA", + "DO": "DOM", + "DZ": "DZA", + "EC": "ECU", + "EE": "EST", + "EG": "EGY", + "EH": "ESH", + "ER": "ERI", + "ES": "ESP", + "ET": "ETH", + "FI": "FIN", + "FJ": "FJI", + "FK": "FLK", + "FM": "FSM", + "FO": "FRO", + "FR": "FRA", + "GA": "GAB", + "GB": "GBR", + "GD": "GRD", + "GE": "GEO", + "GG": "GGY", + "GH": "GHA", + "GI": "GIB", + "GL": "GRL", + "GM": "GMB", + "GN": "GIN", + "GQ": "GNQ", + "GR": "GRC", + "GS": "SGS", + "GT": "GTM", + "GU": "GUM", + "GW": "GNB", + "GY": "GUY", + "HK": "HKG", + "HM": "HMD", + "HN": "HND", + "HR": "HRV", + "HT": "HTI", + "HU": "HUN", + "ID": "IDN", + "IE": "IRL", + "IL": "ISR", + "IM": "IMN", + "IN": "IND", + "IO": "IOT", + "IQ": "IRQ", + "IR": "IRN", + "IS": "ISL", + "IT": "ITA", + "JE": "JEY", + "JM": "JAM", + "JO": "JOR", + "JP": "JPN", + "KE": "KEN", + "KG": "KGZ", + "KH": "KHM", + "KI": "KIR", + "KM": "COM", + "KN": "KNA", + "KP": "PRK", + "KR": "KOR", + "KW": "KWT", + "KY": "CYM", + "KZ": "KAZ", + "LA": "LAO", + "LB": "LBN", + "LC": "LCA", + "LI": "LIE", + "LK": "LKA", + "LR": "LBR", + "LS": "LSO", + "LT": "LTU", + "LU": "LUX", + "LV": "LVA", + "LY": "LBY", + "MA": "MAR", + "MC": "MCO", + "MD": "MDA", + "ME": "MNE", + "MF": "MAF", + "MG": "MDG", + "MH": "MHL", + "MK": "MKD", + "ML": "MLI", + "MM": "MMR", + "MN": "MNG", + "MO": "MAC", + "MP": "MNP", + "MR": "MRT", + "MS": "MSR", + "MT": "MLT", + "MU": "MUS", + "MV": "MDV", + "MW": "MWI", + "MX": "MEX", + "MY": "MYS", + "MZ": "MOZ", + "NA": "NAM", + "NC": "NCL", + "NE": "NER", + "NF": "NFK", + "NG": "NGA", + "NI": "NIC", + "NL": "NLD", + "NO": "NOR", + "NP": "NPL", + "NR": "NRU", + "NU": "NIU", + "NZ": "NZL", + "OM": "OMN", + "PA": "PAN", + "PE": "PER", + "PF": "PYF", + "PG": "PNG", + "PH": "PHL", + "PK": "PAK", + "PL": "POL", + "PM": "SPM", + "PN": "PCN", + "PR": "PRI", + "PS": "PSE", + "PT": "PRT", + "PW": "PLW", + "PY": "PRY", + "QA": "QAT", + "RO": "ROU", + "RS": "SRB", + "RU": "RUS", + "RW": "RWA", + "SA": "SAU", + "SB": "SLB", + "SC": "SYC", + "SD": "SDN", + "SE": "SWE", + "SG": "SGP", + "SH": "SHN", + "SI": "SVN", + "SK": "SVK", + "SL": "SLE", + "SM": "SMR", + "SN": "SEN", + "SO": "SOM", + "SR": "SUR", + "SS": "SSD", + "ST": "STP", + "SV": "SLV", + "SX": "SXM", + "SY": "SYR", + "SZ": "SWZ", + "TC": "TCA", + "TD": "TCD", + "TF": "ATF", + "TG": "TGO", + "TH": "THA", + "TJ": "TJK", + "TL": "TLS", + "TM": "TKM", + "TN": "TUN", + "TO": "TON", + "TR": "TUR", + "TT": "TTO", + "TV": "TUV", + "TW": "TWN", + "TZ": "TZA", + "UA": "UKR", + "UG": "UGA", + "UM": "UMI", + "US": "USA", + "UY": "URY", + "UZ": "UZB", + "VA": "VAT", + "VC": "VCT", + "VE": "VEN", + "VG": "VGB", + "VI": "VIR", + "VN": "VNM", + "VU": "VUT", + "WF": "WLF", + "WS": "WSM", + "XK": "XKX", + "YE": "YEM", + "ZA": "ZAF", + "ZM": "ZMB", + "ZW": "ZWE" +} diff --git a/scripts/shared/iso3-to-iso2.json b/scripts/shared/iso3-to-iso2.json new file mode 100644 index 000000000..ec90cfb2d --- /dev/null +++ b/scripts/shared/iso3-to-iso2.json @@ -0,0 +1,241 @@ +{ + "ABW": "AW", + "AFG": "AF", + "AGO": "AO", + "AIA": "AI", + "ALA": "AX", + "ALB": "AL", + "AND": "AD", + "ARE": "AE", + "ARG": "AR", + "ARM": "AM", + "ASM": "AS", + "ATA": "AQ", + "ATF": "TF", + "ATG": "AG", + "AUS": "AU", + "AUT": "AT", + "AZE": "AZ", + "BDI": "BI", + "BEL": "BE", + "BEN": "BJ", + "BFA": "BF", + "BGD": "BD", + "BGR": "BG", + "BHR": "BH", + "BHS": "BS", + "BIH": "BA", + "BLM": "BL", + "BLR": "BY", + "BLZ": "BZ", + "BMU": "BM", + "BOL": "BO", + "BRA": "BR", + "BRB": "BB", + "BRN": "BN", + "BTN": "BT", + "BWA": "BW", + "CAF": "CF", + "CAN": "CA", + "CHE": "CH", + "CHL": "CL", + "CHN": "CN", + "CIV": "CI", + "CMR": "CM", + "COD": "CD", + "COG": "CG", + "COK": "CK", + "COL": "CO", + "COM": "KM", + "CPV": "CV", + "CRI": "CR", + "CUB": "CU", + "CUW": "CW", + "CYM": "KY", + "CYP": "CY", + "CZE": "CZ", + "DEU": "DE", + "DJI": "DJ", + "DMA": "DM", + "DNK": "DK", + "DOM": "DO", + "DZA": "DZ", + "ECU": "EC", + "EGY": "EG", + "ERI": "ER", + "ESH": "EH", + "ESP": "ES", + "EST": "EE", + "ETH": "ET", + "FIN": "FI", + "FJI": "FJ", + "FLK": "FK", + "FRA": "FR", + "FRO": "FO", + "FSM": "FM", + "GAB": "GA", + "GBR": "GB", + "GEO": "GE", + "GGY": "GG", + "GHA": "GH", + "GIB": "GI", + "GIN": "GN", + "GMB": "GM", + "GNB": "GW", + "GNQ": "GQ", + "GRC": "GR", + "GRD": "GD", + "GRL": "GL", + "GTM": "GT", + "GUM": "GU", + "GUY": "GY", + "HKG": "HK", + "HMD": "HM", + "HND": "HN", + "HRV": "HR", + "HTI": "HT", + "HUN": "HU", + "IDN": "ID", + "IMN": "IM", + "IND": "IN", + "IOT": "IO", + "IRL": "IE", + "IRN": "IR", + "IRQ": "IQ", + "ISL": "IS", + "ISR": "IL", + "ITA": "IT", + "JAM": "JM", + "JEY": "JE", + "JOR": "JO", + "JPN": "JP", + "KAZ": "KZ", + "KEN": "KE", + "KGZ": "KG", + "KHM": "KH", + "KIR": "KI", + "KNA": "KN", + "KOR": "KR", + "KWT": "KW", + "LAO": "LA", + "LBN": "LB", + "LBR": "LR", + "LBY": "LY", + "LCA": "LC", + "LIE": "LI", + "LKA": "LK", + "LSO": "LS", + "LTU": "LT", + "LUX": "LU", + "LVA": "LV", + "MAC": "MO", + "MAF": "MF", + "MAR": "MA", + "MCO": "MC", + "MDA": "MD", + "MDG": "MG", + "MDV": "MV", + "MEX": "MX", + "MHL": "MH", + "MKD": "MK", + "MLI": "ML", + "MLT": "MT", + "MMR": "MM", + "MNE": "ME", + "MNG": "MN", + "MNP": "MP", + "MOZ": "MZ", + "MRT": "MR", + "MSR": "MS", + "MUS": "MU", + "MWI": "MW", + "MYS": "MY", + "NAM": "NA", + "NCL": "NC", + "NER": "NE", + "NFK": "NF", + "NGA": "NG", + "NIC": "NI", + "NIU": "NU", + "NLD": "NL", + "NOR": "NO", + "NPL": "NP", + "NRU": "NR", + "NZL": "NZ", + "OMN": "OM", + "PAK": "PK", + "PAN": "PA", + "PCN": "PN", + "PER": "PE", + "PHL": "PH", + "PLW": "PW", + "PNG": "PG", + "POL": "PL", + "PRI": "PR", + "PRK": "KP", + "PRT": "PT", + "PRY": "PY", + "PSE": "PS", + "PYF": "PF", + "QAT": "QA", + "ROU": "RO", + "RUS": "RU", + "RWA": "RW", + "SAU": "SA", + "SDN": "SD", + "SEN": "SN", + "SGP": "SG", + "SGS": "GS", + "SHN": "SH", + "SLB": "SB", + "SLE": "SL", + "SLV": "SV", + "SMR": "SM", + "SOM": "SO", + "SPM": "PM", + "SRB": "RS", + "SSD": "SS", + "STP": "ST", + "SUR": "SR", + "SVK": "SK", + "SVN": "SI", + "SWE": "SE", + "SWZ": "SZ", + "SXM": "SX", + "SYC": "SC", + "SYR": "SY", + "TCA": "TC", + "TCD": "TD", + "TGO": "TG", + "THA": "TH", + "TJK": "TJ", + "TKM": "TM", + "TLS": "TL", + "TON": "TO", + "TTO": "TT", + "TUN": "TN", + "TUR": "TR", + "TUV": "TV", + "TWN": "TW", + "TZA": "TZ", + "UGA": "UG", + "UKR": "UA", + "UMI": "UM", + "URY": "UY", + "USA": "US", + "UZB": "UZ", + "VAT": "VA", + "VCT": "VC", + "VEN": "VE", + "VGB": "VG", + "VIR": "VI", + "VNM": "VN", + "VUT": "VU", + "WLF": "WF", + "WSM": "WS", + "XKX": "XK", + "YEM": "YE", + "ZAF": "ZA", + "ZMB": "ZM", + "ZWE": "ZW" +} diff --git a/server/worldmonitor/conflict/v1/_shared.ts b/server/worldmonitor/conflict/v1/_shared.ts index b528a7cce..f24096630 100644 --- a/server/worldmonitor/conflict/v1/_shared.ts +++ b/server/worldmonitor/conflict/v1/_shared.ts @@ -1,11 +1,5 @@ +import iso2ToIso3Json from '../../../../shared/iso2-to-iso3.json'; + export const UPSTREAM_TIMEOUT_MS = 15_000; -export const ISO2_TO_ISO3: Record = { - US: 'USA', RU: 'RUS', CN: 'CHN', UA: 'UKR', IR: 'IRN', - IL: 'ISR', TW: 'TWN', KP: 'PRK', SA: 'SAU', TR: 'TUR', - PL: 'POL', DE: 'DEU', FR: 'FRA', GB: 'GBR', IN: 'IND', - PK: 'PAK', SY: 'SYR', YE: 'YEM', MM: 'MMR', VE: 'VEN', - AF: 'AFG', SD: 'SDN', SS: 'SSD', SO: 'SOM', CD: 'COD', - ET: 'ETH', IQ: 'IRQ', CO: 'COL', NG: 'NGA', PS: 'PSE', - BR: 'BRA', AE: 'ARE', -}; +export const ISO2_TO_ISO3: Record = iso2ToIso3Json; diff --git a/server/worldmonitor/intelligence/v1/get-risk-scores.ts b/server/worldmonitor/intelligence/v1/get-risk-scores.ts index 1f35cae4e..d24fc80f6 100644 --- a/server/worldmonitor/intelligence/v1/get-risk-scores.ts +++ b/server/worldmonitor/intelligence/v1/get-risk-scores.ts @@ -8,6 +8,7 @@ import type { SeverityLevel, } from '../../../../src/generated/server/worldmonitor/intelligence/v1/service_server'; +import iso3ToIso2Json from '../../../../shared/iso3-to-iso2.json'; import { getCachedJson, setCachedJson, cachedFetchJsonWithMeta } from '../../../_shared/redis'; import { CLIMATE_ANOMALIES_KEY } from '../../../_shared/cache-keys'; import { TIER1_COUNTRIES } from './_shared'; @@ -141,15 +142,7 @@ function safeNum(v: unknown): number { return Number.isFinite(n) ? n : 0; } -// ISO3 → ISO2 mapping for displacement data (UNHCR uses ISO3) -const ISO3_TO_ISO2: Record = { - USA: 'US', RUS: 'RU', CHN: 'CN', UKR: 'UA', IRN: 'IR', ISR: 'IL', - TWN: 'TW', PRK: 'KP', SAU: 'SA', TUR: 'TR', POL: 'PL', DEU: 'DE', - FRA: 'FR', GBR: 'GB', IND: 'IN', PAK: 'PK', SYR: 'SY', YEM: 'YE', - MMR: 'MM', VEN: 'VE', CUB: 'CU', MEX: 'MX', BRA: 'BR', ARE: 'AE', - KOR: 'KR', IRQ: 'IQ', AFG: 'AF', LBN: 'LB', EGY: 'EG', JPN: 'JP', - QAT: 'QA', -}; +const ISO3_TO_ISO2: Record = iso3ToIso2Json; interface CountrySignals { protests: number; diff --git a/server/worldmonitor/resilience/v1/_dimension-scorers.ts b/server/worldmonitor/resilience/v1/_dimension-scorers.ts index 05988f4ea..68cd9fcd9 100644 --- a/server/worldmonitor/resilience/v1/_dimension-scorers.ts +++ b/server/worldmonitor/resilience/v1/_dimension-scorers.ts @@ -1,8 +1,5 @@ -import { readFileSync } from 'node:fs'; -import { dirname, join } from 'node:path'; -import { fileURLToPath } from 'node:url'; - import countryNames from '../../../../shared/country-names.json'; +import iso2ToIso3Json from '../../../../shared/iso2-to-iso3.json'; import { normalizeCountryToken } from '../../../_shared/country-token'; import { getCachedJson } from '../../../_shared/redis'; @@ -164,16 +161,7 @@ for (const [name, iso2] of Object.entries(countryNames as Record COUNTRY_NAME_ALIASES.set(code, current); } -const ISO2_TO_ISO3: Record = {}; -{ - const __dirname = dirname(fileURLToPath(import.meta.url)); - const geojson = JSON.parse(readFileSync(join(__dirname, '../../../../public/data/countries.geojson'), 'utf8')); - for (const feature of geojson?.features ?? []) { - const iso2 = String(feature?.properties?.['ISO3166-1-Alpha-2'] ?? '').toUpperCase(); - const iso3 = String(feature?.properties?.['ISO3166-1-Alpha-3'] ?? '').toUpperCase(); - if (/^[A-Z]{2}$/.test(iso2) && /^[A-Z]{3}$/.test(iso3)) ISO2_TO_ISO3[iso2] = iso3; - } -} +const ISO2_TO_ISO3: Record = iso2ToIso3Json; const RESILIENCE_DOMAIN_WEIGHTS: Record = { economic: 0.22, diff --git a/shared/country-names.json b/shared/country-names.json index eb7586ea1..1c9b7a173 100644 --- a/shared/country-names.json +++ b/shared/country-names.json @@ -16,6 +16,7 @@ "austria": "AT", "azerbaijan": "AZ", "bahamas": "BS", + "bahamas the": "BS", "bahrain": "BH", "bangladesh": "BD", "barbados": "BB", @@ -25,6 +26,7 @@ "benin": "BJ", "bermuda": "BM", "bhutan": "BT", + "bolivarian republic of venezuela": "VE", "bolivia": "BO", "bosnia and herzegovina": "BA", "botswana": "BW", @@ -49,25 +51,32 @@ "china": "CN", "colombia": "CO", "comoros": "KM", + "congo": "CG", + "congo brazzaville": "CG", + "congo dem rep": "CD", + "congo kinshasa": "CD", + "congo rep": "CG", "cook islands": "CK", "costa rica": "CR", "cote d ivoire": "CI", - "cote d'ivoire": "CI", "croatia": "HR", "cuba": "CU", - "curaçao": "CW", + "curacao": "CW", "cyprus": "CY", "czech republic": "CZ", "czechia": "CZ", + "democratic peoples republic of korea": "KP", "democratic republic of the congo": "CD", "denmark": "DK", "djibouti": "DJ", "dominica": "DM", "dominican republic": "DO", "dr congo": "CD", + "drc": "CD", "east timor": "TL", "ecuador": "EC", "egypt": "EG", + "egypt arab rep": "EG", "el salvador": "SV", "equatorial guinea": "GQ", "eritrea": "ER", @@ -84,6 +93,7 @@ "french southern and antarctic lands": "TF", "gabon": "GA", "gambia": "GM", + "gambia the": "GM", "gaza": "PS", "georgia": "GE", "germany": "DE", @@ -96,17 +106,20 @@ "guatemala": "GT", "guernsey": "GG", "guinea": "GN", - "guinea-bissau": "GW", + "guinea bissau": "GW", "guyana": "GY", "haiti": "HT", "heard island and mcdonald islands": "HM", "honduras": "HN", - "hong kong s.a.r.": "HK", + "hong kong": "HK", + "hong kong s a r": "HK", + "hong kong sar china": "HK", "hungary": "HU", "iceland": "IS", "india": "IN", "indonesia": "ID", "iran": "IR", + "iran islamic rep": "IR", "iraq": "IQ", "ireland": "IE", "isle of man": "IM", @@ -120,6 +133,8 @@ "kazakhstan": "KZ", "kenya": "KE", "kiribati": "KI", + "korea dem peoples rep": "KP", + "korea rep": "KR", "kosovo": "XK", "kuwait": "KW", "kyrgyz republic": "KG", @@ -134,7 +149,8 @@ "liechtenstein": "LI", "lithuania": "LT", "luxembourg": "LU", - "macao s.a.r": "MO", + "macao s a r": "MO", + "macao sar china": "MO", "madagascar": "MG", "malawi": "MW", "malaysia": "MY", @@ -146,12 +162,14 @@ "mauritius": "MU", "mexico": "MX", "micronesia": "FM", + "micronesia fed sts": "FM", "moldova": "MD", "monaco": "MC", "mongolia": "MN", "montenegro": "ME", "montserrat": "MS", "morocco": "MA", + "morocco western sahara": "MA", "mozambique": "MZ", "myanmar": "MM", "namibia": "NA", @@ -169,24 +187,30 @@ "north macedonia": "MK", "northern mariana islands": "MP", "norway": "NO", + "occupied palestinian territory": "PS", "oman": "OM", "pakistan": "PK", "palau": "PW", "palestine": "PS", + "palestine state of": "PS", + "palestinian territories": "PS", "panama": "PA", "papua new guinea": "PG", "paraguay": "PY", "peru": "PE", "philippines": "PH", "pitcairn islands": "PN", + "plurinational state of bolivia": "BO", "poland": "PL", "portugal": "PT", "puerto rico": "PR", "qatar": "QA", + "republic of korea": "KR", "republic of serbia": "RS", "republic of the congo": "CG", "romania": "RO", "russia": "RU", + "russian federation": "RU", "rwanda": "RW", "saint barthelemy": "BL", "saint helena": "SH", @@ -197,13 +221,16 @@ "saint vincent and the grenadines": "VC", "samoa": "WS", "san marino": "SM", - "são tomé and principe": "ST", + "sao tome": "ST", + "sao tome and principe": "ST", "saudi arabia": "SA", "senegal": "SN", + "serbia": "RS", "seychelles": "SC", "sierra leone": "SL", "singapore": "SG", "sint maarten": "SX", + "slovak republic": "SK", "slovakia": "SK", "slovenia": "SI", "solomon islands": "SB", @@ -214,13 +241,19 @@ "south sudan": "SS", "spain": "ES", "sri lanka": "LK", + "st kitts and nevis": "KN", + "st lucia": "LC", + "st vincent and the grenadines": "VC", "sudan": "SD", "suriname": "SR", "swaziland": "SZ", "sweden": "SE", "switzerland": "CH", "syria": "SY", + "syrian arab republic": "SY", + "taiwan": "TW", "tajikistan": "TJ", + "tanzania": "TZ", "thailand": "TH", "the bahamas": "BS", "the comoros": "KM", @@ -229,17 +262,18 @@ "the netherlands": "NL", "the philippines": "PH", "the seychelles": "SC", - "timor-leste": "TL", + "timor leste": "TL", "togo": "TG", "tonga": "TO", "trinidad and tobago": "TT", "tunisia": "TN", "turkey": "TR", + "turkiye": "TR", "turkmenistan": "TM", "turks and caicos": "TC", "turks and caicos islands": "TC", "tuvalu": "TV", - "u.s. virgin islands": "VI", + "u s virgin islands": "VI", "uae": "AE", "uganda": "UG", "uk": "GB", @@ -257,11 +291,15 @@ "vanuatu": "VU", "vatican": "VA", "venezuela": "VE", + "venezuela rb": "VE", + "viet nam": "VN", "vietnam": "VN", "wallis and futuna": "WF", "west bank": "PS", + "west bank and gaza": "PS", "western sahara": "EH", "yemen": "YE", + "yemen rep": "YE", "zambia": "ZM", "zimbabwe": "ZW" } diff --git a/shared/iso2-to-iso3.json b/shared/iso2-to-iso3.json new file mode 100644 index 000000000..92a213357 --- /dev/null +++ b/shared/iso2-to-iso3.json @@ -0,0 +1,241 @@ +{ + "AD": "AND", + "AE": "ARE", + "AF": "AFG", + "AG": "ATG", + "AI": "AIA", + "AL": "ALB", + "AM": "ARM", + "AO": "AGO", + "AQ": "ATA", + "AR": "ARG", + "AS": "ASM", + "AT": "AUT", + "AU": "AUS", + "AW": "ABW", + "AX": "ALA", + "AZ": "AZE", + "BA": "BIH", + "BB": "BRB", + "BD": "BGD", + "BE": "BEL", + "BF": "BFA", + "BG": "BGR", + "BH": "BHR", + "BI": "BDI", + "BJ": "BEN", + "BL": "BLM", + "BM": "BMU", + "BN": "BRN", + "BO": "BOL", + "BR": "BRA", + "BS": "BHS", + "BT": "BTN", + "BW": "BWA", + "BY": "BLR", + "BZ": "BLZ", + "CA": "CAN", + "CD": "COD", + "CF": "CAF", + "CG": "COG", + "CH": "CHE", + "CI": "CIV", + "CK": "COK", + "CL": "CHL", + "CM": "CMR", + "CN": "CHN", + "CO": "COL", + "CR": "CRI", + "CU": "CUB", + "CV": "CPV", + "CW": "CUW", + "CY": "CYP", + "CZ": "CZE", + "DE": "DEU", + "DJ": "DJI", + "DK": "DNK", + "DM": "DMA", + "DO": "DOM", + "DZ": "DZA", + "EC": "ECU", + "EE": "EST", + "EG": "EGY", + "EH": "ESH", + "ER": "ERI", + "ES": "ESP", + "ET": "ETH", + "FI": "FIN", + "FJ": "FJI", + "FK": "FLK", + "FM": "FSM", + "FO": "FRO", + "FR": "FRA", + "GA": "GAB", + "GB": "GBR", + "GD": "GRD", + "GE": "GEO", + "GG": "GGY", + "GH": "GHA", + "GI": "GIB", + "GL": "GRL", + "GM": "GMB", + "GN": "GIN", + "GQ": "GNQ", + "GR": "GRC", + "GS": "SGS", + "GT": "GTM", + "GU": "GUM", + "GW": "GNB", + "GY": "GUY", + "HK": "HKG", + "HM": "HMD", + "HN": "HND", + "HR": "HRV", + "HT": "HTI", + "HU": "HUN", + "ID": "IDN", + "IE": "IRL", + "IL": "ISR", + "IM": "IMN", + "IN": "IND", + "IO": "IOT", + "IQ": "IRQ", + "IR": "IRN", + "IS": "ISL", + "IT": "ITA", + "JE": "JEY", + "JM": "JAM", + "JO": "JOR", + "JP": "JPN", + "KE": "KEN", + "KG": "KGZ", + "KH": "KHM", + "KI": "KIR", + "KM": "COM", + "KN": "KNA", + "KP": "PRK", + "KR": "KOR", + "KW": "KWT", + "KY": "CYM", + "KZ": "KAZ", + "LA": "LAO", + "LB": "LBN", + "LC": "LCA", + "LI": "LIE", + "LK": "LKA", + "LR": "LBR", + "LS": "LSO", + "LT": "LTU", + "LU": "LUX", + "LV": "LVA", + "LY": "LBY", + "MA": "MAR", + "MC": "MCO", + "MD": "MDA", + "ME": "MNE", + "MF": "MAF", + "MG": "MDG", + "MH": "MHL", + "MK": "MKD", + "ML": "MLI", + "MM": "MMR", + "MN": "MNG", + "MO": "MAC", + "MP": "MNP", + "MR": "MRT", + "MS": "MSR", + "MT": "MLT", + "MU": "MUS", + "MV": "MDV", + "MW": "MWI", + "MX": "MEX", + "MY": "MYS", + "MZ": "MOZ", + "NA": "NAM", + "NC": "NCL", + "NE": "NER", + "NF": "NFK", + "NG": "NGA", + "NI": "NIC", + "NL": "NLD", + "NO": "NOR", + "NP": "NPL", + "NR": "NRU", + "NU": "NIU", + "NZ": "NZL", + "OM": "OMN", + "PA": "PAN", + "PE": "PER", + "PF": "PYF", + "PG": "PNG", + "PH": "PHL", + "PK": "PAK", + "PL": "POL", + "PM": "SPM", + "PN": "PCN", + "PR": "PRI", + "PS": "PSE", + "PT": "PRT", + "PW": "PLW", + "PY": "PRY", + "QA": "QAT", + "RO": "ROU", + "RS": "SRB", + "RU": "RUS", + "RW": "RWA", + "SA": "SAU", + "SB": "SLB", + "SC": "SYC", + "SD": "SDN", + "SE": "SWE", + "SG": "SGP", + "SH": "SHN", + "SI": "SVN", + "SK": "SVK", + "SL": "SLE", + "SM": "SMR", + "SN": "SEN", + "SO": "SOM", + "SR": "SUR", + "SS": "SSD", + "ST": "STP", + "SV": "SLV", + "SX": "SXM", + "SY": "SYR", + "SZ": "SWZ", + "TC": "TCA", + "TD": "TCD", + "TF": "ATF", + "TG": "TGO", + "TH": "THA", + "TJ": "TJK", + "TL": "TLS", + "TM": "TKM", + "TN": "TUN", + "TO": "TON", + "TR": "TUR", + "TT": "TTO", + "TV": "TUV", + "TW": "TWN", + "TZ": "TZA", + "UA": "UKR", + "UG": "UGA", + "UM": "UMI", + "US": "USA", + "UY": "URY", + "UZ": "UZB", + "VA": "VAT", + "VC": "VCT", + "VE": "VEN", + "VG": "VGB", + "VI": "VIR", + "VN": "VNM", + "VU": "VUT", + "WF": "WLF", + "WS": "WSM", + "XK": "XKX", + "YE": "YEM", + "ZA": "ZAF", + "ZM": "ZMB", + "ZW": "ZWE" +} diff --git a/shared/iso3-to-iso2.json b/shared/iso3-to-iso2.json new file mode 100644 index 000000000..ec90cfb2d --- /dev/null +++ b/shared/iso3-to-iso2.json @@ -0,0 +1,241 @@ +{ + "ABW": "AW", + "AFG": "AF", + "AGO": "AO", + "AIA": "AI", + "ALA": "AX", + "ALB": "AL", + "AND": "AD", + "ARE": "AE", + "ARG": "AR", + "ARM": "AM", + "ASM": "AS", + "ATA": "AQ", + "ATF": "TF", + "ATG": "AG", + "AUS": "AU", + "AUT": "AT", + "AZE": "AZ", + "BDI": "BI", + "BEL": "BE", + "BEN": "BJ", + "BFA": "BF", + "BGD": "BD", + "BGR": "BG", + "BHR": "BH", + "BHS": "BS", + "BIH": "BA", + "BLM": "BL", + "BLR": "BY", + "BLZ": "BZ", + "BMU": "BM", + "BOL": "BO", + "BRA": "BR", + "BRB": "BB", + "BRN": "BN", + "BTN": "BT", + "BWA": "BW", + "CAF": "CF", + "CAN": "CA", + "CHE": "CH", + "CHL": "CL", + "CHN": "CN", + "CIV": "CI", + "CMR": "CM", + "COD": "CD", + "COG": "CG", + "COK": "CK", + "COL": "CO", + "COM": "KM", + "CPV": "CV", + "CRI": "CR", + "CUB": "CU", + "CUW": "CW", + "CYM": "KY", + "CYP": "CY", + "CZE": "CZ", + "DEU": "DE", + "DJI": "DJ", + "DMA": "DM", + "DNK": "DK", + "DOM": "DO", + "DZA": "DZ", + "ECU": "EC", + "EGY": "EG", + "ERI": "ER", + "ESH": "EH", + "ESP": "ES", + "EST": "EE", + "ETH": "ET", + "FIN": "FI", + "FJI": "FJ", + "FLK": "FK", + "FRA": "FR", + "FRO": "FO", + "FSM": "FM", + "GAB": "GA", + "GBR": "GB", + "GEO": "GE", + "GGY": "GG", + "GHA": "GH", + "GIB": "GI", + "GIN": "GN", + "GMB": "GM", + "GNB": "GW", + "GNQ": "GQ", + "GRC": "GR", + "GRD": "GD", + "GRL": "GL", + "GTM": "GT", + "GUM": "GU", + "GUY": "GY", + "HKG": "HK", + "HMD": "HM", + "HND": "HN", + "HRV": "HR", + "HTI": "HT", + "HUN": "HU", + "IDN": "ID", + "IMN": "IM", + "IND": "IN", + "IOT": "IO", + "IRL": "IE", + "IRN": "IR", + "IRQ": "IQ", + "ISL": "IS", + "ISR": "IL", + "ITA": "IT", + "JAM": "JM", + "JEY": "JE", + "JOR": "JO", + "JPN": "JP", + "KAZ": "KZ", + "KEN": "KE", + "KGZ": "KG", + "KHM": "KH", + "KIR": "KI", + "KNA": "KN", + "KOR": "KR", + "KWT": "KW", + "LAO": "LA", + "LBN": "LB", + "LBR": "LR", + "LBY": "LY", + "LCA": "LC", + "LIE": "LI", + "LKA": "LK", + "LSO": "LS", + "LTU": "LT", + "LUX": "LU", + "LVA": "LV", + "MAC": "MO", + "MAF": "MF", + "MAR": "MA", + "MCO": "MC", + "MDA": "MD", + "MDG": "MG", + "MDV": "MV", + "MEX": "MX", + "MHL": "MH", + "MKD": "MK", + "MLI": "ML", + "MLT": "MT", + "MMR": "MM", + "MNE": "ME", + "MNG": "MN", + "MNP": "MP", + "MOZ": "MZ", + "MRT": "MR", + "MSR": "MS", + "MUS": "MU", + "MWI": "MW", + "MYS": "MY", + "NAM": "NA", + "NCL": "NC", + "NER": "NE", + "NFK": "NF", + "NGA": "NG", + "NIC": "NI", + "NIU": "NU", + "NLD": "NL", + "NOR": "NO", + "NPL": "NP", + "NRU": "NR", + "NZL": "NZ", + "OMN": "OM", + "PAK": "PK", + "PAN": "PA", + "PCN": "PN", + "PER": "PE", + "PHL": "PH", + "PLW": "PW", + "PNG": "PG", + "POL": "PL", + "PRI": "PR", + "PRK": "KP", + "PRT": "PT", + "PRY": "PY", + "PSE": "PS", + "PYF": "PF", + "QAT": "QA", + "ROU": "RO", + "RUS": "RU", + "RWA": "RW", + "SAU": "SA", + "SDN": "SD", + "SEN": "SN", + "SGP": "SG", + "SGS": "GS", + "SHN": "SH", + "SLB": "SB", + "SLE": "SL", + "SLV": "SV", + "SMR": "SM", + "SOM": "SO", + "SPM": "PM", + "SRB": "RS", + "SSD": "SS", + "STP": "ST", + "SUR": "SR", + "SVK": "SK", + "SVN": "SI", + "SWE": "SE", + "SWZ": "SZ", + "SXM": "SX", + "SYC": "SC", + "SYR": "SY", + "TCA": "TC", + "TCD": "TD", + "TGO": "TG", + "THA": "TH", + "TJK": "TJ", + "TKM": "TM", + "TLS": "TL", + "TON": "TO", + "TTO": "TT", + "TUN": "TN", + "TUR": "TR", + "TUV": "TV", + "TWN": "TW", + "TZA": "TZ", + "UGA": "UG", + "UKR": "UA", + "UMI": "UM", + "URY": "UY", + "USA": "US", + "UZB": "UZ", + "VAT": "VA", + "VCT": "VC", + "VEN": "VE", + "VGB": "VG", + "VIR": "VI", + "VNM": "VN", + "VUT": "VU", + "WLF": "WF", + "WSM": "WS", + "XKX": "XK", + "YEM": "YE", + "ZAF": "ZA", + "ZMB": "ZM", + "ZWE": "ZW" +} diff --git a/tests/country-resolver.test.mjs b/tests/country-resolver.test.mjs new file mode 100644 index 000000000..409a3cada --- /dev/null +++ b/tests/country-resolver.test.mjs @@ -0,0 +1,83 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; + +import { normalizeCountryToken, createCountryResolvers, resolveIso2, isIso2, isIso3 } from '../scripts/_country-resolver.mjs'; + +const root = resolve(import.meta.dirname, '..'); +const countryNames = JSON.parse(readFileSync(resolve(root, 'shared/country-names.json'), 'utf8')); +const iso3ToIso2 = JSON.parse(readFileSync(resolve(root, 'shared/iso3-to-iso2.json'), 'utf8')); +const iso2ToIso3 = JSON.parse(readFileSync(resolve(root, 'shared/iso2-to-iso3.json'), 'utf8')); + +describe('country-names.json structural validation', () => { + it('every key equals normalizeCountryToken(key)', () => { + for (const key of Object.keys(countryNames)) { + assert.equal(key, normalizeCountryToken(key), `key "${key}" is not normalized`); + } + }); + it('every value is a valid ISO2 code', () => { + for (const [key, value] of Object.entries(countryNames)) { + assert.ok(isIso2(value), `"${key}" → "${value}" is not valid ISO2`); + } + }); + it('has at least 300 entries', () => { + assert.ok(Object.keys(countryNames).length >= 300); + }); +}); + +describe('iso3-to-iso2.json validation', () => { + it('has at least 238 entries', () => { + assert.ok(Object.keys(iso3ToIso2).length >= 238); + }); + it('every key is valid ISO3, every value is valid ISO2', () => { + for (const [k, v] of Object.entries(iso3ToIso2)) { + assert.ok(isIso3(k), `key "${k}" not valid ISO3`); + assert.ok(isIso2(v), `value "${v}" not valid ISO2`); + } + }); + it('bidirectional consistency with iso2-to-iso3', () => { + for (const [iso2, iso3] of Object.entries(iso2ToIso3)) { + assert.equal(iso3ToIso2[iso3], iso2, `iso3ToIso2[${iso3}] !== ${iso2}`); + } + }); + it('resolves Taiwan and Kosovo', () => { + assert.equal(iso3ToIso2['TWN'], 'TW'); + assert.equal(iso3ToIso2['XKX'], 'XK'); + }); +}); + +describe('resolver parity', () => { + const resolvers = createCountryResolvers(); + + const oldAliases = { + 'bahamas the': 'BS', 'cape verde': 'CV', 'congo brazzaville': 'CG', + 'congo kinshasa': 'CD', 'congo rep': 'CG', 'congo dem rep': 'CD', + 'czech republic': 'CZ', 'egypt arab rep': 'EG', 'gambia the': 'GM', + 'hong kong sar china': 'HK', 'iran islamic rep': 'IR', + 'korea dem peoples rep': 'KP', 'korea rep': 'KR', 'lao pdr': 'LA', + 'macao sar china': 'MO', 'micronesia fed sts': 'FM', + 'morocco western sahara': 'MA', 'north macedonia': 'MK', + 'occupied palestinian territory': 'PS', 'palestinian territories': 'PS', + 'palestine state of': 'PS', 'russian federation': 'RU', + 'slovak republic': 'SK', 'st kitts and nevis': 'KN', 'st lucia': 'LC', + 'st vincent and the grenadines': 'VC', 'syrian arab republic': 'SY', + 'the bahamas': 'BS', 'timor leste': 'TL', 'turkiye': 'TR', + 'united states of america': 'US', 'venezuela rb': 'VE', + 'viet nam': 'VN', 'west bank and gaza': 'PS', 'yemen rep': 'YE', + }; + + it('resolves all old COUNTRY_ALIAS_MAP entries', () => { + for (const [name, expected] of Object.entries(oldAliases)) { + const result = resolveIso2({ name }, resolvers); + assert.equal(result, expected, `"${name}" → ${result}, expected ${expected}`); + } + }); + + it('resolves ISO3 codes', () => { + assert.equal(resolveIso2({ iso3: 'USA' }, resolvers), 'US'); + assert.equal(resolveIso2({ iso3: 'GBR' }, resolvers), 'GB'); + assert.equal(resolveIso2({ iso3: 'TWN' }, resolvers), 'TW'); + assert.equal(resolveIso2({ iso3: 'XKX' }, resolvers), 'XK'); + }); +}); diff --git a/tests/resilience-static-seed.test.mjs b/tests/resilience-static-seed.test.mjs index 6d4634032..314ae4afb 100644 --- a/tests/resilience-static-seed.test.mjs +++ b/tests/resilience-static-seed.test.mjs @@ -29,14 +29,7 @@ function makeResolvers() { yemen: 'YE', 'cape verde': 'CV', }, - { - features: [ - { properties: { name: 'Norway', 'ISO3166-1-Alpha-2': 'NO', 'ISO3166-1-Alpha-3': 'NOR' } }, - { properties: { name: 'United States', 'ISO3166-1-Alpha-2': 'US', 'ISO3166-1-Alpha-3': 'USA' } }, - { properties: { name: 'Yemen', 'ISO3166-1-Alpha-2': 'YE', 'ISO3166-1-Alpha-3': 'YEM' } }, - { properties: { name: 'Cape Verde', 'ISO3166-1-Alpha-2': 'CV', 'ISO3166-1-Alpha-3': 'CPV' } }, - ], - }, + { NOR: 'NO', USA: 'US', YEM: 'YE', CPV: 'CV' }, ); }