Files
worldmonitor/scripts/seed-vpd-tracker.mjs
Elie Habib e7ba05553d fix(health): disease outbreaks CDC/Outbreak feeds, VPD tracker seed, BOOTSTRAP_KEYS gold standard (#2378)
* feat(panels): Disease Outbreaks, Shipping Stress, Social Velocity, nuclear test site monitoring

- Add HealthService proto with ListDiseaseOutbreaks RPC (WHO + ProMED RSS)
- Add GetShippingStress RPC to SupplyChainService (Yahoo Finance carrier ETFs)
- Add GetSocialVelocity RPC to IntelligenceService (Reddit r/worldnews + r/geopolitics)
- Enrich earthquake seed with Haversine nuclear test-site proximity scoring
- Add 5 nuclear test sites to NUCLEAR_FACILITIES (Punggye-ri, Lop Nur, Novaya Zemlya, Nevada NTS, Semipalatinsk)
- Add shipping stress + social velocity seed loops to ais-relay.cjs
- Add seed-disease-outbreaks.mjs Railway cron script
- Wire all new RPCs: edge functions, handlers, gateway cache tiers, health.js STANDALONE_KEYS/SEED_META

* fix(relay): apply gold standard retry/TTL-extend pattern to shipping-stress and social-velocity seeders

* fix(review): address all PR #2375 review findings

- health.js: shippingStress maxStaleMin 30→45 (3x interval), socialVelocity 20→30 (3x interval)
- health.js: remove shippingStress/diseaseOutbreaks/socialVelocity from ON_DEMAND_KEYS (relay/cron seeds, not on-demand)
- cache-keys.ts: add shippingStress, diseaseOutbreaks, socialVelocity to BOOTSTRAP_CACHE_KEYS
- ais-relay.cjs: stressScore formula 50→40 (neutral market = moderate, not elevated)
- ais-relay.cjs: fetchedAt Date.now() (consistent with other seeders)
- ais-relay.cjs: deduplicate cross-subreddit article URLs in social velocity loop
- seed-disease-outbreaks.mjs: WHO URL → specific DON RSS endpoint (not dead general news feed)
- seed-disease-outbreaks.mjs: validate() requires outbreaks.length >= 1 (reject empty array)
- seed-disease-outbreaks.mjs: stable id using hash(link) not array index
- seed-disease-outbreaks.mjs: RSS regexes use [\s\S]*? for CDATA multiline content
- seed-earthquakes.mjs: Lop Nur coordinates corrected (41.39,89.03 not 41.75,88.35)
- seed-earthquakes.mjs: sourceVersion bumped to usgs-4.5-day-nuclear-v1
- earthquake.proto: fields 8-11 marked optional (distinguish not-enriched from enriched=false/0)
- buf generate: regenerate seismology service stubs

* revert(cache-keys): don't add new keys to bootstrap without frontend consumers

* fix(panels): address all P1/P2/P3 review findings for PR #2375

- proto: add INT64_ENCODING_NUMBER annotation + sebuf import to get_shipping_stress.proto (run make generate)
- bootstrap: register shippingStress (fast), socialVelocity (fast), diseaseOutbreaks (slow) in api/bootstrap.js + cache-keys.ts
- relay: update WIDGET_SYSTEM_PROMPT with new bootstrap keys and live RPCs for health/supply-chain/intelligence
- seeder: remove broken ProMED feed URL (promedmail.org/feed/ returns HTML 404); add 500K size guard to fetchRssItems; replace private COUNTRY_CODE_MAP with shared geo-extract.mjs; remove permanently-empty location field; bump sourceVersion to who-don-rss-v2
- handlers: remove dead .catch from all 3 new RPC handlers; fix stressLevel fallback to low; fix fetchedAt fallback to 0
- services: add fetchShippingStress, disease-outbreaks.ts, social-velocity.ts with getHydratedData consumers

* fix(health): move seeded keys to BOOTSTRAP_KEYS, add VPD tracker seed and feeds

- Reclassify diseaseOutbreaks, shippingStress, socialVelocity from
  STANDALONE_KEYS to BOOTSTRAP_KEYS so health endpoint reports CRIT
  (not WARN) when their seeds miss a cycle
- Add vpdTrackerRealtime and vpdTrackerHistorical to BOOTSTRAP_KEYS
  with SEED_META entries (maxStaleMin: 2880 = 2x daily interval)
- Fix seed-disease-outbreaks: add CDC and Outbreak News Today feeds
  alongside WHO, populate location field from title parsing, fix TTL
  to 259200s (3x daily interval per gold standard)
- Add seed-vpd-tracker.mjs: scrapes Think Global Health VPD Tracker
  bundle (1,827 realtime alerts + 25,960 historical WHO records),
  writes both Redis keys in one runSeed call via extraKeys
- Add review todos 049-059 from PR #2375 code review
2026-03-27 22:47:24 +04:00

128 lines
4.6 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env node
/**
* Seed: Think Global Health Vaccine-Preventable Disease Tracker
*
* Source: https://thinkglobalhealth.github.io/disease_tracker
* Both datasets are embedded in index_bundle.js (updated ~weekly by CFR staff).
* No API key required — the bundle is public GitHub Pages.
*
* Writes two Redis keys:
* health:vpd-tracker:realtime:v1 — geo-located outbreak alerts (lat/lng, cases, source URL)
* health:vpd-tracker:historical:v1 — WHO annual case counts by country/disease/year
*/
import { loadEnvFile, CHROME_UA, runSeed } from './_seed-utils.mjs';
// NOTE(review): presumably loads environment variables from a local env file
// resolved relative to this script — confirm in _seed-utils.mjs.
loadEnvFile(import.meta.url);
// Primary Redis key handed to runSeed: the realtime outbreak-alert dataset.
const CANONICAL_KEY = 'health:vpd-tracker:realtime:v1';
// Secondary Redis key, supplied through runSeed's extraKeys: the historical dataset.
const HISTORICAL_KEY = 'health:vpd-tracker:historical:v1';
// Public GitHub Pages bundle in which both datasets are embedded.
const BUNDLE_URL = 'https://thinkglobalhealth.github.io/disease_tracker/index_bundle.js';
// TTL in seconds, applied to both keys.
const CACHE_TTL = 259200; // 72h (3 days) — 3× daily cron interval per gold standard; survives 2 consecutive missed runs
/**
 * Parse realtime outbreak alerts from the embedded object array.
 *
 * Bundle format (webpack CommonJS):
 *   var a=[{Alert_ID:"8731706",lat:"56.85",lng:"24.92",diseases:"Measles",...},
 *          ...
 *          {Alert_ID:"8707570",...}];
 *   a.columns=["Alert_ID","lat","lng","diseases","place_name","country","date","cases","link","Type","summary"]
 *
 * The .columns metadata property marks the end of the array.
 *
 * Rows whose lat/lng do not parse to finite numbers are dropped, so no NaN
 * coordinate (which JSON would serialize as null) reaches the stored payload.
 *
 * @param {string} bundle - Full text of index_bundle.js.
 * @returns {Array<{alertId: string, lat: number, lng: number, disease: string,
 *   placeName: string, country: string, date: string, cases: (number|null),
 *   sourceUrl: string, summary: string}>} Geo-located alerts; `cases` is null
 *   when the source row has no case count.
 * @throws {Error} If the bundle markers cannot be located or the extracted
 *   slice does not evaluate to an array.
 */
function parseRealtimeAlerts(bundle) {
  const colIdx = bundle.indexOf('.columns=["Alert_ID"');
  if (colIdx === -1) throw new Error('[VPD] Realtime data columns marker not found in bundle');

  // Search backwards from the columns marker for the closing '}]' of the array.
  const arrayEnd = bundle.lastIndexOf('}]', colIdx);
  if (arrayEnd === -1) throw new Error('[VPD] Realtime data array end not found');

  // NOTE(review): 'var a=[' depends on the minifier's chosen identifier; a
  // rebuilt bundle may rename it and trip the error below.
  const declaration = 'var a=';
  const arrayStart = bundle.lastIndexOf(`${declaration}[`, arrayEnd);
  if (arrayStart === -1) throw new Error('[VPD] Realtime data array start not found');

  // Keep everything from the opening '[' through the closing '}]'.
  const rawArray = bundle.slice(arrayStart + declaration.length, arrayEnd + 2);

  // SECURITY(review): this evaluates text downloaded from a remote host with
  // the full privileges of this process. The bundle uses JS object-literal
  // syntax (unquoted keys), so JSON.parse cannot be used directly; consider a
  // data-only parser if the source should not be trusted to this degree.
  // eslint-disable-next-line no-new-func
  const rows = Function('"use strict"; return ' + rawArray)();
  if (!Array.isArray(rows)) throw new Error('[VPD] Realtime data did not evaluate to an array');

  const alerts = [];
  for (const r of rows) {
    const lat = Number.parseFloat(r.lat);
    const lng = Number.parseFloat(r.lng);
    // Skip rows with missing or non-numeric coordinates.
    if (!Number.isFinite(lat) || !Number.isFinite(lng)) continue;
    alerts.push({
      alertId: r.Alert_ID,
      lat,
      lng,
      disease: r.diseases,
      placeName: r.place_name,
      country: r.country,
      date: r.date,
      // Strip thousands separators before parsing; absent counts stay null.
      cases: r.cases ? Number.parseInt(String(r.cases).replace(/,/g, ''), 10) || 0 : null,
      sourceUrl: r.link,
      summary: r.summary,
    });
  }
  return alerts;
}
/**
 * Parse historical WHO annual case counts from the embedded JS object array.
 *
 * Bundle format (second dataset, located after the realtime `.columns` marker).
 * Keys are unquoted JS object-literal keys, which is what the search below
 * looks for:
 *   [{country:"Afghanistan",iso:"AF",disease:"Diphtheria",year:"2024",cases:"207"}, ...]
 *
 * @param {string} bundle - Full text of index_bundle.js.
 * @returns {Array<{country: string, iso: string, disease: string, year: number,
 *   cases: number}>} One record per country/disease/year; `cases` falls back
 *   to 0 when the source value is empty or not numeric.
 * @throws {Error} If the bundle markers cannot be located or the extracted
 *   slice does not evaluate to an array.
 */
function parseHistoricalData(bundle) {
  // The realtime columns marker is the anchor: the historical array is searched for after it.
  const colIdx = bundle.indexOf('.columns=["Alert_ID"');
  if (colIdx === -1) throw new Error('[VPD] Bundle anchor not found for historical data search');

  const arrayStart = bundle.indexOf('[{country:"', colIdx);
  if (arrayStart === -1) throw new Error('[VPD] Historical data array not found');

  const arrayEnd = bundle.indexOf('];', arrayStart);
  if (arrayEnd === -1) throw new Error('[VPD] Historical data end marker not found');

  // Keep the closing ']' but not the ';'.
  const rawArray = bundle.slice(arrayStart, arrayEnd + 1);

  // SECURITY(review): this evaluates text downloaded from a remote host with
  // the full privileges of this process. The bundle uses JS object-literal
  // syntax (unquoted keys), so JSON.parse cannot be used directly; consider a
  // data-only parser if the source should not be trusted to this degree.
  // eslint-disable-next-line no-new-func
  const rows = Function('"use strict"; return ' + rawArray)();
  if (!Array.isArray(rows)) throw new Error('[VPD] Historical data did not evaluate to an array');

  return rows.map((r) => ({
    country: r.country,
    iso: r.iso,
    disease: r.disease,
    year: Number.parseInt(r.year, 10),
    // Strip thousands separators first: parseInt('12,301') would stop at the
    // comma and yield 12. Same handling as the realtime parser.
    cases: Number.parseInt(String(r.cases).replace(/,/g, ''), 10) || 0,
  }));
}
/**
 * Download the tracker bundle and extract both embedded datasets from it.
 *
 * @returns {Promise<{alerts: Array<object>, historical: Array<object>, fetchedAt: number}>}
 *   Parsed realtime alerts, parsed historical records, and the epoch-millisecond
 *   timestamp taken after parsing.
 * @throws {Error} If the HTTP response is not OK, or if either parser throws.
 */
async function fetchVpdTracker() {
  const requestOptions = {
    headers: { 'User-Agent': CHROME_UA },
    // Abort the request if it has not completed within 30 seconds.
    signal: AbortSignal.timeout(30000),
  };
  const response = await fetch(BUNDLE_URL, requestOptions);
  if (!response.ok) {
    throw new Error(`[VPD] Bundle fetch failed: HTTP ${response.status}`);
  }

  // Both datasets live in the same bundle text, so it is fetched once and parsed twice.
  const bundleText = await response.text();
  const alerts = parseRealtimeAlerts(bundleText);
  const historical = parseHistoricalData(bundleText);

  console.log(`[VPD] Realtime alerts: ${alerts.length} | Historical records: ${historical.length}`);
  return { alerts, historical, fetchedAt: Date.now() };
}
/**
 * Sanity-check a fetched payload before it is accepted.
 *
 * @param {{alerts?: unknown, historical?: unknown} | null | undefined} data - Payload to check.
 * @returns {boolean} True only when `alerts` is an array of at least 10 items
 *   and `historical` is an array of at least 100 items.
 */
function validate(data) {
  const alerts = data?.alerts;
  if (!Array.isArray(alerts) || alerts.length < 10) {
    return false;
  }
  const historical = data?.historical;
  return Array.isArray(historical) && historical.length >= 100;
}
// Script entry point: hand the fetcher, validator and key configuration to the
// shared runSeed helper. The extraKeys entry registers HISTORICAL_KEY with a
// transform that reshapes the fetched payload into { records, fetchedAt }.
// NOTE(review): how runSeed persists the primary and extra keys is defined in
// _seed-utils.mjs — confirm there.
runSeed('health', 'vpd-tracker', CANONICAL_KEY, fetchVpdTracker, {
  validateFn: validate,
  ttlSeconds: CACHE_TTL,
  sourceVersion: 'tgh-bundle-v1',
  extraKeys: [
    {
      key: HISTORICAL_KEY,
      ttl: CACHE_TTL,
      transform: ({ historical, fetchedAt }) => ({ records: historical, fetchedAt }),
    },
  ],
}).catch((err) => {
  // Append the underlying cause (message, else code, else the value itself) when one is attached.
  let causeNote = '';
  if (err.cause) {
    const { cause } = err;
    causeNote = ` (cause: ${cause.message || cause.code || cause})`;
  }
  console.error('FATAL:', (err.message || err) + causeNote);
  // Exit non-zero so the failed run is visible to whatever launched the script.
  process.exit(1);
});