mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
* feat(panels): Disease Outbreaks, Shipping Stress, Social Velocity, nuclear test site monitoring - Add HealthService proto with ListDiseaseOutbreaks RPC (WHO + ProMED RSS) - Add GetShippingStress RPC to SupplyChainService (Yahoo Finance carrier ETFs) - Add GetSocialVelocity RPC to IntelligenceService (Reddit r/worldnews + r/geopolitics) - Enrich earthquake seed with Haversine nuclear test-site proximity scoring - Add 5 nuclear test sites to NUCLEAR_FACILITIES (Punggye-ri, Lop Nur, Novaya Zemlya, Nevada NTS, Semipalatinsk) - Add shipping stress + social velocity seed loops to ais-relay.cjs - Add seed-disease-outbreaks.mjs Railway cron script - Wire all new RPCs: edge functions, handlers, gateway cache tiers, health.js STANDALONE_KEYS/SEED_META * fix(relay): apply gold standard retry/TTL-extend pattern to shipping-stress and social-velocity seeders * fix(review): address all PR #2375 review findings - health.js: shippingStress maxStaleMin 30→45 (3x interval), socialVelocity 20→30 (3x interval) - health.js: remove shippingStress/diseaseOutbreaks/socialVelocity from ON_DEMAND_KEYS (relay/cron seeds, not on-demand) - cache-keys.ts: add shippingStress, diseaseOutbreaks, socialVelocity to BOOTSTRAP_CACHE_KEYS - ais-relay.cjs: stressScore formula 50→40 (neutral market = moderate, not elevated) - ais-relay.cjs: fetchedAt Date.now() (consistent with other seeders) - ais-relay.cjs: deduplicate cross-subreddit article URLs in social velocity loop - seed-disease-outbreaks.mjs: WHO URL → specific DON RSS endpoint (not dead general news feed) - seed-disease-outbreaks.mjs: validate() requires outbreaks.length >= 1 (reject empty array) - seed-disease-outbreaks.mjs: stable id using hash(link) not array index - seed-disease-outbreaks.mjs: RSS regexes use [\s\S]*? 
for CDATA multiline content - seed-earthquakes.mjs: Lop Nur coordinates corrected (41.39,89.03 not 41.75,88.35) - seed-earthquakes.mjs: sourceVersion bumped to usgs-4.5-day-nuclear-v1 - earthquake.proto: fields 8-11 marked optional (distinguish not-enriched from enriched=false/0) - buf generate: regenerate seismology service stubs * revert(cache-keys): don't add new keys to bootstrap without frontend consumers * fix(panels): address all P1/P2/P3 review findings for PR #2375 - proto: add INT64_ENCODING_NUMBER annotation + sebuf import to get_shipping_stress.proto (run make generate) - bootstrap: register shippingStress (fast), socialVelocity (fast), diseaseOutbreaks (slow) in api/bootstrap.js + cache-keys.ts - relay: update WIDGET_SYSTEM_PROMPT with new bootstrap keys and live RPCs for health/supply-chain/intelligence - seeder: remove broken ProMED feed URL (promedmail.org/feed/ returns HTML 404); add 500K size guard to fetchRssItems; replace private COUNTRY_CODE_MAP with shared geo-extract.mjs; remove permanently-empty location field; bump sourceVersion to who-don-rss-v2 - handlers: remove dead .catch from all 3 new RPC handlers; fix stressLevel fallback to low; fix fetchedAt fallback to 0 - services: add fetchShippingStress, disease-outbreaks.ts, social-velocity.ts with getHydratedData consumers * fix(health): move seeded keys to BOOTSTRAP_KEYS, add VPD tracker seed and feeds - Reclassify diseaseOutbreaks, shippingStress, socialVelocity from STANDALONE_KEYS to BOOTSTRAP_KEYS so health endpoint reports CRIT (not WARN) when their seeds miss a cycle - Add vpdTrackerRealtime and vpdTrackerHistorical to BOOTSTRAP_KEYS with SEED_META entries (maxStaleMin: 2880 = 2x daily interval) - Fix seed-disease-outbreaks: add CDC and Outbreak News Today feeds alongside WHO, populate location field from title parsing, fix TTL to 259200s (3x daily interval per gold standard) - Add seed-vpd-tracker.mjs: scrapes Think Global Health VPD Tracker bundle (1,827 realtime alerts + 25,960 
historical WHO records), writes both Redis keys in one runSeed call via extraKeys - Add review todos 049-059 from PR #2375 code review
128 lines
4.6 KiB
JavaScript
128 lines
4.6 KiB
JavaScript
#!/usr/bin/env node
|
||
|
||
/**
|
||
* Seed: Think Global Health Vaccine-Preventable Disease Tracker
|
||
*
|
||
* Source: https://thinkglobalhealth.github.io/disease_tracker
|
||
* Both datasets are embedded in index_bundle.js (updated ~weekly by CFR staff).
|
||
* No API key required — the bundle is public GitHub Pages.
|
||
*
|
||
* Writes two Redis keys:
|
||
* health:vpd-tracker:realtime:v1 — geo-located outbreak alerts (lat/lng, cases, source URL)
|
||
* health:vpd-tracker:historical:v1 — WHO annual case counts by country/disease/year
|
||
*/
|
||
|
||
import { loadEnvFile, CHROME_UA, runSeed } from './_seed-utils.mjs';
|
||
|
||
// Runs before the seed starts; presumably loads environment variables from an
// env file located relative to this script — confirm in ./_seed-utils.mjs.
loadEnvFile(import.meta.url);
|
||
|
||
// Public GitHub Pages bundle that embeds both tracker datasets.
const BUNDLE_URL = 'https://thinkglobalhealth.github.io/disease_tracker/index_bundle.js';

// Redis key for the geo-located realtime alerts (the primary key handed to runSeed).
const CANONICAL_KEY = 'health:vpd-tracker:realtime:v1';

// Redis key for the WHO annual case counts (written through runSeed's extraKeys).
const HISTORICAL_KEY = 'health:vpd-tracker:historical:v1';

// 72h (3 days) in seconds = 259200 — 3× daily cron interval per gold standard;
// survives 2 consecutive missed runs.
const CACHE_TTL = 3 * 24 * 60 * 60;
|
||
|
||
/**
 * Parse realtime outbreak alerts from the embedded object array.
 *
 * Bundle format (webpack CommonJS):
 *   var a=[{Alert_ID:"8731706",lat:"56.85",lng:"24.92",diseases:"Measles",...},
 *          ...
 *          {Alert_ID:"8707570",...}];
 *   a.columns=["Alert_ID","lat","lng","diseases","place_name","country","date","cases","link","Type","summary"]
 *
 * The .columns metadata property marks the end of the array, so the array is
 * located by searching backwards from that marker.
 *
 * Rows whose lat/lng do not parse to finite numbers are dropped; a coordinate
 * of 0 is valid and kept. `cases` is null when the source value is missing or
 * an empty string, otherwise the comma-stripped integer (0 if unparseable).
 * The source "Type" column is not carried into the output.
 *
 * @param {string} bundle - Full text of index_bundle.js.
 * @returns {Array<{alertId: *, lat: number, lng: number, disease: *, placeName: *,
 *   country: *, date: *, cases: (number|null), sourceUrl: *, summary: *}>}
 * @throws {Error} If the columns marker, the array end, or the array start
 *   cannot be located, or (SyntaxError) if the extracted text is not valid JS.
 */
function parseRealtimeAlerts(bundle) {
  const colIdx = bundle.indexOf('.columns=["Alert_ID"');
  if (colIdx === -1) throw new Error('[VPD] Realtime data columns marker not found in bundle');

  // Walk backwards from the marker: first the closing '}]', then the opening 'var a=['.
  const arrayEnd = bundle.lastIndexOf('}]', colIdx);
  if (arrayEnd === -1) throw new Error('[VPD] Realtime data array end not found');

  const arrayStart = bundle.lastIndexOf('var a=[', arrayEnd);
  if (arrayStart === -1) throw new Error('[VPD] Realtime data array start not found');

  const rawArray = bundle.slice(arrayStart + 'var a='.length, arrayEnd + 2); // skip 'var a=', keep '}]'

  // SECURITY: the array is a JS object literal (unquoted keys), not JSON, so it is
  // evaluated as code. This executes text fetched from a third-party host inside
  // this process; anyone able to alter the bundle can run arbitrary code here.
  // Kept as-is for compatibility; consider a sandbox (node:vm) or a real parser.
  // eslint-disable-next-line no-new-func
  const rows = Function('"use strict"; return ' + rawArray)();

  const alerts = [];
  for (const r of rows) {
    const lat = parseFloat(r.lat);
    const lng = parseFloat(r.lng);
    // Check the parsed numbers, not raw truthiness: rejects "n/a"/""/undefined
    // (NaN) while keeping a legitimate 0 coordinate.
    if (!Number.isFinite(lat) || !Number.isFinite(lng)) continue;

    const hasCases = (r.cases ?? '') !== '';
    alerts.push({
      alertId: r.Alert_ID,
      lat,
      lng,
      disease: r.diseases,
      placeName: r.place_name,
      country: r.country,
      date: r.date,
      // Counts may carry thousands separators ("1,234").
      cases: hasCases ? parseInt(String(r.cases).replace(/,/g, ''), 10) || 0 : null,
      sourceUrl: r.link,
      summary: r.summary,
    });
  }
  return alerts;
}
|
||
|
||
/**
 * Parse historical WHO annual case counts from the embedded JS object array.
 *
 * Bundle format (second dataset, located after the realtime columns marker).
 * Keys are unquoted JS identifiers, not JSON:
 *   [{country:"Afghanistan",iso:"AF",disease:"Diphtheria",year:"2024",cases:"207"}, ...];
 *
 * The array is found by searching forward from the realtime `.columns=["Alert_ID"`
 * marker for the first `[{country:"` and then the first `];` after it.
 *
 * `cases` has thousands separators stripped before parsing (matching the
 * realtime parser) and falls back to 0 when it cannot be parsed. `year` is
 * parsed as a base-10 integer and is NaN if the source value is not numeric.
 *
 * @param {string} bundle - Full text of index_bundle.js.
 * @returns {Array<{country: *, iso: *, disease: *, year: number, cases: number}>}
 * @throws {Error} If the anchor, array start, or array end cannot be located,
 *   or (SyntaxError) if the extracted text is not valid JS.
 */
function parseHistoricalData(bundle) {
  const colIdx = bundle.indexOf('.columns=["Alert_ID"');
  if (colIdx === -1) throw new Error('[VPD] Bundle anchor not found for historical data search');

  const arrayStart = bundle.indexOf('[{country:"', colIdx);
  if (arrayStart === -1) throw new Error('[VPD] Historical data array not found');
  const arrayEnd = bundle.indexOf('];', arrayStart);
  if (arrayEnd === -1) throw new Error('[VPD] Historical data end marker not found');

  const rawArray = bundle.slice(arrayStart, arrayEnd + 1); // include ']' but not ';'

  // SECURITY: the array is a JS object literal (unquoted keys), not JSON, so it is
  // evaluated as code. This executes text fetched from a third-party host inside
  // this process; anyone able to alter the bundle can run arbitrary code here.
  // Kept as-is for compatibility; consider a sandbox (node:vm) or a real parser.
  // eslint-disable-next-line no-new-func
  const rows = Function('"use strict"; return ' + rawArray)();

  return rows.map(r => ({
    country: r.country,
    iso: r.iso,
    disease: r.disease,
    year: parseInt(r.year, 10),
    // Strip thousands separators first: parseInt("1,207", 10) would otherwise yield 1.
    cases: parseInt(String(r.cases).replace(/,/g, ''), 10) || 0,
  }));
}
|
||
|
||
/**
 * Download the tracker bundle and extract both embedded datasets from it.
 *
 * Sends a GET to BUNDLE_URL with the shared Chrome User-Agent and a 30 s abort
 * timeout, then runs the realtime and historical parsers over the same text.
 *
 * @returns {Promise<{alerts: Array, historical: Array, fetchedAt: number}>}
 *   Parsed datasets plus the epoch-millisecond time the parse finished.
 * @throws {Error} On a non-2xx response, or whatever either parser throws.
 */
async function fetchVpdTracker() {
  const requestOptions = {
    headers: { 'User-Agent': CHROME_UA },
    signal: AbortSignal.timeout(30000),
  };

  const response = await fetch(BUNDLE_URL, requestOptions);
  if (!response.ok) {
    throw new Error(`[VPD] Bundle fetch failed: HTTP ${response.status}`);
  }

  // Both datasets live in the same bundle, so one download feeds both parsers.
  const bundleText = await response.text();
  const alerts = parseRealtimeAlerts(bundleText);
  const historical = parseHistoricalData(bundleText);

  console.log(`[VPD] Realtime alerts: ${alerts.length} | Historical records: ${historical.length}`);

  return { alerts, historical, fetchedAt: Date.now() };
}
|
||
|
||
/**
 * Sanity-check a fetched payload before it is accepted.
 *
 * @param {*} data - Candidate payload (may be null/undefined).
 * @returns {boolean} true only when `alerts` is an array with at least 10
 *   entries and `historical` is an array with at least 100 entries.
 */
function validate(data) {
  const alerts = data?.alerts;
  if (!Array.isArray(alerts) || alerts.length < 10) return false;

  const historical = data?.historical;
  if (!Array.isArray(historical) || historical.length < 100) return false;

  return true;
}
|
||
|
||
// Script entry point: hand the fetcher to the shared seed runner. The realtime
// payload goes under CANONICAL_KEY; the historical records are passed as an
// extra key with the same TTL. (How runSeed persists these is defined in
// ./_seed-utils.mjs — not visible here.)
runSeed('health', 'vpd-tracker', CANONICAL_KEY, fetchVpdTracker, {
  validateFn: validate,
  ttlSeconds: CACHE_TTL,
  sourceVersion: 'tgh-bundle-v1',
  extraKeys: [
    {
      key: HISTORICAL_KEY,
      ttl: CACHE_TTL,
      // Reshape the fetch result into the historical-key payload.
      transform: ({ historical, fetchedAt }) => ({ records: historical, fetchedAt }),
    },
  ],
}).catch((error) => {
  // Append the underlying cause (message, else code, else the raw value) when present.
  let causeSuffix = '';
  if (error.cause) {
    const detail = error.cause.message || error.cause.code || error.cause;
    causeSuffix = ` (cause: ${detail})`;
  }
  console.error('FATAL:', (error.message || error) + causeSuffix);
  // Non-zero exit so the failure is visible to whatever launched the script.
  process.exit(1);
});
|