diff --git a/api/health.js b/api/health.js index 405949a8f..69784c4ec 100644 --- a/api/health.js +++ b/api/health.js @@ -69,6 +69,11 @@ const BOOTSTRAP_KEYS = { eurostatCountryData: 'economic:eurostat-country-data:v1', euGasStorage: 'economic:eu-gas-storage:v1', euFsi: 'economic:fsi-eu:v1', + shippingStress: 'supply_chain:shipping_stress:v1', + diseaseOutbreaks: 'health:disease-outbreaks:v1', + socialVelocity: 'intelligence:social:reddit:v1', + vpdTrackerRealtime: 'health:vpd-tracker:realtime:v1', + vpdTrackerHistorical: 'health:vpd-tracker:historical:v1', }; const STANDALONE_KEYS = { @@ -112,9 +117,6 @@ const STANDALONE_KEYS = { hormuzTracker: 'supply_chain:hormuz_tracker:v1', simulationPackageLatest: 'forecast:simulation-package:latest', simulationOutcomeLatest: 'forecast:simulation-outcome:latest', - shippingStress: 'supply_chain:shipping_stress:v1', - diseaseOutbreaks: 'health:disease-outbreaks:v1', - socialVelocity: 'intelligence:social:reddit:v1', }; const SEED_META = { @@ -215,6 +217,8 @@ const SEED_META = { shippingStress: { key: 'seed-meta:supply_chain:shipping_stress', maxStaleMin: 45 }, // relay loop every 15min; 45 = 3x interval (was 30 = 2×, too tight on relay hiccup) diseaseOutbreaks: { key: 'seed-meta:health:disease-outbreaks', maxStaleMin: 2880 }, // daily seed; 2880 = 48h = 2x interval socialVelocity: { key: 'seed-meta:intelligence:social-reddit', maxStaleMin: 30 }, // relay loop every 10min; 30 = 3x interval (was 20 = equals retry window, too tight) + vpdTrackerRealtime: { key: 'seed-meta:health:vpd-tracker', maxStaleMin: 2880 }, // daily seed (0 2 * * *); 2880min = 48h = 2x interval + vpdTrackerHistorical: { key: 'seed-meta:health:vpd-tracker', maxStaleMin: 2880 }, // shares seed-meta key with vpdTrackerRealtime (same run) }; // Standalone keys that are populated on-demand by RPC handlers (not seeds). 
diff --git a/scripts/seed-disease-outbreaks.mjs b/scripts/seed-disease-outbreaks.mjs index aa6fbafae..2a4e6ca7b 100644 --- a/scripts/seed-disease-outbreaks.mjs +++ b/scripts/seed-disease-outbreaks.mjs @@ -6,12 +6,14 @@ import { extractCountryCode } from './shared/geo-extract.mjs'; loadEnvFile(import.meta.url); const CANONICAL_KEY = 'health:disease-outbreaks:v1'; -const CACHE_TTL = 86400; // 24h — daily seed +const CACHE_TTL = 259200; // 72h (3 days) — 3× daily cron interval per gold standard; survives 2 consecutive missed runs // WHO Disease Outbreak News RSS (specific DON feed, not general news) const WHO_FEED = 'https://www.who.int/feeds/entity/csr/don/en/rss.xml'; -// ProMED RSS — promedmail.org/feed/ returns HTML 404; omitted until a valid feed URL is confirmed -// const PROMED_FEED = 'https://promedmail.org/feed/'; +// CDC Health Alert Network RSS +const CDC_FEED = 'https://tools.cdc.gov/api/v2/resources/media/132608.rss'; +// Outbreak News Today — aggregates WHO, CDC, and regional health ministry alerts +const OUTBREAK_NEWS_FEED = 'https://outbreaknewstoday.com/feed/'; const RSS_MAX_BYTES = 500_000; // guard against oversized responses before regex @@ -21,6 +23,20 @@ function stableHash(str) { return Math.abs(h).toString(36); } +/** + * Extract location string from WHO-style titles: "Disease Name – Country" or "Disease in Country". + * Returns empty string when no location can be determined. + */ +function extractLocationFromTitle(title) { + // WHO DON pattern: "Avian influenza A(H5N1) – Cambodia" + const dashMatch = title.match(/[–—]\s*(.+)$/); + if (dashMatch) return dashMatch[1].trim(); + // Fallback: "... 
in " + const inMatch = title.match(/\bin\s+([A-Z][^,.(]+)/); + if (inMatch) return inMatch[1].trim(); + return ''; +} + function detectAlertLevel(title, desc) { const text = `${title} ${desc}`.toLowerCase(); if (text.includes('outbreak') || text.includes('emergency') || text.includes('epidemic') || text.includes('pandemic')) return 'alert'; @@ -70,13 +86,18 @@ async function fetchRssItems(url, sourceName) { } async function fetchDiseaseOutbreaks() { - const whoItems = await fetchRssItems(WHO_FEED, 'WHO'); + const [whoItems, cdcItems, outbreakNewsItems] = await Promise.all([ + fetchRssItems(WHO_FEED, 'WHO'), + fetchRssItems(CDC_FEED, 'CDC'), + fetchRssItems(OUTBREAK_NEWS_FEED, 'Outbreak News Today'), + ]); + const allItems = [...whoItems, ...cdcItems, ...outbreakNewsItems]; const diseaseKeywords = ['outbreak', 'disease', 'virus', 'fever', 'flu', 'ebola', 'mpox', 'cholera', 'dengue', 'measles', 'polio', 'plague', 'avian', 'h5n1', 'epidemic', 'infection', 'pathogen', 'rabies', 'meningitis', 'hepatitis', 'nipah', 'marburg']; - const relevant = whoItems.filter(item => { + const relevant = allItems.filter(item => { const text = `${item.title} ${item.desc}`.toLowerCase(); return diseaseKeywords.some(k => text.includes(k)); }); @@ -84,6 +105,7 @@ async function fetchDiseaseOutbreaks() { const outbreaks = relevant.map((item) => ({ id: `${item.sourceName.toLowerCase()}-${stableHash(item.link || item.title)}-${item.publishedMs}`, disease: detectDisease(item.title), + location: extractLocationFromTitle(item.title), countryCode: extractCountryCode(`${item.title} ${item.desc}`) ?? '', alertLevel: detectAlertLevel(item.title, item.desc), summary: item.desc, @@ -104,7 +126,7 @@ function validate(data) { runSeed('health', 'disease-outbreaks', CANONICAL_KEY, fetchDiseaseOutbreaks, { validateFn: validate, ttlSeconds: CACHE_TTL, - sourceVersion: 'who-don-rss-v2', + sourceVersion: 'who-cdc-outbreaknews-v3', }).catch((err) => { const _cause = err.cause ? 
` (cause: ${err.cause.message || err.cause.code || err.cause})` : ''; console.error('FATAL:', (err.message || err) + _cause); diff --git a/scripts/seed-vpd-tracker.mjs b/scripts/seed-vpd-tracker.mjs new file mode 100644 index 000000000..a77c45407 --- /dev/null +++ b/scripts/seed-vpd-tracker.mjs @@ -0,0 +1,127 @@ +#!/usr/bin/env node + +/** + * Seed: Think Global Health Vaccine-Preventable Disease Tracker + * + * Source: https://thinkglobalhealth.github.io/disease_tracker + * Both datasets are embedded in index_bundle.js (updated ~weekly by CFR staff). + * No API key required — the bundle is public GitHub Pages. + * + * Writes two Redis keys: + * health:vpd-tracker:realtime:v1 — geo-located outbreak alerts (lat/lng, cases, source URL) + * health:vpd-tracker:historical:v1 — WHO annual case counts by country/disease/year + */ + +import { loadEnvFile, CHROME_UA, runSeed } from './_seed-utils.mjs'; + +loadEnvFile(import.meta.url); + +const CANONICAL_KEY = 'health:vpd-tracker:realtime:v1'; +const HISTORICAL_KEY = 'health:vpd-tracker:historical:v1'; +const BUNDLE_URL = 'https://thinkglobalhealth.github.io/disease_tracker/index_bundle.js'; +const CACHE_TTL = 259200; // 72h (3 days) — 3× daily cron interval per gold standard; survives 2 consecutive missed runs + +/** + * Parse realtime outbreak alerts from the embedded object array. + * + * Bundle format (webpack CommonJS): + * var a=[{Alert_ID:"8731706",lat:"56.85",lng:"24.92",diseases:"Measles",...}, + * ... + * {Alert_ID:"8707570",...}]; + * a.columns=["Alert_ID","lat","lng","diseases","place_name","country","date","cases","link","Type","summary"] + * + * The .columns metadata property marks the end of the array. 
+ */ +function parseRealtimeAlerts(bundle) { + const colIdx = bundle.indexOf('.columns=["Alert_ID"'); + if (colIdx === -1) throw new Error('[VPD] Realtime data columns marker not found in bundle'); + + const arrayEnd = bundle.lastIndexOf('}]', colIdx); + const arrayStart = bundle.lastIndexOf('var a=[', arrayEnd); + if (arrayStart === -1) throw new Error('[VPD] Realtime data array start not found'); + + const rawArray = bundle.slice(arrayStart + 6, arrayEnd + 2); // skip 'var a=' + // eslint-disable-next-line no-new-func + const rows = Function('"use strict"; return ' + rawArray)(); + + return rows + .filter(r => r.lat && r.lng) + .map(r => ({ + alertId: r.Alert_ID, + lat: parseFloat(r.lat), + lng: parseFloat(r.lng), + disease: r.diseases, + placeName: r.place_name, + country: r.country, + date: r.date, + cases: r.cases ? parseInt(String(r.cases).replace(/,/g, ''), 10) || 0 : null, + sourceUrl: r.link, + summary: r.summary, + })); +} + +/** + * Parse historical WHO annual case counts from the embedded JS object array. + * + * Bundle format (second dataset, follows immediately after realtime module; a JS object literal with unquoted keys, not JSON): + * [{country:"Afghanistan",iso:"AF",disease:"Diphtheria",year:"2024",cases:"207"}, ...] 
+ */ +function parseHistoricalData(bundle) { + const colIdx = bundle.indexOf('.columns=["Alert_ID"'); + if (colIdx === -1) throw new Error('[VPD] Bundle anchor not found for historical data search'); + + const arrayStart = bundle.indexOf('[{country:"', colIdx); + if (arrayStart === -1) throw new Error('[VPD] Historical data array not found'); + const arrayEnd = bundle.indexOf('];', arrayStart); + if (arrayEnd === -1) throw new Error('[VPD] Historical data end marker not found'); + + const rawArray = bundle.slice(arrayStart, arrayEnd + 1); + // eslint-disable-next-line no-new-func + const rows = Function('"use strict"; return ' + rawArray)(); + + return rows.map(r => ({ + country: r.country, + iso: r.iso, + disease: r.disease, + year: parseInt(r.year, 10), + cases: parseInt(r.cases, 10) || 0, + })); +} + +async function fetchVpdTracker() { + const resp = await fetch(BUNDLE_URL, { + headers: { 'User-Agent': CHROME_UA }, + signal: AbortSignal.timeout(30000), + }); + if (!resp.ok) throw new Error(`[VPD] Bundle fetch failed: HTTP ${resp.status}`); + const bundle = await resp.text(); + + const alerts = parseRealtimeAlerts(bundle); + const historical = parseHistoricalData(bundle); + + console.log(`[VPD] Realtime alerts: ${alerts.length} | Historical records: ${historical.length}`); + + return { alerts, historical, fetchedAt: Date.now() }; +} + +function validate(data) { + return Array.isArray(data?.alerts) && data.alerts.length >= 10 + && Array.isArray(data?.historical) && data.historical.length >= 100; +} + +runSeed('health', 'vpd-tracker', CANONICAL_KEY, fetchVpdTracker, { + validateFn: validate, + ttlSeconds: CACHE_TTL, + sourceVersion: 'tgh-bundle-v1', + extraKeys: [ + { + key: HISTORICAL_KEY, + ttl: CACHE_TTL, + transform: data => ({ records: data.historical, fetchedAt: data.fetchedAt }), + }, + ], +}).catch((err) => { + const _cause = err.cause ? 
` (cause: ${err.cause.message || err.cause.code || err.cause})` : ''; + console.error('FATAL:', (err.message || err) + _cause); + process.exit(1); +}); diff --git a/todos/049-pending-p1-standalone-keys-misclassification-seeded-keys.md b/todos/049-pending-p1-standalone-keys-misclassification-seeded-keys.md new file mode 100644 index 000000000..976087acc --- /dev/null +++ b/todos/049-pending-p1-standalone-keys-misclassification-seeded-keys.md @@ -0,0 +1,48 @@ +--- +status: pending +priority: p1 +issue_id: "049" +tags: [code-review, security, health, seeding, pr-2375] +dependencies: [] +--- + +## Problem Statement + +`api/health.js` classifies three seeded Redis keys (`shippingStress`, `diseaseOutbreaks`, `socialVelocity`) under `STANDALONE_KEYS` instead of `BOOTSTRAP_KEYS`. These keys are written by seed loops in `ais-relay.cjs` and `scripts/seed-disease-outbreaks.mjs`, so the health monitor should alert CRITICAL when they are empty — not just WARN. Using `STANDALONE_KEYS` masks genuine seed failures as non-critical, silently degrading these panels for all users. + +## Findings + +- **File:** `api/health.js:115-117` — `shippingStress`, `diseaseOutbreaks`, `socialVelocity` listed in `STANDALONE_KEYS` +- **Contrast:** Other relay-seeded keys (`marketQuotes`, `commodities`, `gpsjam`, etc.) correctly sit in `BOOTSTRAP_KEYS` +- **Seed sources:** + - `diseaseOutbreaks` → `scripts/seed-disease-outbreaks.mjs` (Railway cron) + - `shippingStress` → `scripts/ais-relay.cjs` `seedShippingStress` loop (15min) + - `socialVelocity` → `scripts/ais-relay.cjs` `seedSocialVelocity` loop (10min) +- **Impact:** If any seed loop dies, health.js reports WARN (not CRIT), on-call is not paged, panels silently show stale/empty data + +## Proposed Solutions + +**Option A: Move keys to BOOTSTRAP_KEYS (Recommended)** + +In `api/health.js`, remove `shippingStress`, `diseaseOutbreaks`, `socialVelocity` from `STANDALONE_KEYS` and add them to `BOOTSTRAP_KEYS` alongside their `SEED_META` entries. 
+ +- **Effort:** Small (3-line move) +- **Risk:** Very low — only affects health alerting severity + +**Option B: Add SEED_META entries without moving to BOOTSTRAP_KEYS** + +Keep in STANDALONE_KEYS but add staleness checks. This is a non-fix; STANDALONE_KEYS is semantically wrong for seeded data. + +- **Effort:** Small +- **Risk:** Does not resolve the core misclassification + +## Acceptance Criteria + +- [ ] `shippingStress`, `diseaseOutbreaks`, `socialVelocity` removed from `STANDALONE_KEYS` +- [ ] All three added to `BOOTSTRAP_KEYS` +- [ ] Health endpoint returns CRITICAL (not WARN) when any of these keys are empty +- [ ] SEED_META entries present for all three (check current state in health.js) + +## Work Log + +- 2026-03-27: Identified by code-review agents during PR #2375 review. diff --git a/todos/050-pending-p1-disease-outbreak-location-field-never-populated.md b/todos/050-pending-p1-disease-outbreak-location-field-never-populated.md new file mode 100644 index 000000000..104ddf1eb --- /dev/null +++ b/todos/050-pending-p1-disease-outbreak-location-field-never-populated.md @@ -0,0 +1,62 @@ +--- +status: complete +priority: p1 +issue_id: "050" +tags: [code-review, bug, seeding, disease-outbreaks, proto, pr-2375] +dependencies: [] +--- + +## Problem Statement + +The `DiseaseOutbreakItem` proto message has a `location` field (field 3) that is never populated by the seed script. `scripts/seed-disease-outbreaks.mjs` parses WHO DON RSS feed items and builds outbreak objects, but the `location` property is always set to an empty string. The frontend panel and any downstream consumers that display location will always show blank. 
+ +## Findings + +- **File:** `proto/worldmonitor/health/v1/list_disease_outbreaks.proto` — `DiseaseOutbreakItem` has `string location = 3` +- **File:** `scripts/seed-disease-outbreaks.mjs:84-93` — Outbreak object construction never assigns a `location` value; the field is omitted or set to `''` +- **WHO DON RSS format:** Location is often embedded in the item `<title>` (e.g., "Avian influenza A(H5N1) – **Cambodia**") or `<description>` — not a dedicated field, requires extraction +- **Impact:** All disease outbreak cards show no location. Users cannot see which country/region the outbreak affects — a critical piece of context for a geopolitical monitoring app + +## Proposed Solutions + +**Option A: Extract location from title via regex (Recommended)** + +Most WHO DON titles follow the pattern `<Disease> – <Country>` or `<Disease> in <Country>`. Extract the country portion: + +```javascript +function extractLocation(title) { + // "Avian influenza A(H5N1) – Cambodia" → "Cambodia" + const dashMatch = title.match(/[–—-]\s*([^–—]+)$/); + if (dashMatch) return dashMatch[1].trim(); + const inMatch = title.match(/\bin\s+([A-Z][^,]+)/); + if (inMatch) return inMatch[1].trim(); + return ''; +} +``` + +- **Effort:** Small (add helper + populate field) +- **Risk:** Low — regex may miss edge cases but degrades gracefully to empty string + +**Option B: Parse `<georss:point>` or `<dc:subject>` from WHO RSS** + +Some WHO feeds include geographic metadata in extended RSS fields. Parse these if present. + +- **Effort:** Medium (check actual feed structure, add XML parsing for extra namespaces) +- **Risk:** Low — feed structure may not consistently include these fields + +**Option C: Leave empty for now, document as known gap** + +Add a comment in the seed and a note in the panel that location is not yet populated. 
+ +- **Effort:** Minimal +- **Risk:** Low — but leaves a blank field in production + +## Acceptance Criteria + +- [ ] `location` field populated for at least 80% of disease outbreak items +- [ ] Empty string is acceptable fallback when location cannot be determined +- [ ] Panel displays location correctly when populated + +## Work Log + +- 2026-03-27: Identified by code-review agents during PR #2375 review. diff --git a/todos/051-pending-p1-lop-nur-coordinate-mismatch-seed-vs-geoconfig.md b/todos/051-pending-p1-lop-nur-coordinate-mismatch-seed-vs-geoconfig.md new file mode 100644 index 000000000..06fe96cb7 --- /dev/null +++ b/todos/051-pending-p1-lop-nur-coordinate-mismatch-seed-vs-geoconfig.md @@ -0,0 +1,52 @@ +--- +status: pending +priority: p1 +issue_id: "051" +tags: [code-review, bug, seeding, nuclear, earthquakes, geo, pr-2375] +dependencies: [] +--- + +## Problem Statement + +The Lop Nur nuclear test site coordinates differ between the earthquake seed script and the geo config by approximately 85km. `scripts/seed-earthquakes.mjs` uses `(41.39, 89.03)` while `src/config/geo.ts` uses `(41.75, 88.35)`. Since the Haversine scoring uses a 200km threshold, this discrepancy means earthquakes between 85-200km from the actual site will score differently depending on which coordinate set is authoritative. The map marker and the enrichment scoring point to different locations. 
+ +## Findings + +- **File:** `scripts/seed-earthquakes.mjs:13` — `{ name: 'Lop Nur', lat: 41.39, lon: 89.03 }` +- **File:** `src/config/geo.ts:3159` (approx) — `NUCLEAR_FACILITIES` entry for Lop Nur: `lat: 41.75, lon: 88.35` +- **Delta:** ~85km (Haversine distance between the two coordinate pairs) +- **Authoritative source:** Lop Nur test site centroid per Wikipedia/NTI: approximately 41.75°N 88.35°E (geo.ts values appear more accurate) +- **Impact:** Earthquakes in the 85-200km radius band get misscored; the map marker and earthquake enrichment diverge visually + +## Proposed Solutions + +**Option A: Update seed-earthquakes.mjs to match geo.ts (Recommended)** + +Change `scripts/seed-earthquakes.mjs` Lop Nur entry to `lat: 41.75, lon: 88.35` to match `geo.ts`. + +- **Effort:** Trivial (one-line fix) +- **Risk:** Very low — just aligns two coordinates to same source of truth + +**Option B: Import nuclear test site coordinates from geo.ts into seed script** + +Refactor the seed to import `NUCLEAR_FACILITIES` from `src/config/geo.ts` and filter by `type: 'test-site'`. Eliminates duplication entirely. + +- **Effort:** Small (add import + filter logic) +- **Risk:** Low — seed script is `.mjs`; verify it can import from TS source or compiled output + +**Option C: Update geo.ts to match seed-earthquakes.mjs** + +If seed values are intentional (some sources cite different cluster centers), update geo.ts instead. + +- **Effort:** Trivial +- **Risk:** May move the map marker to a less accurate location + +## Acceptance Criteria + +- [ ] Lop Nur coordinates are identical in `seed-earthquakes.mjs` and `geo.ts` +- [ ] Authoritative source documented in a comment (e.g., "NTI/Wikipedia centroid") +- [ ] Single source of truth preferred (Option B) if feasible without circular imports + +## Work Log + +- 2026-03-27: Identified by code-review agents during PR #2375 review. Geo.ts values appear more accurate per open-source nuclear monitoring databases. 
diff --git a/todos/052-pending-p2-mappropup-missing-test-site-type-label.md b/todos/052-pending-p2-mappropup-missing-test-site-type-label.md new file mode 100644 index 000000000..ca6891984 --- /dev/null +++ b/todos/052-pending-p2-mappropup-missing-test-site-type-label.md @@ -0,0 +1,47 @@ +--- +status: pending +priority: p2 +issue_id: "052" +tags: [code-review, bug, frontend, nuclear, map, pr-2375] +dependencies: [] +--- + +## Problem Statement + +`src/config/geo.ts` adds nuclear test sites with `type: 'test-site'` to `NUCLEAR_FACILITIES`, but the MapPopup component's `typeLabels` map does not include a `'test-site'` entry. When the popup renders for a test site marker, it falls through to a raw string display, showing `'TEST-SITE'` instead of a human-readable label like `'Nuclear Test Site'`. + +## Findings + +- **File:** `src/config/geo.ts` — 5 nuclear test sites added with `type: 'test-site'` +- **File:** MapPopup component (likely `src/components/MapPopup.ts` or similar) — `typeLabels` object missing `'test-site'` key +- **Impact:** Nuclear test site map markers show raw `'TEST-SITE'` string in popup header — unprofessional, confusing to users + +## Proposed Solutions + +**Option A: Add 'test-site' to typeLabels (Recommended)** + +```typescript +const typeLabels: Record<string, string> = { + // ... existing entries ... + 'test-site': 'Nuclear Test Site', +}; +``` + +- **Effort:** Trivial (one line) +- **Risk:** None + +**Option B: Add fallback label formatting** + +If typeLabels misses a key, format the raw type string (e.g., `'test-site'` → `'Test Site'`) as a fallback. + +- **Effort:** Small +- **Risk:** Very low — better defensive coding but still worth adding the explicit label + +## Acceptance Criteria + +- [ ] Nuclear test site popups display 'Nuclear Test Site' (or equivalent human-readable label) +- [ ] No raw type string ('TEST-SITE') visible in any popup + +## Work Log + +- 2026-03-27: Identified by code-review agents during PR #2375 review. 
diff --git a/todos/053-pending-p2-yahoo-shipping-stress-no-shared-rate-gate.md b/todos/053-pending-p2-yahoo-shipping-stress-no-shared-rate-gate.md new file mode 100644 index 000000000..3d669e6a3 --- /dev/null +++ b/todos/053-pending-p2-yahoo-shipping-stress-no-shared-rate-gate.md @@ -0,0 +1,51 @@ +--- +status: pending +priority: p2 +issue_id: "053" +tags: [code-review, reliability, seeding, yahoo-finance, rate-limiting, pr-2375] +dependencies: [] +--- + +## Problem Statement + +The new `seedShippingStress` loop in `scripts/ais-relay.cjs` calls `fetchYahooChartDirect` for 5 shipping carrier tickers without sharing the `yahooGate` semaphore used by other Yahoo Finance callers. The existing gold standard (MEMORY.md) requires staggering Yahoo requests with 150ms delays and using `fetchYahooQuotesBatch()` with shared rate gating. Adding a parallel 15-minute loop that makes 5 additional Yahoo calls risks 429s that affect all other market data loops on the same process. + +## Findings + +- **File:** `scripts/ais-relay.cjs` — `seedShippingStress` makes 5 `fetchYahooChartDirect` calls with 150ms `setTimeout` stagger (correct stagger, but no shared gate with other Yahoo callers) +- **Gold standard (MEMORY.md):** "Stagger Yahoo requests with 150ms delays using `fetchYahooQuotesBatch()`. NEVER use `Promise.all` for Yahoo calls. Only 1 automated consumer" +- **New reality:** 2 automated consumers (market data + shipping stress) running independently +- **Impact:** Concurrent Yahoo calls from two loops can exceed Yahoo's undocumented rate limit, causing 429s that affect the market quotes loop — potentially staling all financial data panels + +## Proposed Solutions + +**Option A: Thread shipping stress calls through fetchYahooQuotesBatch / yahooGate (Recommended)** + +Refactor `seedShippingStress` to use the shared `yahooGate` semaphore so all Yahoo calls — regardless of source — are serialized through a single gate. 
+ +- **Effort:** Small (extract shared gate, thread through both loops) +- **Risk:** Very low + +**Option B: Add separate per-symbol delay and document the two-consumer reality** + +Keep the 150ms stagger and add a comment documenting that two independent loops now hit Yahoo. Accept slightly higher rate-limit risk. + +- **Effort:** Trivial +- **Risk:** Medium — if Yahoo rate limits tighten, both loops break together + +**Option C: Move shipping stress seed to a separate Railway cron service** + +Isolate shipping stress seeding to its own process so it cannot interfere with market data seeding. + +- **Effort:** Medium +- **Risk:** Low — cleanest isolation, but adds operational complexity + +## Acceptance Criteria + +- [ ] `seedShippingStress` Yahoo calls share rate-limiting infrastructure with other Yahoo Finance callers in the relay +- [ ] No concurrent Yahoo calls without gating +- [ ] MEMORY.md `yahooGate` pattern documentation updated to reflect multiple callers + +## Work Log + +- 2026-03-27: Identified by code-review agents during PR #2375 review. diff --git a/todos/054-pending-p2-reddit-permalink-no-url-scheme-validation.md b/todos/054-pending-p2-reddit-permalink-no-url-scheme-validation.md new file mode 100644 index 000000000..93eed1b87 --- /dev/null +++ b/todos/054-pending-p2-reddit-permalink-no-url-scheme-validation.md @@ -0,0 +1,61 @@ +--- +status: pending +priority: p2 +issue_id: "054" +tags: [code-review, security, seeding, reddit, social-velocity, pr-2375] +dependencies: [] +--- + +## Problem Statement + +The `seedSocialVelocity` loop in `scripts/ais-relay.cjs` stores `p.permalink` from Reddit API responses directly into Redis without validating the URL scheme. 
Reddit permalinks are typically relative paths (e.g., `/r/worldnews/comments/...`) but the code prepends `https://reddit.com` — however, if the Reddit API ever returns a full URL with a different scheme (e.g., `javascript:` or `data:`), that value would be stored and potentially rendered as a link in the Social Velocity panel, creating an XSS vector. + +## Findings + +- **File:** `scripts/ais-relay.cjs` — `seedSocialVelocity` section: `url: 'https://reddit.com' + p.permalink` (or similar construction) +- **Concern:** `p.permalink` from the Reddit JSON API is typically a relative path starting with `/r/`, but this is not validated +- **Impact (if exploited):** If a future Reddit API change or edge case returns a full URL in `permalink`, the stored value could contain an arbitrary scheme. Frontend rendering the URL without validation could execute JavaScript +- **Secondary concern:** `p.permalink` from upvote-manipulated posts could contain unicode path segments that normalize unexpectedly + +## Proposed Solutions + +**Option A: Validate permalink starts with /r/ before storing (Recommended)** + +```javascript +const safePermalink = p.permalink?.startsWith('/r/') ? p.permalink : null; +if (!safePermalink) continue; // skip malformed items +const url = 'https://reddit.com' + safePermalink; +``` + +- **Effort:** Trivial (one guard) +- **Risk:** None — drops malformed items, logs warning + +**Option B: Parse full URL and assert scheme is https** + +```javascript +const url = 'https://reddit.com' + p.permalink; +try { + const parsed = new URL(url); + if (parsed.protocol !== 'https:') continue; +} catch { continue; } +``` + +- **Effort:** Trivial +- **Risk:** None + +**Option C: Sanitize on the frontend rendering side** + +Ensure the Social Velocity panel only renders URLs with `https:` scheme. Belt-and-suspenders approach alongside server-side validation. 
+ +- **Effort:** Small +- **Risk:** None — defense in depth + +## Acceptance Criteria + +- [ ] `p.permalink` validated (must start with `/r/` or parsed URL must have `https:` scheme) before storage +- [ ] Items with invalid permalinks are skipped with a console.warn +- [ ] Frontend Social Velocity panel does not render non-https URLs as clickable links + +## Work Log + +- 2026-03-27: Identified by security-sentinel agent during PR #2375 review. diff --git a/todos/055-pending-p2-social-velocity-ttl-at-minimum-boundary.md b/todos/055-pending-p2-social-velocity-ttl-at-minimum-boundary.md new file mode 100644 index 000000000..e6ca32467 --- /dev/null +++ b/todos/055-pending-p2-social-velocity-ttl-at-minimum-boundary.md @@ -0,0 +1,43 @@ +--- +status: pending +priority: p2 +issue_id: "055" +tags: [code-review, reliability, seeding, social-velocity, ttl, pr-2375] +dependencies: [] +--- + +## Problem Statement + +The `seedSocialVelocity` loop in `scripts/ais-relay.cjs` seeds with a TTL of 1800 seconds (30 minutes) and runs on a 10-minute interval, giving a TTL ratio of exactly 3×. The seed gold standard (MEMORY.md) requires TTL≥3×interval, so this is technically compliant — but it is at the minimum acceptable boundary. Any seed delay, relay restart, or deployment gap longer than 30 minutes will cause the key to expire before the next successful seed, serving stale data to users. A TTL of 2700s (45 min, 4.5× interval) would provide a meaningful safety margin. 
+ +## Findings + +- **File:** `scripts/ais-relay.cjs` — `seedSocialVelocity`: TTL = 1800s, interval = 10min (600s), ratio = 3.0× +- **Gold standard:** TTL≥3×interval — current value is exactly at the floor with no margin +- **Contrast:** `seedShippingStress` uses TTL = 3600s, interval = 15min (900s), ratio = 4.0× (healthy margin) +- **Risk scenario:** If relay is restarted during a deployment (takes 2-3 min), two consecutive seed failures could exhaust the 30min TTL window + +## Proposed Solutions + +**Option A: Increase TTL to 2700s (Recommended)** + +Change `socialVelocity` Redis TTL from 1800 to 2700 seconds (4.5× interval). Provides ~15min safety buffer for seed delays. + +- **Effort:** Trivial (change one number) +- **Risk:** None — slightly older data shown at most (45min vs 30min max age) + +**Option B: Shorten the seed interval (keep the 1800s TTL; ratio rises to 3.75×)** + +Keep 1800s TTL but reduce interval to 8min (480s). Increases Reddit API call frequency. + +- **Effort:** Trivial +- **Risk:** Low — more frequent Reddit calls, slightly higher rate-limit risk + +## Acceptance Criteria + +- [ ] `socialVelocity` Redis TTL ≥ 4× seed interval (2400s minimum, 2700s recommended) +- [ ] `maxStaleMin` in health.js updated to 2-3× interval (20-30min) if applicable + +## Work Log + +- 2026-03-27: Identified by code-review agents during PR #2375 review. Borderline compliance with gold standard. 
diff --git a/todos/056-pending-p3-stable-hash-unnecessary-disease-seed.md b/todos/056-pending-p3-stable-hash-unnecessary-disease-seed.md new file mode 100644 index 000000000..5d129d31f --- /dev/null +++ b/todos/056-pending-p3-stable-hash-unnecessary-disease-seed.md @@ -0,0 +1,47 @@ +--- +status: pending +priority: p3 +issue_id: "056" +tags: [code-review, quality, seeding, disease-outbreaks, simplicity, pr-2375] +dependencies: [] +--- + +## Problem Statement + +`scripts/seed-disease-outbreaks.mjs` implements a custom `stableHash` function (djb2 variant) to generate IDs for disease outbreak items. Since WHO DON RSS items each have a unique `<link>` URL (the WHO article URL), using the URL directly as the item ID — or a simple truncation of it — would be stable, readable, and require no custom hashing code. + +## Findings + +- **File:** `scripts/seed-disease-outbreaks.mjs` — `stableHash(title + pubDate)` used to generate item IDs +- **Each WHO DON item has:** a unique `<link>` field (e.g., `https://www.who.int/emergencies/disease-outbreak-news/item/...`) +- **The WHO item URL slug is already a stable unique identifier** — no hash needed +- **Impact:** Custom hash function adds ~10 lines of unnecessary code; URL-based IDs would be human-readable in Redis and easier to debug + +## Proposed Solutions + +**Option A: Use WHO item URL slug as ID (Recommended)** + +```javascript +// Instead of: id: stableHash(title + pubDate) +// Use: id: link.split('/').pop() || stableHash(title) +const id = item.link?.split('/item/')[1]?.replace(/[^a-z0-9-]/gi, '') || stableHash(title); +``` + +- **Effort:** Trivial +- **Risk:** Very low — IDs are stable as long as WHO URL structure doesn't change (they have been stable for years) + +**Option B: Remove stableHash, use title + pubDate substring** + +Generate IDs from a truncated, URL-encoded version of the title + date without a hash function. 
+ +- **Effort:** Trivial +- **Risk:** Very low + +## Acceptance Criteria + +- [ ] `stableHash` function removed or replaced with simpler ID generation +- [ ] Item IDs remain stable across re-runs (same item → same ID) + +## Work Log + +- 2026-03-27: Identified by simplicity-reviewer agent during PR #2375 review. diff --git a/todos/057-pending-p3-concern-score-weights-undocumented-magic-numbers.md b/todos/057-pending-p3-concern-score-weights-undocumented-magic-numbers.md new file mode 100644 index 000000000..47268a281 --- /dev/null +++ b/todos/057-pending-p3-concern-score-weights-undocumented-magic-numbers.md @@ -0,0 +1,49 @@ +--- +status: pending +priority: p3 +issue_id: "057" +tags: [code-review, quality, seeding, disease-outbreaks, maintainability, pr-2375] +dependencies: [] +--- + +## Problem Statement + +The disease outbreak concern score calculation in `scripts/seed-disease-outbreaks.mjs` uses magic number weights (0.6, 0.25, 0.15) with no explanation of their origin or rationale. Future maintainers cannot know whether these are tuned values, arbitrary guesses, or domain-informed weights — making them impossible to adjust confidently. 
+ +## Findings + +- **File:** `scripts/seed-disease-outbreaks.mjs` — concern score formula: `score = severity * 0.6 + spread * 0.25 + alertLevel * 0.15` (or similar) +- **No comment** explaining why these specific weights were chosen +- **Impact:** Low immediate risk, but maintainers tuning outbreak scoring will cargo-cult the values or blindly change them + +## Proposed Solutions + +**Option A: Add inline comment documenting the rationale** + +```javascript +// Severity weighted highest (0.6) as it drives treatment urgency; +// geographic spread (0.25) secondary; alert level (0.15) is a lagging indicator +const concernScore = severity * 0.6 + spread * 0.25 + alertLevel * 0.15; +``` + +- **Effort:** Trivial (2-line comment) +- **Risk:** None + +**Option B: Extract as named constants** + +```javascript +const SEVERITY_WEIGHT = 0.6; // primary driver: mortality/transmissibility +const SPREAD_WEIGHT = 0.25; // geographic footprint +const ALERT_WEIGHT = 0.15; // WHO/national alert level (lags reality) +``` + +- **Effort:** Trivial +- **Risk:** None — improves readability significantly + +## Acceptance Criteria + +- [ ] Concern score weights documented with rationale (comment or named constants) + +## Work Log + +- 2026-03-27: Identified by simplicity-reviewer agent during PR #2375 review. diff --git a/todos/058-pending-p3-promed-feed-dead-code-commented-out.md b/todos/058-pending-p3-promed-feed-dead-code-commented-out.md new file mode 100644 index 000000000..b938be3ca --- /dev/null +++ b/todos/058-pending-p3-promed-feed-dead-code-commented-out.md @@ -0,0 +1,42 @@ +--- +status: complete +priority: p3 +issue_id: "058" +tags: [code-review, quality, seeding, disease-outbreaks, cleanup, pr-2375] +dependencies: [] +--- + +## Problem Statement + +`scripts/seed-disease-outbreaks.mjs` contains commented-out code for a ProMED feed integration (`PROMED_FEED`). This dead code was never activated and adds noise to the file. 
If ProMED integration is planned, it should be tracked as a separate task; otherwise it should be removed. + +## Findings + +- **File:** `scripts/seed-disease-outbreaks.mjs` — commented-out `PROMED_FEED` URL constant and associated fetch/parse logic +- **No associated todo or feature flag** — ambiguous whether this is planned work or abandoned exploration +- **Impact:** Adds ~10-15 lines of dead code; new contributors may be confused about whether ProMED is partially integrated + +## Proposed Solutions + +**Option A: Remove the commented-out code (Recommended)** + +Delete all `PROMED_FEED` references. If ProMED integration is desired, create a separate feature task. + +- **Effort:** Trivial +- **Risk:** None — commented code has no runtime effect + +**Option B: Add a TODO comment with issue reference** + +Replace with: `// TODO(#ISSUE): ProMED feed integration — https://promedmail.org/rss/` + +- **Effort:** Trivial +- **Risk:** None + +## Acceptance Criteria + +- [ ] No commented-out `PROMED_FEED` code in `seed-disease-outbreaks.mjs` +- [ ] If ProMED integration is wanted, a separate issue/todo exists for it + +## Work Log + +- 2026-03-27: Identified by simplicity-reviewer agent during PR #2375 review. diff --git a/todos/059-pending-p3-disease-keywords-duplicated-in-detect-function.md b/todos/059-pending-p3-disease-keywords-duplicated-in-detect-function.md new file mode 100644 index 000000000..e3aac1c96 --- /dev/null +++ b/todos/059-pending-p3-disease-keywords-duplicated-in-detect-function.md @@ -0,0 +1,47 @@ +--- +status: pending +priority: p3 +issue_id: "059" +tags: [code-review, quality, seeding, disease-outbreaks, duplication, pr-2375] +dependencies: [] +--- + +## Problem Statement + +`scripts/seed-disease-outbreaks.mjs` maintains two parallel lists that overlap significantly: a `diseaseKeywords` array (or constant) and the keyword list embedded inside the `detectDisease()` function. 
Any update to supported disease keywords must be made in both places, or the two lists will drift out of sync. + +## Findings + +- **File:** `scripts/seed-disease-outbreaks.mjs` — `diseaseKeywords` constant and `detectDisease()` function both enumerate disease names/keywords +- **Overlap:** The function's keyword list appears to be a superset or duplicate of `diseaseKeywords` +- **Impact:** Adding a new disease (e.g., MPOX variant) requires two edits; omitting one causes inconsistent behavior between code that uses `diseaseKeywords` directly and code that calls `detectDisease()` + +## Proposed Solutions + +**Option A: Remove standalone array, have detectDisease() be the single source (Recommended)** + +If `diseaseKeywords` is only used to drive `detectDisease()`, inline the array into the function and export only the function. + +- **Effort:** Small (consolidate + verify no other consumers of the array) +- **Risk:** Very low + +**Option B: Make detectDisease() use the diseaseKeywords array** + +```javascript +const DISEASE_KEYWORDS = ['mpox', 'ebola', 'cholera', ...]; +function detectDisease(text) { + return DISEASE_KEYWORDS.find(k => text.toLowerCase().includes(k)) || null; +} +``` + +- **Effort:** Trivial +- **Risk:** Very low — clean single source of truth + +## Acceptance Criteria + +- [ ] Disease keyword list exists in exactly one place +- [ ] `detectDisease()` uses that single list + +## Work Log + +- 2026-03-27: Identified by simplicity-reviewer agent during PR #2375 review.