diff --git a/api/health.js b/api/health.js index 1acef5ec7..bac6d5803 100644 --- a/api/health.js +++ b/api/health.js @@ -137,6 +137,7 @@ const STANDALONE_KEYS = { productCatalog: 'product-catalog:v2', energyExposure: 'energy:exposure:v1:index', regulatoryActions: 'regulatory:actions:v1', + energyIntelligence: 'energy:intelligence:feed:v1', }; const SEED_META = { @@ -256,6 +257,7 @@ const SEED_META = { regulatoryActions: { key: 'seed-meta:regulatory:actions', maxStaleMin: 360 }, // 2h cron; 360min = 3x interval electricityPrices: { key: 'seed-meta:energy:electricity-prices', maxStaleMin: 2880 }, // daily cron (14:00 UTC); 2880min = 48h = 2x interval gasStorageCountries: { key: 'seed-meta:energy:gas-storage-countries', maxStaleMin: 2880 }, // daily cron at 10:30 UTC; 2880min = 48h = 2x interval + energyIntelligence: { key: 'seed-meta:energy:intelligence', maxStaleMin: 720 }, // 6h cron; 720min = 2x interval }; // Standalone keys that are populated on-demand by RPC handlers (not seeds). diff --git a/scripts/railway-set-watch-paths.mjs b/scripts/railway-set-watch-paths.mjs index 6b10b5ed3..3b6bffc72 100644 --- a/scripts/railway-set-watch-paths.mjs +++ b/scripts/railway-set-watch-paths.mjs @@ -81,6 +81,15 @@ const SERVICE_OVERRIDES = { startCommand: 'node seed-gas-storage-countries.mjs', cronSchedule: '30 10 * * *', }, + 'seed-energy-intelligence': { + watchPatterns: [ + 'scripts/seed-energy-intelligence.mjs', + 'scripts/_seed-utils.mjs', + 'scripts/package.json', + ], + startCommand: 'node seed-energy-intelligence.mjs', + cronSchedule: '0 */6 * * *', + }, }; function getToken() { diff --git a/scripts/seed-energy-intelligence.mjs b/scripts/seed-energy-intelligence.mjs new file mode 100644 index 000000000..2fa8658cc --- /dev/null +++ b/scripts/seed-energy-intelligence.mjs @@ -0,0 +1,202 @@ +#!/usr/bin/env node + +import { loadEnvFile, CHROME_UA, runSeed } from './_seed-utils.mjs'; + +loadEnvFile(import.meta.url); + +export const CANONICAL_KEY = 
/**
 * Named character references recognised by decodeHtmlEntities, mapped to their
 * replacement text. Curly quotes written by name are normalised to straight
 * ASCII quotes and a non-breaking space becomes a regular space.
 */
const HTML_NAMED_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
  ['hellip', '…'],
  ['mdash', '—'],
  ['ndash', '–'],
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
]);

/**
 * Decodes numeric (`&#8217;`, `&#x2019;`) and a fixed set of named
 * (`&amp;`, `&mdash;`, ...) character references in a string.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded a second time: `&amp;lt;` becomes the literal text `&lt;`,
 * not `<`. Unknown names and numeric references outside the Unicode range are
 * left untouched instead of throwing.
 *
 * @param {string} text - Raw text that may contain character references.
 * @returns {string} The text with recognised references replaced.
 */
function decodeHtmlEntities(text) {
  return text.replace(/&(#[xX][0-9a-fA-F]+|#\d+|[A-Za-z][A-Za-z0-9]*);/g, (match, body) => {
    if (body[0] !== '#') {
      // Named reference: keep the original text when the name is not in the table.
      return HTML_NAMED_ENTITIES.get(body) ?? match;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // String.fromCodePoint throws a RangeError above U+10FFFF; one malformed
    // reference must not abort parsing of the whole feed.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
  });
}
/**
 * Collapses items that share the same URL, keeping the one with the greatest
 * `publishedAt`. When timestamps tie, the item seen first is kept.
 *
 * Items are keyed by the URL string itself rather than by a short hash of it:
 * a 32-bit hash can collide, which would silently merge two unrelated stories
 * and drop one of them.
 *
 * @param {Array<{url: string, publishedAt: number}>} items - Parsed feed items.
 * @returns {Array} One item per distinct URL, in order of first appearance.
 */
export function deduplicateByUrl(items) {
  const newestByUrl = new Map();
  for (const item of items) {
    const existing = newestByUrl.get(item.url);
    if (!existing || item.publishedAt > existing.publishedAt) {
      newestByUrl.set(item.url, item);
    }
  }
  return Array.from(newestByUrl.values());
}
sourceVersion: 'energy-intel-rss-v1', + recordCount: (data) => data?.items?.length || 0, + }).catch((err) => { + const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : ''; + console.error('FATAL:', (err.message || err) + _cause); + process.exit(1); + }); +} diff --git a/server/_shared/cache-keys.ts b/server/_shared/cache-keys.ts index 4492dc25b..d4891a692 100644 --- a/server/_shared/cache-keys.ts +++ b/server/_shared/cache-keys.ts @@ -56,6 +56,7 @@ export const GAS_STORAGE_KEY_PREFIX = 'energy:gas-storage:v1:'; export const GAS_STORAGE_COUNTRIES_KEY = 'energy:gas-storage:v1:_countries'; export const SPR_KEY = 'economic:spr:v1'; export const REFINERY_INPUTS_KEY = 'economic:refinery-inputs:v1'; +export const ENERGY_INTELLIGENCE_KEY = 'energy:intelligence:feed:v1'; /** * Static cache keys for the bootstrap endpoint. @@ -141,6 +142,7 @@ export const BOOTSTRAP_CACHE_KEYS: Record = { electricityPrices: 'energy:electricity:v1:index', }; + export const BOOTSTRAP_TIERS: Record = { bisPolicy: 'slow', bisExchange: 'slow', bisCredit: 'slow', minerals: 'slow', giving: 'slow', sectors: 'slow', diff --git a/tests/energy-intelligence-seed.test.mjs b/tests/energy-intelligence-seed.test.mjs new file mode 100644 index 000000000..0b07021a9 --- /dev/null +++ b/tests/energy-intelligence-seed.test.mjs @@ -0,0 +1,240 @@ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; + +import { + parseRssItems, + filterEnergyRelevant, + deduplicateByUrl, + validate, + ENERGY_INTELLIGENCE_KEY, + INTELLIGENCE_TTL_SECONDS, +} from '../scripts/seed-energy-intelligence.mjs'; + +// --------------------------------------------------------------------------- +// Fixtures +// --------------------------------------------------------------------------- + +const MINIMAL_RSS = ` + + + Test Feed + + IEA warns of tight LNG supply heading into summer 2026 + https://www.iea.org/news/iea-warns-lng-supply-2026 + Sat, 05 Apr 2026 10:00:00 +0000 + The 
describe('parseRssItems', () => {
  it('extracts title, url, publishedAt from a minimal RSS XML fixture', () => {
    const parsed = parseRssItems(MINIMAL_RSS, 'IEA');
    assert.equal(parsed.length, 2);

    // Only the leading item is inspected field by field.
    const [head] = parsed;
    assert.equal(head.title, 'IEA warns of tight LNG supply heading into summer 2026');
    assert.equal(head.url, 'https://www.iea.org/news/iea-warns-lng-supply-2026');
    const hasPositiveTimestamp = typeof head.publishedAt === 'number' && head.publishedAt > 0;
    assert.ok(hasPositiveTimestamp, 'publishedAt should be a positive number');
    assert.equal(head.source, 'IEA');
  });

  it('handles CDATA-wrapped titles', () => {
    const parsed = parseRssItems(CDATA_RSS, 'IEA');
    assert.equal(parsed.length, 1);

    const [only] = parsed;
    assert.equal(only.title, 'IEA Report: Global Energy Review 2026 & Oil Market Forecast');
    assert.ok(only.summary.length > 0);
  });
});
Date.now(), summary: '' }, + { id: '2', title: 'Latest sports results from the weekend', url: 'https://example.com/2', source: 'IEA', publishedAt: Date.now(), summary: 'Football match highlights and scores.' }, + { id: '3', title: 'Tech startup raises funding round', url: 'https://example.com/3', source: 'IEA', publishedAt: Date.now(), summary: 'Silicon Valley venture capital news.' }, + ]; + const filtered = filterEnergyRelevant(items); + assert.equal(filtered.length, 1); + assert.equal(filtered[0].id, '1'); + }); + + it("is case-insensitive — 'LNG' in title matches 'lng' keyword", () => { + const items = [ + { id: '1', title: 'LNG exports hit record highs in Q1 2026', url: 'https://example.com/1', source: 'IEA', publishedAt: Date.now(), summary: '' }, + ]; + const filtered = filterEnergyRelevant(items); + assert.equal(filtered.length, 1); + }); + + it('matches keyword in summary when title has no keyword', () => { + const items = [ + { id: '1', title: 'Market update for April', url: 'https://example.com/1', source: 'IEA', publishedAt: Date.now(), summary: 'Crude oil inventories fell sharply last week.' 
describe('deduplicateByUrl', () => {
  // Builds a feed item with the fixed 'IEA' source and an empty summary.
  const makeItem = (id, title, url, publishedAt) => ({
    id,
    title,
    url,
    source: 'IEA',
    publishedAt,
    summary: '',
  });

  it('same URL appears only once, keeping the most recent by publishedAt', () => {
    const sharedUrl = 'https://www.iea.org/news/duplicate-story';
    const result = deduplicateByUrl([
      makeItem('a', 'Old version', sharedUrl, 1000),
      makeItem('b', 'Updated version', sharedUrl, 2000),
    ]);

    assert.equal(result.length, 1);
    const [survivor] = result;
    assert.equal(survivor.publishedAt, 2000);
    assert.equal(survivor.id, 'b');
  });

  it('keeps distinct URLs unchanged', () => {
    const result = deduplicateByUrl([
      makeItem('1', 'Story A', 'https://www.iea.org/a', 1000),
      makeItem('2', 'Story B', 'https://www.iea.org/b', 2000),
    ]);
    assert.equal(result.length, 2);
  });
});
describe('exported constants', () => {
  it("ENERGY_INTELLIGENCE_KEY === 'energy:intelligence:feed:v1'", () => {
    const expectedKey = 'energy:intelligence:feed:v1';
    assert.equal(ENERGY_INTELLIGENCE_KEY, expectedKey);
  });

  it('INTELLIGENCE_TTL_SECONDS >= 24 * 3600 (24h minimum)', () => {
    // The TTL must be at least one day, expressed in seconds.
    const minimumTtlSeconds = 24 * 3600;
    const meetsMinimum = INTELLIGENCE_TTL_SECONDS >= minimumTtlSeconds;
    assert.ok(meetsMinimum, `TTL ${INTELLIGENCE_TTL_SECONDS}s is less than 24h minimum`);
  });
});
describe('validate', () => {
  it('returns false for null', () => {
    assert.equal(validate(null), false);
  });

  it('returns false when items is missing', () => {
    assert.equal(validate({}), false);
  });

  it('returns false for fewer than 3 items', () => {
    assert.equal(validate({ items: [] }), false);
    assert.equal(validate({ items: [{ url: 'a' }] }), false);
    assert.equal(validate({ items: [{ url: 'a' }, { url: 'b' }] }), false);
  });

  it('returns true for exactly 3 items', () => {
    assert.equal(validate({ items: [{ url: 'a' }, { url: 'b' }, { url: 'c' }] }), true);
  });

  it('returns true for more than 3 items', () => {
    const items = Array.from({ length: 10 }, (_, i) => ({ url: `https://example.com/${i}` }));
    assert.equal(validate({ items }), true);
  });
});

// ---------------------------------------------------------------------------
// decodeHtmlEntities — numeric and extended named entity handling
// ---------------------------------------------------------------------------

describe('decodeHtmlEntities via parseRssItems title', () => {
  // Wraps a still-encoded title in a minimal RSS document. parseRssItems drops
  // any item lacking a title, a link, or a parseable date, so all three are set.
  const wrapInRss = (title) => `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel>
  <item>
    <title>${title}</title>
    <link>https://example.com/1</link>
    <pubDate>Sun, 05 Apr 2026 10:00:00 +0000</pubDate>
  </item>
</channel></rss>`;

  // Each input below must contain the ENCODED reference (not the decoded
  // character); otherwise the test passes without exercising the decoder.
  it('decodes numeric decimal entity &#8217; → right single quote', () => {
    const items = parseRssItems(wrapInRss('Europe&#8217;s gas storage'), 'Test');
    assert.ok(items[0].title.includes('\u2019'), `Expected right quote, got: ${items[0].title}`);
  });

  it('decodes numeric hex entity &#x2019; → right single quote', () => {
    const items = parseRssItems(wrapInRss('Europe&#x2019;s gas'), 'Test');
    assert.ok(items[0].title.includes('\u2019'), `Expected right quote, got: ${items[0].title}`);
  });

  it('decodes &mdash; → em dash', () => {
    const items = parseRssItems(wrapInRss('Oil prices &mdash; weekly review'), 'Test');
    assert.ok(items[0].title.includes('—'), `Expected em dash, got: ${items[0].title}`);
  });

  it('decodes &hellip; → ellipsis', () => {
    const items = parseRssItems(wrapInRss('OPEC output cuts&hellip;'), 'Test');
    assert.ok(items[0].title.includes('…'), `Expected ellipsis, got: ${items[0].title}`);
  });

  it('decodes &apos; → apostrophe', () => {
    const items = parseRssItems(wrapInRss('Europe&apos;s energy'), 'Test');
    assert.ok(items[0].title.includes("'"), `Expected apostrophe, got: ${items[0].title}`);
  });
});