mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
feat(energy): IEA/OPEC energy intelligence RSS feed (#2713)
* feat(seeds): IEA and OPEC energy intelligence RSS feed - Add scripts/seed-energy-intelligence.mjs: parses IEA news, IEA reports, and OPEC press RSS feeds; filters by 20 energy keywords; deduplicates by URL (keeps most recent); excludes items older than 30 days; limits to 30 most recent items; TTL 86400s (24h); validates >= 3 items - Add tests/energy-intelligence-seed.test.mjs: 10 tests covering parseRssItems, filterEnergyRelevant, deduplicateByUrl, age filter, and key constants - Add ENERGY_INTELLIGENCE_KEY to server/_shared/cache-keys.ts - Add energyIntelligence to BOOTSTRAP_KEYS and SEED_META in api/health.js - Add seed-energy-intelligence service override to railway-set-watch-paths.mjs with 6h cron schedule * fix(seeds): replace dead IEA RSS (404) with OilPrice.com; keep OPEC best-effort * fix(seeds): fix energyIntelligence health placement, key format, validate export, entity decoding P1: energyIntelligence was in health.js BOOTSTRAP_KEYS but absent from api/bootstrap.js and BOOTSTRAP_CACHE_KEYS. The feed has no SPA consumer (server-side read only via chat-analyst-context), so it belongs in STANDALONE_KEYS in health.js, not BOOTSTRAP_KEYS. Moved accordingly: health monitoring is preserved via SEED_META, and the bootstrap test invariants (every bootstrap key must have a getHydratedData consumer) are satisfied. Key format: CANONICAL_KEY renamed energy:intelligence:v1:feed → energy:intelligence:feed:v1 to comply with the :v\d+$ convention enforced by bootstrap.test.mjs. Updated in health.js, cache-keys.ts (standalone export), and seed-energy-intelligence.mjs. P2: export validate() from seed-energy-intelligence.mjs and add tests covering the skip path (< 3 items → false, exactly 3 → true, > 3 → true). OPEC is best-effort and OilPrice is primary, so sub-threshold runs are a real production scenario. Quality: expand decodeHtmlEntities to handle numeric decimal/hex character references (&#8217; &#x2019;) and common named entities (&apos; &hellip; &mdash; &ndash; &lsquo; &rsquo; &ldquo; &rdquo;). 
&amp; is decoded last to handle double-encoded sequences correctly. Five new tests added. * fix(seeds): remove unused extendExistingTtl import from seed-energy-intelligence
This commit is contained in:
@@ -137,6 +137,7 @@ const STANDALONE_KEYS = {
|
||||
productCatalog: 'product-catalog:v2',
|
||||
energyExposure: 'energy:exposure:v1:index',
|
||||
regulatoryActions: 'regulatory:actions:v1',
|
||||
energyIntelligence: 'energy:intelligence:feed:v1',
|
||||
};
|
||||
|
||||
const SEED_META = {
|
||||
@@ -256,6 +257,7 @@ const SEED_META = {
|
||||
regulatoryActions: { key: 'seed-meta:regulatory:actions', maxStaleMin: 360 }, // 2h cron; 360min = 3x interval
|
||||
electricityPrices: { key: 'seed-meta:energy:electricity-prices', maxStaleMin: 2880 }, // daily cron (14:00 UTC); 2880min = 48h = 2x interval
|
||||
gasStorageCountries: { key: 'seed-meta:energy:gas-storage-countries', maxStaleMin: 2880 }, // daily cron at 10:30 UTC; 2880min = 48h = 2x interval
|
||||
energyIntelligence: { key: 'seed-meta:energy:intelligence', maxStaleMin: 720 }, // 6h cron; 720min = 2x interval
|
||||
};
|
||||
|
||||
// Standalone keys that are populated on-demand by RPC handlers (not seeds).
|
||||
|
||||
@@ -81,6 +81,15 @@ const SERVICE_OVERRIDES = {
|
||||
startCommand: 'node seed-gas-storage-countries.mjs',
|
||||
cronSchedule: '30 10 * * *',
|
||||
},
|
||||
'seed-energy-intelligence': {
|
||||
watchPatterns: [
|
||||
'scripts/seed-energy-intelligence.mjs',
|
||||
'scripts/_seed-utils.mjs',
|
||||
'scripts/package.json',
|
||||
],
|
||||
startCommand: 'node seed-energy-intelligence.mjs',
|
||||
cronSchedule: '0 */6 * * *',
|
||||
},
|
||||
};
|
||||
|
||||
function getToken() {
|
||||
|
||||
202
scripts/seed-energy-intelligence.mjs
Normal file
202
scripts/seed-energy-intelligence.mjs
Normal file
@@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import { loadEnvFile, CHROME_UA, runSeed } from './_seed-utils.mjs';
|
||||
|
||||
loadEnvFile(import.meta.url);
|
||||
|
||||
/** Cache key under which the curated feed payload is written. */
export const CANONICAL_KEY = 'energy:intelligence:feed:v1';

/** Payload lifetime in seconds: 24h = 4× 6h interval (gold standard: TTL ≥ 3× interval). */
export const INTELLIGENCE_TTL_SECONDS = 24 * 60 * 60;

/** Maximum number of items kept in the final payload. */
const MAX_ITEMS = 30;

/** Feed bodies longer than this are truncated before parsing (compared against string length). */
const RSS_MAX_BYTES = 500_000;

/** Items published more than 30 days ago are discarded. */
const AGE_LIMIT_MS = 30 * 24 * 60 * 60 * 1000;

// Note: IEA removed public RSS feeds (https://www.iea.org/rss/*.xml returns 404).
// OPEC RSS is Cloudflare-protected — kept as best-effort (works from Railway IPs).
// OilPrice.com provides reliable energy intelligence coverage as primary source.
const FEEDS = [
  {
    url: 'https://oilprice.com/rss/main',
    source: 'OilPrice',
    label: 'oilprice-main',
  },
  {
    url: 'https://www.opec.org/opec_web/en/press_room/rss.htm',
    source: 'OPEC',
    label: 'opec-press',
  },
];

/** Lower-case substrings; an item is kept when its title or summary contains any of them. */
export const ENERGY_KEYWORDS = [
  'oil',
  'gas',
  'lng',
  'coal',
  'energy',
  'opec',
  'refinery',
  'petroleum',
  'electricity',
  'power',
  'renewable',
  'nuclear',
  'barrel',
  'crude',
  'storage',
  'pipeline',
  'fuel',
  'carbon',
  'emissions',
];
|
||||
|
||||
/**
 * Produces a short deterministic digest of a string.
 *
 * Folds each UTF-16 code unit into a 31-multiplier rolling hash that is
 * wrapped to a signed 32-bit integer, then renders its absolute value in
 * base 36.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Base-36 digest; `'0'` for the empty string.
 */
export function stableHash(str) {
  let acc = 0;
  let idx = 0;
  while (idx < str.length) {
    // `| 0` keeps the accumulator inside the signed 32-bit range.
    acc = (Math.imul(31, acc) + str.charCodeAt(idx)) | 0;
    idx += 1;
  }
  const magnitude = Math.abs(acc);
  return magnitude.toString(36);
}
|
||||
|
||||
// Named entities this decoder understands, keyed by the name between `&` and `;`.
// The curly-quote entities map to plain ASCII quotes, matching the replacement
// values used by the previous chained-replace implementation.
const NAMED_HTML_ENTITIES = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
  ['hellip', '…'],
  ['mdash', '—'],
  ['ndash', '–'],
  ['lsquo', "'"],
  ['rsquo', "'"],
  ['ldquo', '"'],
  ['rdquo', '"'],
]);

/**
 * Decodes HTML/XML character references in a string.
 *
 * Handles hexadecimal (`&#x2019;`) and decimal (`&#8217;`) numeric references
 * plus the named entities listed in NAMED_HTML_ENTITIES. Decoding happens in a
 * single pass, so the output of one replacement is never re-scanned: a
 * double-encoded sequence such as `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * Unknown named entities and numeric references beyond U+10FFFF are left
 * untouched instead of throwing.
 *
 * @param {string} text - Raw text that may contain character references.
 * @returns {string} The text with recognised references decoded.
 */
function decodeHtmlEntities(text) {
  return text.replace(
    /&(?:#x([0-9a-fA-F]+)|#(\d+)|([a-zA-Z][a-zA-Z0-9]*));/g,
    (match, hex, dec, name) => {
      if (name !== undefined) {
        return NAMED_HTML_ENTITIES.get(name) ?? match;
      }
      const codePoint = hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec, 10);
      // String.fromCodePoint throws RangeError above U+10FFFF; keep such input verbatim.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    },
  );
}
|
||||
|
||||
/**
 * Returns the trimmed text content of the first `<tagName>…</tagName>` element
 * found in `block`, unwrapping a single surrounding CDATA section.
 *
 * Compared with a bare `<tag[^>]*>` pattern this:
 *  - requires the tag name to end at whitespace or `>`, so `link` does not
 *    match `<linkage>`;
 *  - ignores self-closing elements (`<link href="…"/>`), which have no content;
 *  - tolerates whitespace between the tags and the CDATA markers;
 *  - escapes regex metacharacters in `tagName`.
 *
 * @param {string} block - XML fragment to search.
 * @param {string} tagName - Element name, optionally namespaced (e.g. `dc:date`).
 * @returns {string} Trimmed element content, or `''` when the element is absent or empty.
 */
function extractTag(block, tagName) {
  const name = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(
    `<${name}(?:\\s[^>]*)?(?<!\\/)>\\s*(?:<!\\[CDATA\\[)?([\\s\\S]*?)(?:\\]\\]>)?\\s*<\\/${name}\\s*>`,
    'i',
  );
  return (block.match(re) || [])[1]?.trim() || '';
}
|
||||
|
||||
/**
 * Turns a raw feed description into a short plain-text summary.
 *
 * Entities are decoded first, then anything that looks like a tag is replaced
 * by a space, whitespace runs are collapsed, and the result is capped at 300
 * characters.
 *
 * @param {string} raw - Description/summary markup taken from the feed.
 * @returns {string} Plain-text summary of at most 300 characters.
 */
function cleanSummary(raw) {
  const decoded = decodeHtmlEntities(raw);
  const withoutTags = decoded.replace(/<[^>]+>/g, ' ');
  const collapsed = withoutTags.replace(/\s+/g, ' ').trim();
  return collapsed.slice(0, 300);
}
|
||||
|
||||
/**
 * Reads the publication timestamp of a feed item.
 *
 * Tries `pubDate`, `published`, `updated` and `dc:date` in that order and
 * parses the first non-empty value.
 *
 * @param {string} block - Inner XML of one item/entry.
 * @returns {number} Epoch milliseconds, or `0` when no date tag is present or
 *   the value cannot be parsed.
 */
function parseDateMs(block) {
  const candidateTags = ['pubDate', 'published', 'updated', 'dc:date'];

  let raw = '';
  for (const tag of candidateTags) {
    raw = extractTag(block, tag);
    if (raw) break;
  }
  if (!raw) return 0;

  const ms = Date.parse(raw);
  if (!Number.isFinite(ms)) return 0;
  return ms;
}
|
||||
|
||||
/**
 * Resolves the article URL of a feed item.
 *
 * Prefers the text content of a `<link>` element; when that is empty it falls
 * back to the `href` attribute of the first `<link … href="…">` tag.
 *
 * @param {string} block - Inner XML of one item/entry.
 * @returns {string} Entity-decoded, trimmed URL, or `''` when none is found.
 */
function extractLink(block) {
  const textLink = extractTag(block, 'link');
  if (textLink) {
    return decodeHtmlEntities(textLink).trim();
  }

  const hrefMatch = block.match(/<link[^>]*\bhref=(["'])(.*?)\1[^>]*\/?>/i);
  const href = hrefMatch ? hrefMatch[2] || '' : '';
  return decodeHtmlEntities(href).trim();
}
|
||||
|
||||
/**
 * Extracts normalised items from an RSS (`<item>`) or Atom (`<entry>`) document.
 *
 * Input longer than RSS_MAX_BYTES characters is truncated before scanning.
 * All `<item>` elements are processed first, then all `<entry>` elements.
 * An element is skipped when it lacks a title, a URL or a parseable date, or
 * when an element with the same generated id was already emitted.
 *
 * @param {string} xml - Raw feed body.
 * @param {string} source - Display name of the feed; lower-cased into the item id.
 * @returns {Array<{id: string, title: string, url: string, source: string, publishedAt: number, summary: string}>}
 */
export function parseRssItems(xml, source) {
  const bounded = xml.length <= RSS_MAX_BYTES ? xml : xml.slice(0, RSS_MAX_BYTES);

  // Element pattern plus the tags that may carry its summary, in priority order.
  const sections = [
    {
      pattern: /<item\b[^>]*>([\s\S]*?)<\/item>/gi,
      summaryTags: ['description', 'summary', 'content:encoded'],
    },
    {
      pattern: /<entry\b[^>]*>([\s\S]*?)<\/entry>/gi,
      summaryTags: ['summary', 'content'],
    },
  ];

  const seenIds = new Set();
  const items = [];

  for (const { pattern, summaryTags } of sections) {
    for (const [, block] of bounded.matchAll(pattern)) {
      const title = decodeHtmlEntities(extractTag(block, 'title'));
      const url = extractLink(block);
      const publishedAt = parseDateMs(block);
      if (!title || !url || !publishedAt) continue;

      const id = `${source.toLowerCase()}-${stableHash(url)}-${publishedAt}`;
      if (seenIds.has(id)) continue;
      seenIds.add(id);

      const rawSummary = summaryTags.map((tag) => extractTag(block, tag)).find(Boolean) || '';
      items.push({
        id,
        title,
        url,
        source,
        publishedAt,
        summary: cleanSummary(rawSummary),
      });
    }
  }

  return items;
}
|
||||
|
||||
/**
 * Keeps only the items whose title or summary mentions an energy keyword.
 *
 * Matching is a case-insensitive substring test, so a keyword also matches
 * inside longer words (e.g. `coal` matches "coalition").
 *
 * @param {Array<{title?: string, summary?: string}>} items - Parsed feed items.
 * @param {string[]} [keywords=ENERGY_KEYWORDS] - Keywords to look for; compared
 *   case-insensitively.
 * @returns {Array} The subset of `items` that matched, in the original order.
 */
export function filterEnergyRelevant(items, keywords = ENERGY_KEYWORDS) {
  const needles = keywords.map((kw) => String(kw).toLowerCase());
  return items.filter((item) => {
    // Treat absent fields as empty text instead of the literal "undefined"/"null".
    const haystack = `${item.title ?? ''} ${item.summary ?? ''}`.toLowerCase();
    return needles.some((kw) => haystack.includes(kw));
  });
}
|
||||
|
||||
/**
 * Collapses items that share the same URL, keeping the most recently
 * published one.
 *
 * The map is keyed by the URL string itself rather than by a 32-bit digest of
 * it, so two different URLs can never be merged because of a hash collision.
 * Output order follows the first appearance of each URL in `items`.
 *
 * @param {Array<{url: string, publishedAt: number}>} items - Items to deduplicate.
 * @returns {Array} One item per distinct URL.
 */
export function deduplicateByUrl(items) {
  const newestByUrl = new Map();
  for (const item of items) {
    const existing = newestByUrl.get(item.url);
    if (!existing || item.publishedAt > existing.publishedAt) {
      newestByUrl.set(item.url, item);
    }
  }
  return Array.from(newestByUrl.values());
}
|
||||
|
||||
/**
 * Downloads one feed and parses it into items.
 *
 * Never rejects: a non-2xx response or any thrown error (including the 15s
 * timeout) is logged as a warning and yields an empty list.
 *
 * @param {{url: string, source: string, label: string}} feed - Feed descriptor.
 * @returns {Promise<Array>} Parsed items, or `[]` on failure.
 */
async function fetchFeed(feed) {
  try {
    const requestInit = {
      headers: {
        Accept: 'application/rss+xml, application/xml, text/xml, */*',
        'User-Agent': CHROME_UA,
      },
      signal: AbortSignal.timeout(15_000),
    };

    const resp = await fetch(feed.url, requestInit);
    if (resp.ok) {
      const body = await resp.text();
      const parsed = parseRssItems(body, feed.source);
      console.log(`[EnergyIntel] ${feed.label}: ${parsed.length} raw items`);
      return parsed;
    }

    console.warn(`[EnergyIntel] ${feed.label} HTTP ${resp.status}`);
    return [];
  } catch (e) {
    console.warn(`[EnergyIntel] ${feed.label} fetch error:`, e?.message || e);
    return [];
  }
}
|
||||
|
||||
/**
 * Builds the energy-intelligence payload from all configured feeds.
 *
 * Pipeline: fetch every feed concurrently → drop items older than
 * AGE_LIMIT_MS → keep energy-relevant items → deduplicate by URL → sort
 * newest first → cap at MAX_ITEMS.
 *
 * @returns {Promise<{items: Array, fetchedAt: number, count: number}>}
 * @throws {Error} When no feed produced any item at all.
 */
async function fetchEnergyIntelligence() {
  const outcomes = await Promise.allSettled(FEEDS.map(fetchFeed));
  const allItems = outcomes.flatMap((outcome) => (outcome.status === 'fulfilled' ? outcome.value : []));

  if (allItems.length === 0) {
    throw new Error('All energy intelligence feeds returned 0 items');
  }

  const now = Date.now();
  const cutoff = now - AGE_LIMIT_MS;
  const recent = allItems.filter(({ publishedAt }) => publishedAt >= cutoff);
  const relevant = filterEnergyRelevant(recent);

  const deduped = deduplicateByUrl(relevant);
  deduped.sort((left, right) => right.publishedAt - left.publishedAt);
  const limited = deduped.slice(0, MAX_ITEMS);

  console.log(`[EnergyIntel] ${allItems.length} raw → ${recent.length} recent → ${relevant.length} relevant → ${deduped.length} deduped → ${limited.length} final`);

  return {
    items: limited,
    fetchedAt: now,
    count: limited.length,
  };
}
|
||||
|
||||
/**
 * Publish gate for the seed: the payload is acceptable only when it carries
 * an `items` array with at least three entries.
 *
 * @param {{items?: unknown}|null|undefined} data - Candidate payload.
 * @returns {boolean} `true` when the payload may be published.
 */
export function validate(data) {
  const items = data?.items;
  if (!Array.isArray(items)) return false;
  return items.length >= 3;
}
|
||||
|
||||
export { CANONICAL_KEY as ENERGY_INTELLIGENCE_KEY };
|
||||
|
||||
// Run the seed only when this file is the script being executed, not when it
// is imported (e.g. by the test suite).
if (process.argv[1]?.endsWith('seed-energy-intelligence.mjs')) {
  const seedOptions = {
    validateFn: validate,
    ttlSeconds: INTELLIGENCE_TTL_SECONDS,
    sourceVersion: 'energy-intel-rss-v1',
    recordCount: (data) => data?.items?.length || 0,
  };

  // Log the failure (with its cause, when present) and exit non-zero.
  const reportFatal = (err) => {
    let causeNote = '';
    if (err.cause) {
      causeNote = ` (cause: ${err.cause.message || err.cause.code || err.cause})`;
    }
    console.error('FATAL:', (err.message || err) + causeNote);
    process.exit(1);
  };

  runSeed('energy', 'intelligence', CANONICAL_KEY, fetchEnergyIntelligence, seedOptions).catch(reportFatal);
}
|
||||
@@ -56,6 +56,7 @@ export const GAS_STORAGE_KEY_PREFIX = 'energy:gas-storage:v1:';
|
||||
export const GAS_STORAGE_COUNTRIES_KEY = 'energy:gas-storage:v1:_countries';
|
||||
export const SPR_KEY = 'economic:spr:v1';
|
||||
export const REFINERY_INPUTS_KEY = 'economic:refinery-inputs:v1';
|
||||
export const ENERGY_INTELLIGENCE_KEY = 'energy:intelligence:feed:v1';
|
||||
|
||||
/**
|
||||
* Static cache keys for the bootstrap endpoint.
|
||||
@@ -141,6 +142,7 @@ export const BOOTSTRAP_CACHE_KEYS: Record<string, string> = {
|
||||
electricityPrices: 'energy:electricity:v1:index',
|
||||
};
|
||||
|
||||
|
||||
export const BOOTSTRAP_TIERS: Record<string, 'slow' | 'fast'> = {
|
||||
bisPolicy: 'slow', bisExchange: 'slow', bisCredit: 'slow',
|
||||
minerals: 'slow', giving: 'slow', sectors: 'slow',
|
||||
|
||||
240
tests/energy-intelligence-seed.test.mjs
Normal file
240
tests/energy-intelligence-seed.test.mjs
Normal file
@@ -0,0 +1,240 @@
|
||||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
|
||||
import {
|
||||
parseRssItems,
|
||||
filterEnergyRelevant,
|
||||
deduplicateByUrl,
|
||||
validate,
|
||||
ENERGY_INTELLIGENCE_KEY,
|
||||
INTELLIGENCE_TTL_SECONDS,
|
||||
} from '../scripts/seed-energy-intelligence.mjs';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Fixtures
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Two plain RSS 2.0 items. The weekday names in <pubDate> agree with the
// calendar dates (4 Apr 2026 is a Saturday, 5 Apr 2026 a Sunday) so the
// fixture is valid for date parsers that check the weekday.
const MINIMAL_RSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>Test Feed</title>
<item>
<title>IEA warns of tight LNG supply heading into summer 2026</title>
<link>https://www.iea.org/news/iea-warns-lng-supply-2026</link>
<pubDate>Sun, 05 Apr 2026 10:00:00 +0000</pubDate>
<description>The International Energy Agency said global LNG markets are tightening.</description>
</item>
<item>
<title>OPEC maintains production cuts amid oil demand uncertainty</title>
<link>https://www.opec.org/news/opec-production-cuts</link>
<pubDate>Sat, 04 Apr 2026 08:00:00 +0000</pubDate>
<description>OPEC members agreed to maintain current crude oil production quotas.</description>
</item>
</channel>
</rss>`;

// One item whose title and description are wrapped in CDATA sections; the
// description also contains inline HTML. 3 Apr 2026 is a Friday.
const CDATA_RSS = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>CDATA Feed</title>
<item>
<title><![CDATA[IEA Report: Global Energy Review 2026 & Oil Market Forecast]]></title>
<link>https://www.iea.org/reports/global-energy-review-2026</link>
<pubDate>Fri, 03 Apr 2026 12:00:00 +0000</pubDate>
<description><![CDATA[A comprehensive overview of the global energy market with <strong>oil</strong> and <em>gas</em> trends.]]></description>
</item>
</channel>
</rss>`;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// parseRssItems
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// parseRssItems against the two static fixtures: plain RSS and CDATA-wrapped RSS.
describe('parseRssItems', () => {
  it('extracts title, url, publishedAt from a minimal RSS XML fixture', () => {
    const parsed = parseRssItems(MINIMAL_RSS, 'IEA');
    assert.equal(parsed.length, 2);

    const [{ title, url, publishedAt, source }] = parsed;
    assert.equal(title, 'IEA warns of tight LNG supply heading into summer 2026');
    assert.equal(url, 'https://www.iea.org/news/iea-warns-lng-supply-2026');
    assert.ok(typeof publishedAt === 'number' && publishedAt > 0, 'publishedAt should be a positive number');
    assert.equal(source, 'IEA');
  });

  it('handles CDATA-wrapped titles', () => {
    const parsed = parseRssItems(CDATA_RSS, 'IEA');
    assert.equal(parsed.length, 1);

    const [only] = parsed;
    assert.equal(only.title, 'IEA Report: Global Energy Review 2026 & Oil Market Forecast');
    assert.ok(only.summary.length > 0);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// filterEnergyRelevant
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Keyword filter: matches in title or summary, regardless of letter case.
describe('filterEnergyRelevant', () => {
  // Builds an item in the shape parseRssItems produces; only title/summary vary.
  const makeItem = (id, title, summary = '') => ({
    id,
    title,
    url: `https://example.com/${id}`,
    source: 'IEA',
    publishedAt: Date.now(),
    summary,
  });

  it("keeps items with 'oil' in title, drops items with no energy keywords", () => {
    const kept = filterEnergyRelevant([
      makeItem('1', 'Oil prices surge on OPEC cuts'),
      makeItem('2', 'Latest sports results from the weekend', 'Football match highlights and scores.'),
      makeItem('3', 'Tech startup raises funding round', 'Silicon Valley venture capital news.'),
    ]);
    assert.equal(kept.length, 1);
    assert.equal(kept[0].id, '1');
  });

  it("is case-insensitive — 'LNG' in title matches 'lng' keyword", () => {
    const kept = filterEnergyRelevant([makeItem('1', 'LNG exports hit record highs in Q1 2026')]);
    assert.equal(kept.length, 1);
  });

  it('matches keyword in summary when title has no keyword', () => {
    const kept = filterEnergyRelevant([
      makeItem('1', 'Market update for April', 'Crude oil inventories fell sharply last week.'),
    ]);
    assert.equal(kept.length, 1);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// deduplicateByUrl
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// URL de-duplication: one survivor per URL, and the survivor is the newest.
describe('deduplicateByUrl', () => {
  const makeItem = (id, title, url, publishedAt) => ({
    id,
    title,
    url,
    source: 'IEA',
    publishedAt,
    summary: '',
  });

  it('same URL appears only once, keeping the most recent by publishedAt', () => {
    const sharedUrl = 'https://www.iea.org/news/duplicate-story';
    const result = deduplicateByUrl([
      makeItem('a', 'Old version', sharedUrl, 1000),
      makeItem('b', 'Updated version', sharedUrl, 2000),
    ]);

    assert.equal(result.length, 1);
    const [survivor] = result;
    assert.equal(survivor.publishedAt, 2000);
    assert.equal(survivor.id, 'b');
  });

  it('keeps distinct URLs unchanged', () => {
    const result = deduplicateByUrl([
      makeItem('1', 'Story A', 'https://www.iea.org/a', 1000),
      makeItem('2', 'Story B', 'https://www.iea.org/b', 2000),
    ]);
    assert.equal(result.length, 2);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Age filter integration
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// NOTE(review): this test re-declares the 30-day limit and re-implements the
// filter expression locally; it does not call into the seed script.
describe('age filter', () => {
  it('item older than 30 days is excluded via AGE_LIMIT_MS threshold', () => {
    const DAY_MS = 24 * 3600 * 1000;
    const AGE_LIMIT_MS = 30 * DAY_MS;
    const now = Date.now();

    const stale = {
      id: 'old',
      title: 'Old oil report',
      url: 'https://example.com/old',
      source: 'IEA',
      publishedAt: now - 31 * DAY_MS,
      summary: '',
    };
    const fresh = {
      id: 'new',
      title: 'New gas update',
      url: 'https://example.com/new',
      source: 'IEA',
      publishedAt: now,
      summary: '',
    };

    const cutoff = now - AGE_LIMIT_MS;
    const recent = [stale, fresh].filter(({ publishedAt }) => publishedAt >= cutoff);

    assert.equal(recent.length, 1);
    assert.equal(recent[0].id, 'new');
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Exported key constants
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Guards the public constants other modules rely on: the cache key string and
// the minimum TTL.
describe('exported constants', () => {
  it("ENERGY_INTELLIGENCE_KEY === 'energy:intelligence:feed:v1'", () => {
    const expectedKey = 'energy:intelligence:feed:v1';
    assert.equal(ENERGY_INTELLIGENCE_KEY, expectedKey);
  });

  it('INTELLIGENCE_TTL_SECONDS >= 24 * 3600 (24h minimum)', () => {
    const minimumTtl = 24 * 3600;
    assert.ok(INTELLIGENCE_TTL_SECONDS >= minimumTtl, `TTL ${INTELLIGENCE_TTL_SECONDS}s is less than 24h minimum`);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// validate — the gate that controls skip vs. publish in runSeed
|
||||
// ---------------------------------------------------------------------------
|
||||
// OPEC is best-effort and OilPrice is the primary source, so fewer-than-3
|
||||
// items is a real production scenario. A regression here would ship with all
|
||||
// other tests green while runSeed silently extends old TTLs instead of writing.
|
||||
|
||||
// validate() must reject payloads with fewer than three items and accept the rest.
describe('validate', () => {
  // Builds `count` minimal items; validate only looks at the array length.
  const itemsOf = (count) => Array.from({ length: count }, (_, i) => ({ url: `https://example.com/${i}` }));

  it('returns false for null', () => {
    assert.equal(validate(null), false);
  });

  it('returns false when items is missing', () => {
    assert.equal(validate({}), false);
  });

  it('returns false for fewer than 3 items', () => {
    for (const count of [0, 1, 2]) {
      assert.equal(validate({ items: itemsOf(count) }), false);
    }
  });

  it('returns true for exactly 3 items', () => {
    assert.equal(validate({ items: itemsOf(3) }), true);
  });

  it('returns true for more than 3 items', () => {
    assert.equal(validate({ items: itemsOf(10) }), true);
  });
});
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// decodeHtmlEntities — numeric and extended named entity handling
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// decodeHtmlEntities is not exported, so it is exercised through the title
// field of parseRssItems. Each input carries a literal character reference
// (e.g. `&#8217;`); the assertion checks for the decoded character.
describe('decodeHtmlEntities via parseRssItems title', () => {
  const wrapInRss = (title) => `<rss version="2.0"><channel>
<item>
<title>${title}</title>
<link>https://example.com/1</link>
<pubDate>Sun, 05 Apr 2026 10:00:00 +0000</pubDate>
</item>
</channel></rss>`;

  it('decodes numeric decimal entity &#8217; → right single quote', () => {
    const items = parseRssItems(wrapInRss('Europe&#8217;s gas storage'), 'Test');
    assert.ok(items[0].title.includes('\u2019'), `Expected right quote, got: ${items[0].title}`);
  });

  it('decodes numeric hex entity &#x2019; → right single quote', () => {
    const items = parseRssItems(wrapInRss('Europe&#x2019;s gas'), 'Test');
    assert.ok(items[0].title.includes('\u2019'), `Expected right quote, got: ${items[0].title}`);
  });

  it('decodes &mdash; → em dash', () => {
    const items = parseRssItems(wrapInRss('Oil prices &mdash; weekly review'), 'Test');
    assert.ok(items[0].title.includes('—'), `Expected em dash, got: ${items[0].title}`);
  });

  it('decodes &hellip; → ellipsis', () => {
    const items = parseRssItems(wrapInRss('OPEC output cuts&hellip;'), 'Test');
    assert.ok(items[0].title.includes('…'), `Expected ellipsis, got: ${items[0].title}`);
  });

  it('decodes &apos; → apostrophe', () => {
    const items = parseRssItems(wrapInRss('Europe&apos;s energy'), 'Test');
    assert.ok(items[0].title.includes("'"), `Expected apostrophe, got: ${items[0].title}`);
  });
});
|
||||
Reference in New Issue
Block a user