mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
feat(regulatory): add tier classification and Redis publish (#2691)
* feat(regulatory): add tier classification and Redis publish

  Builds on the fetch/parse layer from #2564. Adds keyword-based tier classification (high/medium/low/unknown) and publishes to Redis via runSeed with a 6h TTL.

  - HIGH: enforcement, fraud, penalty, injunction, etc.
  - MEDIUM: rulemaking, guidance, investigation, etc.
  - LOW: routine notices matching title patterns
  - Register REGULATORY_ACTIONS_KEY in cache-keys.ts

  Closes #2493

  Co-authored-by: Lucas Passos <lspassos1@users.noreply.github.com>

* fix(regulatory): reject empty payloads, add health monitoring

  - validateFn now requires actions.length > 0 to prevent overwriting a healthy snapshot with an empty one on parser regression
  - Register regulatory:actions:v1 in STANDALONE_KEYS (api/health.js)
  - Add seed-meta:regulatory:actions to SEED_META (maxStaleMin: 360, 3x the 2h cron interval)
  - Add seed-health.js monitoring (intervalMin: 120)

---------

Co-authored-by: Lucas Passos <lspassos1@users.noreply.github.com>
This commit is contained in:
@@ -132,6 +132,7 @@ const STANDALONE_KEYS = {
|
||||
resilienceStaticIndex: 'resilience:static:index:v1',
|
||||
productCatalog: 'product-catalog:v2',
|
||||
energyExposure: 'energy:exposure:v1:index',
|
||||
regulatoryActions: 'regulatory:actions:v1',
|
||||
};
|
||||
|
||||
const SEED_META = {
|
||||
@@ -246,6 +247,7 @@ const SEED_META = {
|
||||
vpdTrackerHistorical: { key: 'seed-meta:health:vpd-tracker', maxStaleMin: 2880 }, // shares seed-meta key with vpdTrackerRealtime (same run)
|
||||
resilienceStaticIndex: { key: 'seed-meta:resilience:static', maxStaleMin: 576000 }, // annual October snapshot; 400d threshold matches TTL and preserves prior-year data on source outages
|
||||
energyExposure: { key: 'seed-meta:economic:owid-energy-mix', maxStaleMin: 50400 }, // monthly cron on 1st; 50400min = 35d = TTL matches cron cadence + 5d buffer
|
||||
regulatoryActions: { key: 'seed-meta:regulatory:actions', maxStaleMin: 360 }, // 2h cron; 360min = 3x interval
|
||||
};
|
||||
|
||||
// Standalone keys that are populated on-demand by RPC handlers (not seeds).
|
||||
|
||||
@@ -65,6 +65,7 @@ const SEED_DOMAINS = {
|
||||
'economic:grocery-basket': { key: 'seed-meta:economic:grocery-basket', intervalMin: 5040 }, // weekly seed; intervalMin = maxStaleMin / 2
|
||||
'economic:bigmac': { key: 'seed-meta:economic:bigmac', intervalMin: 5040 }, // weekly seed; intervalMin = maxStaleMin / 2
|
||||
'resilience:static': { key: 'seed-meta:resilience:static', intervalMin: 288000 }, // annual October snapshot; intervalMin = health.js maxStaleMin / 2 (400d alert threshold)
|
||||
'regulatory:actions': { key: 'seed-meta:regulatory:actions', intervalMin: 120 }, // 2h cron; intervalMin = maxStaleMin / 3
|
||||
};
|
||||
|
||||
async function getMetaBatch(keys) {
|
||||
|
||||
@@ -2,12 +2,33 @@
|
||||
// @ts-check
|
||||
|
||||
import { pathToFileURL } from 'node:url';
|
||||
import { CHROME_UA } from './_seed-utils.mjs';
|
||||
import { CHROME_UA, loadEnvFile, runSeed } from './_seed-utils.mjs';
|
||||
|
||||
loadEnvFile(import.meta.url);
|
||||
|
||||
const CANONICAL_KEY = 'regulatory:actions:v1';
|
||||
const FEED_TIMEOUT_MS = 15_000;
|
||||
const TTL_SECONDS = 21600;
|
||||
const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*';
|
||||
const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)';
|
||||
const DEFAULT_FETCH = (...args) => globalThis.fetch(...args);
|
||||
// Keywords and phrases whose presence in an action's title/description marks it as tier "high".
// Overlapping entries (e.g. 'cease', 'desist', 'cease and desist') are kept on purpose:
// every matching entry is reported back in `matchedKeywords`.
const HIGH_KEYWORDS = [
  'enforcement', 'charges', 'charged', 'fraud', 'failure', 'failed bank',
  'emergency', 'halt', 'suspension', 'suspended', 'cease', 'desist',
  'penalty', 'fine', 'fined', 'settlement', 'indictment', 'manipulation',
  'ban', 'revocation', 'insolvency', 'injunction', 'cease and desist',
  'cease-and-desist', 'consent order', 'debarment', 'suspension order',
];
|
||||
// Keywords and phrases whose presence in an action's title/description marks it as tier "medium"
// (only consulted when no high-tier keyword matched and the title is not a routine notice).
const MEDIUM_KEYWORDS = [
  'proposed rule', 'final rule', 'rulemaking', 'guidance', 'warning',
  'advisory', 'review', 'examination', 'investigation',
  'stress test', 'capital requirement', 'disclosure requirement',
  'resolves action', 'settled charges', 'administrative proceeding', 'remedial action',
];
|
||||
// Title patterns that identify routine publications (tier "low").
const LOW_PRIORITY_TITLE_PATTERNS = [
  // Titles that begin with "Regulatory Notice", "Information Notice" or "Technical Notice".
  /^(Regulatory|Information|Technical) Notice\b/i,
  // "monthly highlights" / "monthly bulletin" anywhere in the title.
  /\bmonthly (highlights|bulletin)\b/i,
];
|
||||
|
||||
const REGULATORY_FEEDS = [
|
||||
{ agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT },
|
||||
@@ -229,10 +250,77 @@ async function fetchAllFeeds(fetchImpl = DEFAULT_FETCH, feeds = REGULATORY_FEEDS
|
||||
return dedupeAndSortActions(actions);
|
||||
}
|
||||
|
||||
async function main(fetchImpl = DEFAULT_FETCH) {
|
||||
/**
 * Escape regular-expression metacharacters so the value can be embedded
 * literally inside a `RegExp` source string.
 *
 * @param {unknown} value - Coerced to a string with `String(value)`.
 * @returns {string} The value with `. * + ? ^ $ { } ( ) | [ ] \` backslash-escaped.
 */
function escapeRegex(value) {
  return String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}
|
||||
|
||||
/**
 * Compile a keyword or phrase into a case-insensitive, word-bounded matcher.
 *
 * The keyword is lower-cased and regex-escaped; each run of whitespace inside it
 * is then relaxed to `\s+`, so a phrase still matches when the words are
 * separated by several spaces or a line break.
 *
 * @param {string} keyword - Keyword or phrase to match.
 * @returns {{ keyword: string, regex: RegExp }} The original keyword paired with its matcher.
 */
function compileKeywordPattern(keyword) {
  const pattern = `\\b${escapeRegex(keyword.toLowerCase()).replace(/\s+/g, '\\s+')}\\b`;
  return { keyword, regex: new RegExp(pattern, 'i') };
}
|
||||
|
||||
const HIGH_KEYWORD_PATTERNS = HIGH_KEYWORDS.map(compileKeywordPattern);
|
||||
const MEDIUM_KEYWORD_PATTERNS = MEDIUM_KEYWORDS.map(compileKeywordPattern);
|
||||
|
||||
/**
 * List the keywords whose compiled pattern matches the given text.
 *
 * The text is passed through `stripHtml` and lower-cased before matching.
 *
 * @param {string} text - Raw text (may contain markup).
 * @param {Array<{ keyword: string, regex: RegExp }>} keywordPatterns - Compiled keyword matchers.
 * @returns {string[]} Matching keywords, in the order they appear in `keywordPatterns`.
 */
function findMatchedKeywords(text, keywordPatterns) {
  const normalizedText = stripHtml(text).toLowerCase();
  return keywordPatterns
    .filter(({ regex }) => regex.test(normalizedText))
    .map(({ keyword }) => keyword);
}
|
||||
|
||||
/**
 * Build the text that tier classification is run against: the action's title
 * and description joined by a single space, skipping whichever is empty or missing.
 *
 * @param {{ title?: string, description?: string }} action
 * @returns {string} Combined text; an empty string when both fields are falsy.
 */
function buildClassificationText(action) {
  return [action.title, action.description].filter(Boolean).join(' ');
}
|
||||
|
||||
/**
 * Report whether a title looks like a routine publication, i.e. it matches
 * one of `LOW_PRIORITY_TITLE_PATTERNS` after being passed through `stripHtml`.
 *
 * @param {string} title - Raw action title (may contain markup).
 * @returns {boolean} `true` when any low-priority title pattern matches.
 */
function isLowPriorityRoutineTitle(title) {
  const normalizedTitle = stripHtml(title);
  return LOW_PRIORITY_TITLE_PATTERNS.some((pattern) => pattern.test(normalizedTitle));
}
|
||||
|
||||
/**
 * Assign a priority tier to a regulatory action.
 *
 * Precedence (first rule that applies wins):
 *   1. "high"    - any HIGH keyword matches the title + description text
 *   2. "low"     - the title matches a routine-notice pattern
 *   3. "medium"  - any MEDIUM keyword matches the title + description text
 *   4. "unknown" - nothing matched
 *
 * High is checked before low, so a routine-looking notice that mentions e.g.
 * fraud is still surfaced as high.
 *
 * @param {{ title?: string, description?: string }} action - Action to classify; not mutated.
 * @returns {object} Shallow copy of `action` with `tier` and `matchedKeywords`
 *   (de-duplicated; empty for the "low" and "unknown" tiers) added.
 */
function classifyAction(action) {
  const classificationText = buildClassificationText(action);

  const highMatches = findMatchedKeywords(classificationText, HIGH_KEYWORD_PATTERNS);
  if (highMatches.length > 0) {
    return { ...action, tier: 'high', matchedKeywords: [...new Set(highMatches)] };
  }

  if (isLowPriorityRoutineTitle(action.title)) {
    return { ...action, tier: 'low', matchedKeywords: [] };
  }

  const mediumMatches = findMatchedKeywords(classificationText, MEDIUM_KEYWORD_PATTERNS);
  if (mediumMatches.length > 0) {
    return { ...action, tier: 'medium', matchedKeywords: [...new Set(mediumMatches)] };
  }

  return { ...action, tier: 'unknown', matchedKeywords: [] };
}
|
||||
|
||||
/**
 * Classify every action and wrap the result in the payload that gets seeded.
 *
 * @param {object[]} actions - Actions to classify (order is preserved).
 * @param {number} [fetchedAt=Date.now()] - Timestamp recorded on the payload.
 * @returns {{ actions: object[], fetchedAt: number, recordCount: number, highCount: number, mediumCount: number }}
 *   Classified actions plus aggregate counts for the "high" and "medium" tiers.
 */
function buildSeedPayload(actions, fetchedAt = Date.now()) {
  const classified = actions.map(classifyAction);
  const highCount = classified.filter((action) => action.tier === 'high').length;
  const mediumCount = classified.filter((action) => action.tier === 'medium').length;

  return {
    actions: classified,
    fetchedAt,
    recordCount: classified.length,
    highCount,
    mediumCount,
  };
}
|
||||
|
||||
/**
 * Fetch all regulatory feeds and return the classified seed payload.
 *
 * Fix: a leftover `process.stdout.write(...)` dump followed by `return actions;`
 * ran before the payload was built. That made `buildSeedPayload` unreachable and
 * returned a bare array, which the seed's
 * `validateFn` (`Array.isArray(data?.actions)`) can never accept.
 *
 * @param {Function} [fetchImpl=DEFAULT_FETCH] - fetch-compatible function, injectable for tests.
 * @returns {Promise<object>} Whatever `buildSeedPayload` produces for the fetched actions,
 *   stamped with the current time.
 */
async function fetchRegulatoryActionPayload(fetchImpl = DEFAULT_FETCH) {
  const actions = await fetchAllFeeds(fetchImpl);
  return buildSeedPayload(actions, Date.now());
}
|
||||
|
||||
/**
 * Entry point: run the regulatory-actions seed through `runSeed`.
 *
 * @param {Function} [fetchImpl=DEFAULT_FETCH] - fetch-compatible function handed to the payload fetcher.
 * @param {Function} [runSeedImpl=runSeed] - Seed runner, injectable for tests. Called as
 *   `(domain, resource, canonicalKey, fetchFn, options)`.
 * @returns {Promise<*>} Whatever the seed runner returns.
 */
async function main(fetchImpl = DEFAULT_FETCH, runSeedImpl = runSeed) {
  return runSeedImpl('regulatory', 'actions', CANONICAL_KEY, () => fetchRegulatoryActionPayload(fetchImpl), {
    ttlSeconds: TTL_SECONDS,
    // Reject empty payloads so a parser regression cannot overwrite a healthy snapshot.
    validateFn: (data) => Array.isArray(data?.actions) && data.actions.length > 0,
    recordCount: (data) => data?.recordCount || 0,
    sourceVersion: 'regulatory-rss-v1',
  });
}
|
||||
|
||||
const isDirectRun = process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href;
|
||||
@@ -245,18 +333,27 @@ if (isDirectRun) {
|
||||
}
|
||||
|
||||
export {
|
||||
CANONICAL_KEY,
|
||||
CHROME_UA,
|
||||
FEED_TIMEOUT_MS,
|
||||
HIGH_KEYWORDS,
|
||||
MEDIUM_KEYWORDS,
|
||||
REGULATORY_FEEDS,
|
||||
SEC_USER_AGENT,
|
||||
TTL_SECONDS,
|
||||
buildActionId,
|
||||
buildSeedPayload,
|
||||
canonicalizeLink,
|
||||
classifyAction,
|
||||
decodeEntities,
|
||||
dedupeAndSortActions,
|
||||
extractAtomLink,
|
||||
fetchAllFeeds,
|
||||
fetchFeed,
|
||||
fetchRegulatoryActionPayload,
|
||||
findMatchedKeywords,
|
||||
getTagValue,
|
||||
isLowPriorityRoutineTitle,
|
||||
main,
|
||||
normalizeFeedItems,
|
||||
parseAtomEntries,
|
||||
|
||||
@@ -39,6 +39,7 @@ export const DIGEST_ACCUMULATOR_TTL = 172800; // 48h — lookback window for dig
|
||||
*/
|
||||
export const SIMULATION_OUTCOME_LATEST_KEY = 'forecast:simulation-outcome:latest';
|
||||
export const SIMULATION_PACKAGE_LATEST_KEY = 'forecast:simulation-package:latest';
|
||||
export const REGULATORY_ACTIONS_KEY = 'regulatory:actions:v1';
|
||||
export const CLIMATE_ANOMALIES_KEY = 'climate:anomalies:v2';
|
||||
export const CLIMATE_AIR_QUALITY_KEY = 'climate:air-quality:v1';
|
||||
export const CLIMATE_ZONE_NORMALS_KEY = 'climate:zone-normals:v1';
|
||||
|
||||
18
tests/regulatory-contract.test.mjs
Normal file
18
tests/regulatory-contract.test.mjs
Normal file
@@ -0,0 +1,18 @@
|
||||
import { describe, it } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
import { readFileSync } from 'node:fs';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
const __dirname = dirname(fileURLToPath(import.meta.url));
|
||||
const root = join(__dirname, '..');
|
||||
|
||||
// Contract test: the Redis key written by the seed script must stay in sync with
// the constant exported from the shared cache-keys module.
describe('regulatory cache contracts', () => {
  it('exports REGULATORY_ACTIONS_KEY from cache-keys.ts', () => {
    // Read the TypeScript source as text and check the literal export statement.
    const cacheKeysSrc = readFileSync(join(root, 'server', '_shared', 'cache-keys.ts'), 'utf8');
    assert.match(
      cacheKeysSrc,
      /export const REGULATORY_ACTIONS_KEY = 'regulatory:actions:v1';/
    );
  });
});
|
||||
@@ -11,6 +11,7 @@ const seedSrc = readFileSync('scripts/seed-regulatory-actions.mjs', 'utf8');
|
||||
|
||||
// Strip the parts of the seed script that cannot be evaluated via vm.runInContext:
// import lines, the top-level loadEnvFile(...) call, and the direct-run guard
// together with the trailing export list.
const pureSrc = seedSrc
  .replace(/^import\s.*$/gm, '')
  .replace(/loadEnvFile\([^)]+\);\n/, '')
  .replace(/const isDirectRun[\s\S]*?}\n\nexport\s*{[\s\S]*?};?\s*$/m, '');
|
||||
|
||||
const ctx = vm.createContext({
|
||||
@@ -26,6 +27,8 @@ const ctx = vm.createContext({
|
||||
URLSearchParams,
|
||||
AbortSignal,
|
||||
CHROME_UA: 'Mozilla/5.0 (test)',
|
||||
loadEnvFile: () => {},
|
||||
runSeed: async () => {},
|
||||
});
|
||||
|
||||
vm.runInContext(pureSrc, ctx);
|
||||
@@ -40,6 +43,10 @@ const {
|
||||
normalizeFeedItems,
|
||||
dedupeAndSortActions,
|
||||
fetchAllFeeds,
|
||||
classifyAction,
|
||||
buildSeedPayload,
|
||||
fetchRegulatoryActionPayload,
|
||||
main,
|
||||
} = ctx;
|
||||
|
||||
describe('decodeEntities', () => {
|
||||
@@ -211,3 +218,147 @@ describe('fetchAllFeeds', () => {
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
// Tier classification: one case per tier (high, medium, low, unknown).
describe('classifyAction', () => {
  it('marks high priority actions from combined title and description text', () => {
    const action = normalize(classifyAction({
      id: 'sec-a',
      agency: 'SEC',
      title: 'SEC action against issuer',
      description: 'The SEC secured a permanent injunction for accounting fraud.',
      link: 'https://example.test/sec-a',
      publishedAt: '2026-03-30T18:00:00.000Z',
    }));

    assert.equal(action.tier, 'high');
    // Keywords are reported in HIGH_KEYWORDS order, not in order of appearance in the text.
    assert.deepEqual(action.matchedKeywords, ['fraud', 'injunction']);
  });

  it('marks medium actions from description text', () => {
    const medium = normalize(classifyAction({
      id: 'fed-a',
      agency: 'Federal Reserve',
      title: 'Federal Reserve update',
      description: 'The board resolves action through a remedial action plan.',
      link: 'https://example.test/fed-a',
      publishedAt: '2026-03-30T18:00:00.000Z',
    }));

    assert.equal(medium.tier, 'medium');
    assert.deepEqual(medium.matchedKeywords, ['resolves action', 'remedial action']);
  });

  it('uses low only for explicit routine notice titles', () => {
    const low = normalize(classifyAction({
      id: 'finra-a',
      agency: 'FINRA',
      title: 'Technical Notice 26-01',
      description: 'Routine operational bulletin for members.',
      link: 'https://example.test/finra-a',
      publishedAt: '2026-03-30T18:00:00.000Z',
    }));

    assert.equal(low.tier, 'low');
    assert.deepEqual(low.matchedKeywords, []);
  });

  it('falls back to unknown for unmatched actions', () => {
    const unknown = normalize(classifyAction({
      id: 'fdic-a',
      agency: 'FDIC',
      title: 'FDIC consumer outreach update',
      description: 'General event recap for community stakeholders.',
      link: 'https://example.test/fdic-a',
      publishedAt: '2026-03-30T18:00:00.000Z',
    }));

    assert.equal(unknown.tier, 'unknown');
    assert.deepEqual(unknown.matchedKeywords, []);
  });
});
|
||||
|
||||
// Payload assembly: four actions, one per tier, in a fixed order.
describe('buildSeedPayload', () => {
  it('adds fetchedAt and aggregate counts', () => {
    const payload = normalize(buildSeedPayload([
      {
        id: 'sec-a',
        agency: 'SEC',
        title: 'SEC action against issuer',
        description: 'The SEC secured a permanent injunction for accounting fraud.',
        link: 'https://example.test/sec-a',
        publishedAt: '2026-03-30T18:00:00.000Z',
      },
      {
        id: 'fed-a',
        agency: 'Federal Reserve',
        title: 'Federal Reserve update',
        description: 'The board resolves action through a remedial action plan.',
        link: 'https://example.test/fed-a',
        publishedAt: '2026-03-29T18:00:00.000Z',
      },
      {
        id: 'finra-a',
        agency: 'FINRA',
        title: 'Regulatory Notice 26-01',
        description: 'Routine bulletin for members.',
        link: 'https://example.test/finra-a',
        publishedAt: '2026-03-28T18:00:00.000Z',
      },
      {
        id: 'fdic-a',
        agency: 'FDIC',
        title: 'FDIC consumer outreach update',
        description: 'General event recap for community stakeholders.',
        link: 'https://example.test/fdic-a',
        publishedAt: '2026-03-27T18:00:00.000Z',
      },
    ], 1711718400000));

    assert.equal(payload.fetchedAt, 1711718400000);
    assert.equal(payload.recordCount, 4);
    assert.equal(payload.highCount, 1);
    assert.equal(payload.mediumCount, 1);
    // Input order is preserved, so index 2 is the FINRA notice and index 3 the FDIC update.
    assert.equal(payload.actions[2].tier, 'low');
    assert.equal(payload.actions[3].tier, 'unknown');
  });
});
|
||||
|
||||
// End-to-end over a stubbed fetch: every feed URL answers with a single-item RSS document.
describe('fetchRegulatoryActionPayload', () => {
  it('returns classified payload from fetched actions', async () => {
    const payload = normalize(await fetchRegulatoryActionPayload(async (url) => ({
      ok: true,
      text: async () => `<rss><channel><item><title>FDIC update</title><description>FDIC resolves action through a remedial action plan.</description><link>${url}/item</link><pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate></item></channel></rss>`,
    })));

    // NOTE(review): 6 presumably equals the number of configured feeds (one item each) — confirm against REGULATORY_FEEDS.
    assert.equal(payload.actions.length, 6);
    assert.equal(payload.recordCount, 6);
    assert.ok(typeof payload.fetchedAt === 'number');
    assert.equal(payload.actions[0].tier, 'medium');
    assert.deepEqual(payload.actions[0].matchedKeywords, ['resolves action', 'remedial action']);
  });
});
|
||||
|
||||
// Wiring test: main() must hand runSeed the expected key, TTL and validation callback.
describe('main', () => {
  it('wires runSeed with the regulatory key, TTL, and validateFn', async () => {
    const calls = [];
    // Records the arguments and eagerly runs the fetch callback so the payload can be inspected.
    const runSeedStub = async (domain, resource, canonicalKey, fetchFn, opts) => {
      calls.push({ domain, resource, canonicalKey, opts, payload: await fetchFn() });
      return 'ok';
    };
    const fetchStub = async (url) => ({
      ok: true,
      text: async () => `<rss><channel><item><title>CFTC Issues Advisory</title><link>${url}/item</link><pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate></item></channel></rss>`,
    });

    const result = await main(fetchStub, runSeedStub);
    assert.equal(result, 'ok');
    assert.equal(calls.length, 1);
    assert.equal(calls[0].domain, 'regulatory');
    assert.equal(calls[0].resource, 'actions');
    assert.equal(calls[0].canonicalKey, 'regulatory:actions:v1');
    assert.equal(calls[0].opts.ttlSeconds, 21600);
    // Empty payloads must be rejected so a healthy snapshot is never overwritten.
    assert.equal(calls[0].opts.validateFn({ actions: [] }), false);
    assert.equal(calls[0].opts.validateFn({ actions: [{ id: 'a' }] }), true);
    assert.equal(calls[0].payload.recordCount, 6);
  });
});
|
||||
|
||||
Reference in New Issue
Block a user