feat(regulatory): add tier classification and Redis publish (#2691)

* feat(regulatory): add tier classification and Redis publish

Builds on the fetch/parse layer from #2564. Adds keyword-based tier
classification (high/medium/low/unknown) and publishes to Redis via
runSeed with 6h TTL.

- HIGH: enforcement, fraud, penalty, injunction, etc.
- MEDIUM: rulemaking, guidance, investigation, etc.
- LOW: routine notices matching title patterns
- Register REGULATORY_ACTIONS_KEY in cache-keys.ts

Closes #2493

Co-authored-by: Lucas Passos <lspassos1@users.noreply.github.com>

* fix(regulatory): reject empty payloads, add health monitoring

- validateFn now requires actions.length > 0 to prevent overwriting
  a healthy snapshot with an empty one on parser regression
- Register regulatory:actions:v1 in STANDALONE_KEYS (api/health.js)
- Add seed-meta:regulatory:actions to SEED_META (maxStaleMin: 360,
  3x the 2h cron interval)
- Add seed-health.js monitoring (intervalMin: 120)

---------

Co-authored-by: Lucas Passos <lspassos1@users.noreply.github.com>
This commit is contained in:
Elie Habib
2026-04-04 23:19:33 +04:00
committed by GitHub
parent 8609ad1384
commit f210c5511a
6 changed files with 274 additions and 4 deletions

View File

@@ -132,6 +132,7 @@ const STANDALONE_KEYS = {
resilienceStaticIndex: 'resilience:static:index:v1',
productCatalog: 'product-catalog:v2',
energyExposure: 'energy:exposure:v1:index',
regulatoryActions: 'regulatory:actions:v1',
};
const SEED_META = {
@@ -246,6 +247,7 @@ const SEED_META = {
vpdTrackerHistorical: { key: 'seed-meta:health:vpd-tracker', maxStaleMin: 2880 }, // shares seed-meta key with vpdTrackerRealtime (same run)
resilienceStaticIndex: { key: 'seed-meta:resilience:static', maxStaleMin: 576000 }, // annual October snapshot; 400d threshold matches TTL and preserves prior-year data on source outages
energyExposure: { key: 'seed-meta:economic:owid-energy-mix', maxStaleMin: 50400 }, // monthly cron on 1st; 50400min = 35d = TTL matches cron cadence + 5d buffer
regulatoryActions: { key: 'seed-meta:regulatory:actions', maxStaleMin: 360 }, // 2h cron; 360min = 3x interval
};
// Standalone keys that are populated on-demand by RPC handlers (not seeds).

View File

@@ -65,6 +65,7 @@ const SEED_DOMAINS = {
'economic:grocery-basket': { key: 'seed-meta:economic:grocery-basket', intervalMin: 5040 }, // weekly seed; intervalMin = maxStaleMin / 2
'economic:bigmac': { key: 'seed-meta:economic:bigmac', intervalMin: 5040 }, // weekly seed; intervalMin = maxStaleMin / 2
'resilience:static': { key: 'seed-meta:resilience:static', intervalMin: 288000 }, // annual October snapshot; intervalMin = health.js maxStaleMin / 2 (400d alert threshold)
'regulatory:actions': { key: 'seed-meta:regulatory:actions', intervalMin: 120 }, // 2h cron; intervalMin = maxStaleMin / 3
};
async function getMetaBatch(keys) {

View File

@@ -2,12 +2,33 @@
// @ts-check
import { pathToFileURL } from 'node:url';
import { CHROME_UA } from './_seed-utils.mjs';
import { CHROME_UA, loadEnvFile, runSeed } from './_seed-utils.mjs';
loadEnvFile(import.meta.url);
// Key passed to runSeed as the canonical key for the published regulatory-actions payload.
const CANONICAL_KEY = 'regulatory:actions:v1';
// 15 000 ms = 15 s. NOTE(review): presumably the per-feed fetch timeout; its consumer is outside this view — confirm.
const FEED_TIMEOUT_MS = 15_000;
// Passed to runSeed as ttlSeconds; 21600 s = 6 h.
const TTL_SECONDS = 21600;
// NOTE(review): looks like the Accept header value sent with feed requests (Atom/RSS/XML, then anything) — confirm against fetchFeed.
const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*';
// User-agent string attached to the SEC entry in REGULATORY_FEEDS.
const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)';
// Default fetch implementation; resolves globalThis.fetch at call time rather than at module load.
const DEFAULT_FETCH = (...args) => globalThis.fetch(...args);
// Keywords that place an action in the 'high' tier. Each entry is compiled by
// compileKeywordPattern into a case-insensitive, word-bounded regex, and
// classifyAction evaluates this list before any other tier.
// NOTE(review): several entries overlap ('cease', 'desist', 'cease and desist',
// 'cease-and-desist'; 'suspension', 'suspension order'), so one phrase can report
// multiple matchedKeywords.
const HIGH_KEYWORDS = [
'enforcement', 'charges', 'charged', 'fraud', 'failure', 'failed bank',
'emergency', 'halt', 'suspension', 'suspended', 'cease', 'desist',
'penalty', 'fine', 'fined', 'settlement', 'indictment', 'manipulation',
'ban', 'revocation', 'insolvency', 'injunction', 'cease and desist',
'cease-and-desist', 'consent order', 'debarment', 'suspension order',
];
// Keywords that place an action in the 'medium' tier. classifyAction only consults
// these when no high keyword matched and the title is not a routine notice.
// NOTE(review): 'settled charges' contains the high keyword 'charges', so text that
// matches it is classified 'high' first; this entry looks unreachable as a medium
// signal — confirm intent.
const MEDIUM_KEYWORDS = [
'proposed rule', 'final rule', 'rulemaking', 'guidance', 'warning',
'advisory', 'review', 'examination', 'investigation',
'stress test', 'capital requirement', 'disclosure requirement',
'resolves action', 'settled charges', 'administrative proceeding', 'remedial action',
];
// Title-only patterns (see isLowPriorityRoutineTitle) that mark routine notices as 'low'.
const LOW_PRIORITY_TITLE_PATTERNS = [
// Titles that start with "Regulatory Notice", "Information Notice" or "Technical Notice".
/^(Regulatory|Information|Technical) Notice\b/i,
// Titles that contain "monthly highlights" or "monthly bulletin" anywhere.
/\bmonthly (highlights|bulletin)\b/i,
];
const REGULATORY_FEEDS = [
{ agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT },
@@ -229,10 +250,77 @@ async function fetchAllFeeds(fetchImpl = DEFAULT_FETCH, feeds = REGULATORY_FEEDS
return dedupeAndSortActions(actions);
}
async function main(fetchImpl = DEFAULT_FETCH) {
/**
 * Escapes regular-expression metacharacters so the input can be embedded
 * in a pattern and matched literally.
 *
 * @param {unknown} value - Coerced to a string before escaping.
 * @returns {string} The input with each of . * + ? ^ $ { } ( ) | [ ] \ prefixed by a backslash.
 */
function escapeRegex(value) {
  const text = String(value);
  return text.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
/**
 * Compiles a keyword into a case-insensitive, word-bounded regular expression.
 * Any run of whitespace inside the keyword matches one or more whitespace
 * characters in the text.
 *
 * @param {string} keyword - Keyword or phrase to compile.
 * @returns {{ keyword: string, regex: RegExp }} The original keyword paired with its regex.
 */
function compileKeywordPattern(keyword) {
  const escaped = escapeRegex(keyword.toLowerCase());
  const flexibleWhitespace = escaped.replace(/\s+/g, '\\s+');
  const regex = new RegExp(`\\b${flexibleWhitespace}\\b`, 'i');
  return { keyword, regex };
}
// Keyword lists compiled once at module load, so classification reuses the same
// { keyword, regex } pairs instead of rebuilding regexes per action.
const HIGH_KEYWORD_PATTERNS = HIGH_KEYWORDS.map(compileKeywordPattern);
const MEDIUM_KEYWORD_PATTERNS = MEDIUM_KEYWORDS.map(compileKeywordPattern);
/**
 * Returns the keywords whose regex matches the given text.
 * The text is passed through stripHtml and lower-cased before matching.
 *
 * @param {string} text - Text to scan.
 * @param {Array<{ keyword: string, regex: RegExp }>} keywordPatterns - Compiled patterns to test.
 * @returns {string[]} Matching keywords, in the order of keywordPatterns.
 */
function findMatchedKeywords(text, keywordPatterns) {
  const haystack = stripHtml(text).toLowerCase();
  const matched = [];
  for (const { keyword, regex } of keywordPatterns) {
    if (regex.test(haystack)) {
      matched.push(keyword);
    }
  }
  return matched;
}
/**
 * Builds the text used for keyword classification: the action's title and
 * description joined by a single space, skipping whichever is falsy.
 *
 * @param {{ title?: string, description?: string }} action
 * @returns {string}
 */
function buildClassificationText(action) {
  const { title, description } = action;
  const parts = [];
  if (title) {
    parts.push(title);
  }
  if (description) {
    parts.push(description);
  }
  return parts.join(' ');
}
/**
 * Reports whether a title, after stripHtml, matches any of the
 * LOW_PRIORITY_TITLE_PATTERNS routine-notice patterns.
 *
 * @param {string} title - Action title.
 * @returns {boolean} True when at least one pattern matches.
 */
function isLowPriorityRoutineTitle(title) {
  const plainTitle = stripHtml(title);
  for (const routinePattern of LOW_PRIORITY_TITLE_PATTERNS) {
    if (routinePattern.test(plainTitle)) {
      return true;
    }
  }
  return false;
}
/**
 * Assigns a tier to an action and records the keywords that triggered it.
 *
 * Precedence: any high keyword in title + description wins; otherwise a routine
 * notice title yields 'low'; otherwise any medium keyword yields 'medium';
 * anything else is 'unknown'.
 *
 * @param {object} action - Action with at least `title` and, optionally, `description`.
 * @returns {object} Copy of the action with `tier` and de-duplicated `matchedKeywords` added.
 */
function classifyAction(action) {
  const text = buildClassificationText(action);
  const withTier = (tier, keywords) => ({
    ...action,
    tier,
    matchedKeywords: [...new Set(keywords)],
  });

  const high = findMatchedKeywords(text, HIGH_KEYWORD_PATTERNS);
  if (high.length > 0) {
    return withTier('high', high);
  }

  // Routine-title check runs before medium keywords, so a routine notice
  // mentioning e.g. "guidance" stays 'low'.
  if (isLowPriorityRoutineTitle(action.title)) {
    return withTier('low', []);
  }

  const medium = findMatchedKeywords(text, MEDIUM_KEYWORD_PATTERNS);
  return medium.length > 0 ? withTier('medium', medium) : withTier('unknown', []);
}
/**
 * Classifies every action and wraps the result in the seed payload.
 *
 * @param {object[]} actions - Actions to classify.
 * @param {number} [fetchedAt=Date.now()] - Timestamp stored on the payload.
 * @returns {{ actions: object[], fetchedAt: number, recordCount: number, highCount: number, mediumCount: number }}
 *   Classified actions plus the total count and the counts of 'high' and 'medium' tiers.
 */
function buildSeedPayload(actions, fetchedAt = Date.now()) {
  const classified = actions.map(classifyAction);

  // Tally both tiers in a single pass over the classified list.
  const tierTotals = { high: 0, medium: 0 };
  for (const { tier } of classified) {
    if (tier === 'high') {
      tierTotals.high += 1;
    } else if (tier === 'medium') {
      tierTotals.medium += 1;
    }
  }

  return {
    actions: classified,
    fetchedAt,
    recordCount: classified.length,
    highCount: tierTotals.high,
    mediumCount: tierTotals.medium,
  };
}
/**
 * Fetches all regulatory feeds and returns the classified seed payload.
 *
 * The fetched actions go straight to buildSeedPayload; nothing is written to
 * stdout and the raw action list is never returned, so callers (runSeed's
 * validateFn and recordCount in main) always receive the payload object with
 * `actions`, `fetchedAt` and the aggregate counts.
 *
 * @param {typeof globalThis.fetch} [fetchImpl=DEFAULT_FETCH] - Fetch implementation handed to fetchAllFeeds.
 * @returns {Promise<object>} Payload produced by buildSeedPayload, stamped with the current time.
 */
async function fetchRegulatoryActionPayload(fetchImpl = DEFAULT_FETCH) {
  const actions = await fetchAllFeeds(fetchImpl);
  return buildSeedPayload(actions, Date.now());
}
/**
 * Seed entry point: hands the regulatory payload fetcher to runSeed under the
 * 'regulatory' / 'actions' domain and resource with the canonical key.
 *
 * @param {typeof globalThis.fetch} [fetchImpl=DEFAULT_FETCH] - Fetch implementation used to load feeds.
 * @param {Function} [runSeedImpl=runSeed] - Seed runner; injectable for tests.
 * @returns {Promise<unknown>} Whatever the seed runner resolves to.
 */
async function main(fetchImpl = DEFAULT_FETCH, runSeedImpl = runSeed) {
  const loadPayload = () => fetchRegulatoryActionPayload(fetchImpl);
  const seedOptions = {
    ttlSeconds: TTL_SECONDS,
    // An empty action list fails validation.
    validateFn: (data) => Array.isArray(data?.actions) && data.actions.length > 0,
    recordCount: (data) => data?.recordCount || 0,
    sourceVersion: 'regulatory-rss-v1',
  };
  return runSeedImpl('regulatory', 'actions', CANONICAL_KEY, loadPayload, seedOptions);
}
const isDirectRun = process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href;
@@ -245,18 +333,27 @@ if (isDirectRun) {
}
export {
CANONICAL_KEY,
CHROME_UA,
FEED_TIMEOUT_MS,
HIGH_KEYWORDS,
MEDIUM_KEYWORDS,
REGULATORY_FEEDS,
SEC_USER_AGENT,
TTL_SECONDS,
buildActionId,
buildSeedPayload,
canonicalizeLink,
classifyAction,
decodeEntities,
dedupeAndSortActions,
extractAtomLink,
fetchAllFeeds,
fetchFeed,
fetchRegulatoryActionPayload,
findMatchedKeywords,
getTagValue,
isLowPriorityRoutineTitle,
main,
normalizeFeedItems,
parseAtomEntries,

View File

@@ -39,6 +39,7 @@ export const DIGEST_ACCUMULATOR_TTL = 172800; // 48h — lookback window for dig
*/
export const SIMULATION_OUTCOME_LATEST_KEY = 'forecast:simulation-outcome:latest';
export const SIMULATION_PACKAGE_LATEST_KEY = 'forecast:simulation-package:latest';
export const REGULATORY_ACTIONS_KEY = 'regulatory:actions:v1';
export const CLIMATE_ANOMALIES_KEY = 'climate:anomalies:v2';
export const CLIMATE_AIR_QUALITY_KEY = 'climate:air-quality:v1';
export const CLIMATE_ZONE_NORMALS_KEY = 'climate:zone-normals:v1';

View File

@@ -0,0 +1,18 @@
import { describe, it } from 'node:test';
import assert from 'node:assert/strict';
import { readFileSync } from 'node:fs';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';
// Resolve the repository root relative to this test file (one directory up).
const __dirname = dirname(fileURLToPath(import.meta.url));
const root = join(__dirname, '..');

describe('regulatory cache contracts', () => {
  it('exports REGULATORY_ACTIONS_KEY from cache-keys.ts', () => {
    // Source-level contract: the exact export statement must be present in cache-keys.ts.
    const cacheKeysPath = join(root, 'server', '_shared', 'cache-keys.ts');
    const expectedExport = /export const REGULATORY_ACTIONS_KEY = 'regulatory:actions:v1';/;
    const cacheKeysSrc = readFileSync(cacheKeysPath, 'utf8');

    assert.match(cacheKeysSrc, expectedExport);
  });
});

View File

@@ -11,6 +11,7 @@ const seedSrc = readFileSync('scripts/seed-regulatory-actions.mjs', 'utf8');
// Reduce the seed script to plain script text that vm.runInContext can evaluate:
const pureSrc = seedSrc
// 1. blank out every line that starts with `import ` (ESM imports are not valid in a vm script)
.replace(/^import\s.*$/gm, '')
// 2. remove the first top-level `loadEnvFile(...);` call together with its newline
.replace(/loadEnvFile\([^)]+\);\n/, '')
// 3. cut everything from `const isDirectRun` through the trailing `export { ... }` list
.replace(/const isDirectRun[\s\S]*?}\n\nexport\s*{[\s\S]*?};?\s*$/m, '');
const ctx = vm.createContext({
@@ -26,6 +27,8 @@ const ctx = vm.createContext({
URLSearchParams,
AbortSignal,
CHROME_UA: 'Mozilla/5.0 (test)',
loadEnvFile: () => {},
runSeed: async () => {},
});
vm.runInContext(pureSrc, ctx);
@@ -40,6 +43,10 @@ const {
normalizeFeedItems,
dedupeAndSortActions,
fetchAllFeeds,
classifyAction,
buildSeedPayload,
fetchRegulatoryActionPayload,
main,
} = ctx;
describe('decodeEntities', () => {
@@ -211,3 +218,147 @@ describe('fetchAllFeeds', () => {
);
});
});
// Table-driven cases: each entry registers one test with the same name, input and
// expectations, in the same order.
describe('classifyAction', () => {
  const publishedAt = '2026-03-30T18:00:00.000Z';
  const cases = [
    {
      name: 'marks high priority actions from combined title and description text',
      input: {
        id: 'sec-a',
        agency: 'SEC',
        title: 'SEC action against issuer',
        description: 'The SEC secured a permanent injunction for accounting fraud.',
        link: 'https://example.test/sec-a',
        publishedAt,
      },
      tier: 'high',
      keywords: ['fraud', 'injunction'],
    },
    {
      name: 'marks medium actions from description text',
      input: {
        id: 'fed-a',
        agency: 'Federal Reserve',
        title: 'Federal Reserve update',
        description: 'The board resolves action through a remedial action plan.',
        link: 'https://example.test/fed-a',
        publishedAt,
      },
      tier: 'medium',
      keywords: ['resolves action', 'remedial action'],
    },
    {
      name: 'uses low only for explicit routine notice titles',
      input: {
        id: 'finra-a',
        agency: 'FINRA',
        title: 'Technical Notice 26-01',
        description: 'Routine operational bulletin for members.',
        link: 'https://example.test/finra-a',
        publishedAt,
      },
      tier: 'low',
      keywords: [],
    },
    {
      name: 'falls back to unknown for unmatched actions',
      input: {
        id: 'fdic-a',
        agency: 'FDIC',
        title: 'FDIC consumer outreach update',
        description: 'General event recap for community stakeholders.',
        link: 'https://example.test/fdic-a',
        publishedAt,
      },
      tier: 'unknown',
      keywords: [],
    },
  ];

  for (const { name, input, tier, keywords } of cases) {
    it(name, () => {
      const classified = normalize(classifyAction(input));
      assert.equal(classified.tier, tier);
      assert.deepEqual(classified.matchedKeywords, keywords);
    });
  }
});
describe('buildSeedPayload', () => {
  it('adds fetchedAt and aggregate counts', () => {
    const FETCHED_AT = 1711718400000;
    // One fixture per expected tier, in order: high, medium, low, unknown.
    const rawActions = [
      {
        id: 'sec-a',
        agency: 'SEC',
        title: 'SEC action against issuer',
        description: 'The SEC secured a permanent injunction for accounting fraud.',
        link: 'https://example.test/sec-a',
        publishedAt: '2026-03-30T18:00:00.000Z',
      },
      {
        id: 'fed-a',
        agency: 'Federal Reserve',
        title: 'Federal Reserve update',
        description: 'The board resolves action through a remedial action plan.',
        link: 'https://example.test/fed-a',
        publishedAt: '2026-03-29T18:00:00.000Z',
      },
      {
        id: 'finra-a',
        agency: 'FINRA',
        title: 'Regulatory Notice 26-01',
        description: 'Routine bulletin for members.',
        link: 'https://example.test/finra-a',
        publishedAt: '2026-03-28T18:00:00.000Z',
      },
      {
        id: 'fdic-a',
        agency: 'FDIC',
        title: 'FDIC consumer outreach update',
        description: 'General event recap for community stakeholders.',
        link: 'https://example.test/fdic-a',
        publishedAt: '2026-03-27T18:00:00.000Z',
      },
    ];

    const payload = normalize(buildSeedPayload(rawActions, FETCHED_AT));

    assert.equal(payload.fetchedAt, FETCHED_AT);
    assert.equal(payload.recordCount, 4);
    assert.equal(payload.highCount, 1);
    assert.equal(payload.mediumCount, 1);
    assert.equal(payload.actions[2].tier, 'low');
    assert.equal(payload.actions[3].tier, 'unknown');
  });
});
describe('fetchRegulatoryActionPayload', () => {
  it('returns classified payload from fetched actions', async () => {
    // Every request gets a single-item RSS document whose link embeds the requested URL.
    const rssFor = (url) => `<rss><channel><item><title>FDIC update</title><description>FDIC resolves action through a remedial action plan.</description><link>${url}/item</link><pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate></item></channel></rss>`;
    const fetchStub = async (url) => ({
      ok: true,
      text: async () => rssFor(url),
    });

    const payload = normalize(await fetchRegulatoryActionPayload(fetchStub));

    assert.equal(payload.actions.length, 6);
    assert.equal(payload.recordCount, 6);
    assert.ok(typeof payload.fetchedAt === 'number');
    const firstAction = payload.actions[0];
    assert.equal(firstAction.tier, 'medium');
    assert.deepEqual(firstAction.matchedKeywords, ['resolves action', 'remedial action']);
  });
});
describe('main', () => {
  it('wires runSeed with the regulatory key, TTL, and validateFn', async () => {
    const recorded = [];

    // Stand-in for runSeed: runs the fetcher once and records what main passed in.
    async function runSeedStub(domain, resource, canonicalKey, fetchFn, opts) {
      const payload = await fetchFn();
      recorded.push({ domain, resource, canonicalKey, opts, payload });
      return 'ok';
    }

    const fetchStub = async (url) => ({
      ok: true,
      text: async () => `<rss><channel><item><title>CFTC Issues Advisory</title><link>${url}/item</link><pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate></item></channel></rss>`,
    });

    const result = await main(fetchStub, runSeedStub);

    assert.equal(result, 'ok');
    assert.equal(recorded.length, 1);
    const call = recorded[0];
    assert.equal(call.domain, 'regulatory');
    assert.equal(call.resource, 'actions');
    assert.equal(call.canonicalKey, 'regulatory:actions:v1');
    assert.equal(call.opts.ttlSeconds, 21600);
    assert.equal(call.opts.validateFn({ actions: [] }), false);
    assert.equal(call.opts.validateFn({ actions: [{ id: 'a' }] }), true);
    assert.equal(call.payload.recordCount, 6);
  });
});