diff --git a/api/health.js b/api/health.js
index b67d45425..735393cbc 100644
--- a/api/health.js
+++ b/api/health.js
@@ -132,6 +132,7 @@ const STANDALONE_KEYS = {
   resilienceStaticIndex: 'resilience:static:index:v1',
   productCatalog: 'product-catalog:v2',
   energyExposure: 'energy:exposure:v1:index',
+  regulatoryActions: 'regulatory:actions:v1',
 };
 
 const SEED_META = {
@@ -246,6 +247,7 @@ const SEED_META = {
   vpdTrackerHistorical: { key: 'seed-meta:health:vpd-tracker', maxStaleMin: 2880 }, // shares seed-meta key with vpdTrackerRealtime (same run)
   resilienceStaticIndex: { key: 'seed-meta:resilience:static', maxStaleMin: 576000 }, // annual October snapshot; 400d threshold matches TTL and preserves prior-year data on source outages
   energyExposure: { key: 'seed-meta:economic:owid-energy-mix', maxStaleMin: 50400 }, // monthly cron on 1st; 50400min = 35d = TTL matches cron cadence + 5d buffer
+  regulatoryActions: { key: 'seed-meta:regulatory:actions', maxStaleMin: 360 }, // 2h cron; 360min = 3x interval
 };
 
 // Standalone keys that are populated on-demand by RPC handlers (not seeds).
diff --git a/api/seed-health.js b/api/seed-health.js
index ddbda27f7..970fe31a2 100644
--- a/api/seed-health.js
+++ b/api/seed-health.js
@@ -65,6 +65,7 @@ const SEED_DOMAINS = {
   'economic:grocery-basket': { key: 'seed-meta:economic:grocery-basket', intervalMin: 5040 }, // weekly seed; intervalMin = maxStaleMin / 2
   'economic:bigmac': { key: 'seed-meta:economic:bigmac', intervalMin: 5040 }, // weekly seed; intervalMin = maxStaleMin / 2
   'resilience:static': { key: 'seed-meta:resilience:static', intervalMin: 288000 }, // annual October snapshot; intervalMin = health.js maxStaleMin / 2 (400d alert threshold)
+  'regulatory:actions': { key: 'seed-meta:regulatory:actions', intervalMin: 120 }, // 2h cron; intervalMin = maxStaleMin / 3
 };
 
 async function getMetaBatch(keys) {
diff --git a/scripts/seed-regulatory-actions.mjs b/scripts/seed-regulatory-actions.mjs
index 844105b74..eadba4f11 100644
--- a/scripts/seed-regulatory-actions.mjs
+++ b/scripts/seed-regulatory-actions.mjs
@@ -2,12 +2,33 @@
 // @ts-check
 
 import { pathToFileURL } from 'node:url';
-import { CHROME_UA } from './_seed-utils.mjs';
+import { CHROME_UA, loadEnvFile, runSeed } from './_seed-utils.mjs';
 
+loadEnvFile(import.meta.url);
+
+const CANONICAL_KEY = 'regulatory:actions:v1';
 const FEED_TIMEOUT_MS = 15_000;
+const TTL_SECONDS = 21600;
 const XML_ACCEPT = 'application/atom+xml, application/rss+xml, application/xml, text/xml, */*';
 const SEC_USER_AGENT = 'WorldMonitor/2.0 (monitor@worldmonitor.app)';
 const DEFAULT_FETCH = (...args) => globalThis.fetch(...args);
+const HIGH_KEYWORDS = [
+  'enforcement', 'charges', 'charged', 'fraud', 'failure', 'failed bank',
+  'emergency', 'halt', 'suspension', 'suspended', 'cease', 'desist',
+  'penalty', 'fine', 'fined', 'settlement', 'indictment', 'manipulation',
+  'ban', 'revocation', 'insolvency', 'injunction', 'cease and desist',
+  'cease-and-desist', 'consent order', 'debarment', 'suspension order',
+];
+const MEDIUM_KEYWORDS = [
+  'proposed rule', 'final rule', 'rulemaking', 'guidance', 'warning',
+  'advisory', 'review', 'examination', 'investigation',
+  'stress test', 'capital requirement', 'disclosure requirement',
+  'resolves action', 'settled charges', 'administrative proceeding', 'remedial action',
+];
+const LOW_PRIORITY_TITLE_PATTERNS = [
+  /^(Regulatory|Information|Technical) Notice\b/i,
+  /\bmonthly (highlights|bulletin)\b/i,
+];
 
 const REGULATORY_FEEDS = [
   { agency: 'SEC', url: 'https://www.sec.gov/news/pressreleases.rss', userAgent: SEC_USER_AGENT },
@@ -229,10 +250,77 @@ async function fetchAllFeeds(fetchImpl = DEFAULT_FETCH, feeds = REGULATORY_FEEDS
   return dedupeAndSortActions(actions);
 }
 
-async function main(fetchImpl = DEFAULT_FETCH) {
+function escapeRegex(value) {
+  return String(value).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
+
+function compileKeywordPattern(keyword) {
+  const pattern = `\\b${escapeRegex(keyword.toLowerCase()).replace(/\s+/g, '\\s+')}\\b`;
+  return { keyword, regex: new RegExp(pattern, 'i') };
+}
+
+const HIGH_KEYWORD_PATTERNS = HIGH_KEYWORDS.map(compileKeywordPattern);
+const MEDIUM_KEYWORD_PATTERNS = MEDIUM_KEYWORDS.map(compileKeywordPattern);
+
+function findMatchedKeywords(text, keywordPatterns) {
+  const normalizedText = stripHtml(text).toLowerCase();
+  return keywordPatterns.filter(({ regex }) => regex.test(normalizedText)).map(({ keyword }) => keyword);
+}
+
+function buildClassificationText(action) {
+  return [action.title, action.description].filter(Boolean).join(' ');
+}
+
+function isLowPriorityRoutineTitle(title) {
+  const normalizedTitle = stripHtml(title);
+  return LOW_PRIORITY_TITLE_PATTERNS.some((pattern) => pattern.test(normalizedTitle));
+}
+
+function classifyAction(action) {
+  const classificationText = buildClassificationText(action);
+  const highMatches = findMatchedKeywords(classificationText, HIGH_KEYWORD_PATTERNS);
+  if (highMatches.length > 0) {
+    return { ...action, tier: 'high', matchedKeywords: [...new Set(highMatches)] };
+  }
+
+  if (isLowPriorityRoutineTitle(action.title)) {
+    return { ...action, tier: 'low', matchedKeywords: [] };
+  }
+
+  const mediumMatches = findMatchedKeywords(classificationText, MEDIUM_KEYWORD_PATTERNS);
+  if (mediumMatches.length > 0) {
+    return { ...action, tier: 'medium', matchedKeywords: [...new Set(mediumMatches)] };
+  }
+
+  return { ...action, tier: 'unknown', matchedKeywords: [] };
+}
+
+function buildSeedPayload(actions, fetchedAt = Date.now()) {
+  const classified = actions.map(classifyAction);
+  const highCount = classified.filter((action) => action.tier === 'high').length;
+  const mediumCount = classified.filter((action) => action.tier === 'medium').length;
+
+  return {
+    actions: classified,
+    fetchedAt,
+    recordCount: classified.length,
+    highCount,
+    mediumCount,
+  };
+}
+
+async function fetchRegulatoryActionPayload(fetchImpl = DEFAULT_FETCH) {
   const actions = await fetchAllFeeds(fetchImpl);
-  process.stdout.write(`${JSON.stringify(actions, null, 2)}\n`);
-  return actions;
+  return buildSeedPayload(actions, Date.now());
+}
+
+async function main(fetchImpl = DEFAULT_FETCH, runSeedImpl = runSeed) {
+  return runSeedImpl('regulatory', 'actions', CANONICAL_KEY, () => fetchRegulatoryActionPayload(fetchImpl), {
+    ttlSeconds: TTL_SECONDS,
+    validateFn: (data) => Array.isArray(data?.actions) && data.actions.length > 0,
+    recordCount: (data) => data?.recordCount || 0,
+    sourceVersion: 'regulatory-rss-v1',
+  });
 }
 
 const isDirectRun = process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href;
@@ -245,18 +333,27 @@ if (isDirectRun) {
 }
 
 export {
+  CANONICAL_KEY,
   CHROME_UA,
   FEED_TIMEOUT_MS,
+  HIGH_KEYWORDS,
+  MEDIUM_KEYWORDS,
   REGULATORY_FEEDS,
   SEC_USER_AGENT,
+  TTL_SECONDS,
   buildActionId,
+  buildSeedPayload,
   canonicalizeLink,
+  classifyAction,
   decodeEntities,
   dedupeAndSortActions,
   extractAtomLink,
   fetchAllFeeds,
   fetchFeed,
+  fetchRegulatoryActionPayload,
+  findMatchedKeywords,
   getTagValue,
+  isLowPriorityRoutineTitle,
   main,
   normalizeFeedItems,
   parseAtomEntries,
diff --git a/server/_shared/cache-keys.ts b/server/_shared/cache-keys.ts
index 2500a8cd8..a2d1b3e34 100644
--- a/server/_shared/cache-keys.ts
+++ b/server/_shared/cache-keys.ts
@@ -39,6 +39,7 @@ export const DIGEST_ACCUMULATOR_TTL = 172800; // 48h — lookback window for dig
  */
 export const SIMULATION_OUTCOME_LATEST_KEY = 'forecast:simulation-outcome:latest';
 export const SIMULATION_PACKAGE_LATEST_KEY = 'forecast:simulation-package:latest';
+export const REGULATORY_ACTIONS_KEY = 'regulatory:actions:v1';
 export const CLIMATE_ANOMALIES_KEY = 'climate:anomalies:v2';
 export const CLIMATE_AIR_QUALITY_KEY = 'climate:air-quality:v1';
 export const CLIMATE_ZONE_NORMALS_KEY = 'climate:zone-normals:v1';
diff --git a/tests/regulatory-contract.test.mjs b/tests/regulatory-contract.test.mjs
new file mode 100644
index 000000000..687abc185
--- /dev/null
+++ b/tests/regulatory-contract.test.mjs
@@ -0,0 +1,18 @@
+import { describe, it } from 'node:test';
+import assert from 'node:assert/strict';
+import { readFileSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const root = join(__dirname, '..');
+
+describe('regulatory cache contracts', () => {
+  it('exports REGULATORY_ACTIONS_KEY from cache-keys.ts', () => {
+    const cacheKeysSrc = readFileSync(join(root, 'server', '_shared', 'cache-keys.ts'), 'utf8');
+    assert.match(
+      cacheKeysSrc,
+      /export const REGULATORY_ACTIONS_KEY = 'regulatory:actions:v1';/
+    );
+  });
+});
diff --git a/tests/regulatory-seed-unit.test.mjs b/tests/regulatory-seed-unit.test.mjs
index 28dee14b2..c2d19b64b 100644
--- a/tests/regulatory-seed-unit.test.mjs
+++ b/tests/regulatory-seed-unit.test.mjs
@@ -11,6 +11,7 @@ const seedSrc = readFileSync('scripts/seed-regulatory-actions.mjs', 'utf8');
 
 const pureSrc = seedSrc
   .replace(/^import\s.*$/gm, '')
+  .replace(/loadEnvFile\([^)]+\);\n/, '')
   .replace(/const isDirectRun[\s\S]*?}\n\nexport\s*{[\s\S]*?};?\s*$/m, '');
 
 const ctx = vm.createContext({
@@ -26,6 +27,8 @@ const ctx = vm.createContext({
   URLSearchParams,
   AbortSignal,
   CHROME_UA: 'Mozilla/5.0 (test)',
+  loadEnvFile: () => {},
+  runSeed: async () => {},
 });
 
 vm.runInContext(pureSrc, ctx);
@@ -40,6 +43,10 @@ const {
   normalizeFeedItems,
   dedupeAndSortActions,
   fetchAllFeeds,
+  classifyAction,
+  buildSeedPayload,
+  fetchRegulatoryActionPayload,
+  main,
 } = ctx;
 
 describe('decodeEntities', () => {
@@ -211,3 +218,147 @@ describe('fetchAllFeeds', () => {
     );
   });
 });
+
+describe('classifyAction', () => {
+  it('marks high priority actions from combined title and description text', () => {
+    const action = normalize(classifyAction({
+      id: 'sec-a',
+      agency: 'SEC',
+      title: 'SEC action against issuer',
+      description: 'The SEC secured a permanent injunction for accounting fraud.',
+      link: 'https://example.test/sec-a',
+      publishedAt: '2026-03-30T18:00:00.000Z',
+    }));
+
+    assert.equal(action.tier, 'high');
+    assert.deepEqual(action.matchedKeywords, ['fraud', 'injunction']);
+  });
+
+  it('marks medium actions from description text', () => {
+    const medium = normalize(classifyAction({
+      id: 'fed-a',
+      agency: 'Federal Reserve',
+      title: 'Federal Reserve update',
+      description: 'The board resolves action through a remedial action plan.',
+      link: 'https://example.test/fed-a',
+      publishedAt: '2026-03-30T18:00:00.000Z',
+    }));
+
+    assert.equal(medium.tier, 'medium');
+    assert.deepEqual(medium.matchedKeywords, ['resolves action', 'remedial action']);
+  });
+
+  it('uses low only for explicit routine notice titles', () => {
+    const low = normalize(classifyAction({
+      id: 'finra-a',
+      agency: 'FINRA',
+      title: 'Technical Notice 26-01',
+      description: 'Routine operational bulletin for members.',
+      link: 'https://example.test/finra-a',
+      publishedAt: '2026-03-30T18:00:00.000Z',
+    }));
+
+    assert.equal(low.tier, 'low');
+    assert.deepEqual(low.matchedKeywords, []);
+  });
+
+  it('falls back to unknown for unmatched actions', () => {
+    const unknown = normalize(classifyAction({
+      id: 'fdic-a',
+      agency: 'FDIC',
+      title: 'FDIC consumer outreach update',
+      description: 'General event recap for community stakeholders.',
+      link: 'https://example.test/fdic-a',
+      publishedAt: '2026-03-30T18:00:00.000Z',
+    }));
+
+    assert.equal(unknown.tier, 'unknown');
+    assert.deepEqual(unknown.matchedKeywords, []);
+  });
+});
+
+describe('buildSeedPayload', () => {
+  it('adds fetchedAt and aggregate counts', () => {
+    const payload = normalize(buildSeedPayload([
+      {
+        id: 'sec-a',
+        agency: 'SEC',
+        title: 'SEC action against issuer',
+        description: 'The SEC secured a permanent injunction for accounting fraud.',
+        link: 'https://example.test/sec-a',
+        publishedAt: '2026-03-30T18:00:00.000Z',
+      },
+      {
+        id: 'fed-a',
+        agency: 'Federal Reserve',
+        title: 'Federal Reserve update',
+        description: 'The board resolves action through a remedial action plan.',
+        link: 'https://example.test/fed-a',
+        publishedAt: '2026-03-29T18:00:00.000Z',
+      },
+      {
+        id: 'finra-a',
+        agency: 'FINRA',
+        title: 'Regulatory Notice 26-01',
+        description: 'Routine bulletin for members.',
+        link: 'https://example.test/finra-a',
+        publishedAt: '2026-03-28T18:00:00.000Z',
+      },
+      {
+        id: 'fdic-a',
+        agency: 'FDIC',
+        title: 'FDIC consumer outreach update',
+        description: 'General event recap for community stakeholders.',
+        link: 'https://example.test/fdic-a',
+        publishedAt: '2026-03-27T18:00:00.000Z',
+      },
+    ], 1711718400000));
+
+    assert.equal(payload.fetchedAt, 1711718400000);
+    assert.equal(payload.recordCount, 4);
+    assert.equal(payload.highCount, 1);
+    assert.equal(payload.mediumCount, 1);
+    assert.equal(payload.actions[2].tier, 'low');
+    assert.equal(payload.actions[3].tier, 'unknown');
+  });
+});
+
+describe('fetchRegulatoryActionPayload', () => {
+  it('returns classified payload from fetched actions', async () => {
+    const payload = normalize(await fetchRegulatoryActionPayload(async (url) => ({
+      ok: true,
+      text: async () => `<rss><channel><item><title>FDIC update</title><description>FDIC resolves action through a remedial action plan.</description><link>${url}/item</link><pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate></item></channel></rss>`,
+    })));
+
+    assert.equal(payload.actions.length, 6);
+    assert.equal(payload.recordCount, 6);
+    assert.ok(typeof payload.fetchedAt === 'number');
+    assert.equal(payload.actions[0].tier, 'medium');
+    assert.deepEqual(payload.actions[0].matchedKeywords, ['resolves action', 'remedial action']);
+  });
+});
+
+describe('main', () => {
+  it('wires runSeed with the regulatory key, TTL, and validateFn', async () => {
+    const calls = [];
+    const runSeedStub = async (domain, resource, canonicalKey, fetchFn, opts) => {
+      calls.push({ domain, resource, canonicalKey, opts, payload: await fetchFn() });
+      return 'ok';
+    };
+    const fetchStub = async (url) => ({
+      ok: true,
+      text: async () => `<rss><channel><item><title>CFTC Issues Advisory</title><link>${url}/item</link><pubDate>Mon, 30 Mar 2026 18:00:00 GMT</pubDate></item></channel></rss>`,
+    });
+
+    const result = await main(fetchStub, runSeedStub);
+    assert.equal(result, 'ok');
+    assert.equal(calls.length, 1);
+    assert.equal(calls[0].domain, 'regulatory');
+    assert.equal(calls[0].resource, 'actions');
+    assert.equal(calls[0].canonicalKey, 'regulatory:actions:v1');
+    assert.equal(calls[0].opts.ttlSeconds, 21600);
+    assert.equal(calls[0].opts.validateFn({ actions: [] }), false);
+    assert.equal(calls[0].opts.validateFn({ actions: [{ id: 'a' }] }), true);
+    assert.equal(calls[0].payload.recordCount, 6);
+  });
+});