mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
fix(seeds): strict-floor validators must not poison seed-meta on empty (#3078)
* fix(seeds): strict-floor validators must not poison seed-meta on empty When `runSeed`'s validateFn rejected (empty/short data), seed-meta was refreshed with `fetchedAt=now, recordCount=0`. Bundle runners read `fetchedAt` to decide skip — so one transient empty fetch locked the IMF-extended bundle (30-day cadence) out for a full month. Adds opt-in `emptyDataIsFailure` flag that skips the meta refresh on validation failure, letting the bundle retry next cron fire and health flip to STALE_SEED. Wires it on all four IMF/WEO seeders (floor 150-190 countries), which structurally can't have legitimate empty results. Default behavior unchanged for quiet-period feeds (news, events) where empty is normal. Observed: Railway log 2026-04-13 18:58 — imf-external validation fail; next fire 8h later skipped "483min ago / interval 43200min". * test(seeds): regression coverage for emptyDataIsFailure branch Static-analysis guard against the PR #3078 regression reintroducing itself: - Asserts runSeed gates writeFreshnessMetadata on opts.emptyDataIsFailure and that extendExistingTtl still runs in both branches (cache preserved). - Asserts the four strict-floor IMF seeders (external/growth/labor/macro) pass emptyDataIsFailure: true. Prevents silent bundle-lockout if someone removes the gate or adds a new strict-floor seeder without the flag. * fix(seeds): strict-floor failure must exit(1) + behavioral test P2 (surfacing upstream failures in bundle summary): Strict-floor seeders with emptyDataIsFailure:true now process.exit(1) after logging FAILURE. _bundle-runner's spawnSeed wraps execFile, so non-zero exit rejects → failed++ increments → bundle itself exits 1. Before: bundle logged 'Done' and ran++ on a poisoned upstream, hiding 30-day outages from Railway monitoring. P3 (behavioral regression coverage, replacing static source-shape test): Stubs globalThis.fetch (Upstash REST) + process.exit to drive runSeed through both branches. Asserts on actual Redis commands: - strict path: zero seed-meta SET, pipeline EXPIRE still called, exit(1) - default path: exactly one seed-meta SET, exit(0) Catches future regressions where writeFreshnessMetadata is reintroduced indirectly, and is immune to cosmetic refactors of _seed-utils.mjs. * test(seeds): regression for emptyDataIsFailure meta-refresh gate Proves that validation failure with opts.emptyDataIsFailure:true does NOT write seed-meta (strict-floor seeders) while the default behavior DOES write count=0 meta (quiet-period feeds). Addresses PR #3078 review.
This commit is contained in:
@@ -746,13 +746,27 @@ export async function runSeed(domain, resource, canonicalKey, fetchFn, opts = {}
|
||||
const keys = [canonicalKey, `seed-meta:${domain}:${resource}`];
|
||||
if (extraKeys) keys.push(...extraKeys.map(ek => ek.key));
|
||||
await extendExistingTtl(keys, ttlSeconds || 600);
|
||||
// Always write seed-meta even when data is empty so health checks can
|
||||
// distinguish "seeder ran but nothing to publish" from "seeder stopped".
|
||||
await writeFreshnessMetadata(domain, resource, 0, opts.sourceVersion, ttlSeconds);
|
||||
console.log(` SKIPPED: validation failed (empty data) — seed-meta refreshed, existing cache TTL extended`);
|
||||
const strictFailure = Boolean(opts.emptyDataIsFailure);
|
||||
if (strictFailure) {
|
||||
// Strict-floor seeders (e.g. IMF-External, floor=180 countries) treat
|
||||
// empty data as a real upstream failure. Do NOT refresh seed-meta —
|
||||
// letting fetchedAt stay stale lets bundles retry on their next cron
|
||||
// fire and lets health flip to STALE_SEED. Writing fresh meta here
|
||||
// caused imf-external to skip for the full 30-day interval after a
|
||||
// single transient failure (Railway log 2026-04-13).
|
||||
console.error(` FAILURE: validation failed (empty data) — seed-meta NOT refreshed; bundle will retry next cycle`);
|
||||
} else {
|
||||
// Write seed-meta even when data is empty so health can distinguish
|
||||
// "seeder ran but nothing to publish" from "seeder stopped" (quiet-
|
||||
// period feeds: news, events, sparse indicators).
|
||||
await writeFreshnessMetadata(domain, resource, 0, opts.sourceVersion, ttlSeconds);
|
||||
console.log(` SKIPPED: validation failed (empty data) — seed-meta refreshed, existing cache TTL extended`);
|
||||
}
|
||||
console.log(`\n=== Done (${Math.round(durationMs)}ms, no write) ===`);
|
||||
await releaseLock(`${domain}:${resource}`, runId);
|
||||
process.exit(0);
|
||||
// Strict path exits non-zero so _bundle-runner counts it as failed++
|
||||
// (otherwise the bundle summary hides upstream outages behind ran++).
|
||||
process.exit(strictFailure ? 1 : 0);
|
||||
}
|
||||
const { payloadBytes } = publishResult;
|
||||
const topicArticleCount = Array.isArray(data?.topics)
|
||||
|
||||
@@ -115,6 +115,10 @@ if (process.argv[1]?.endsWith('seed-imf-external.mjs')) {
|
||||
ttlSeconds: CACHE_TTL,
|
||||
sourceVersion: `imf-sdmx-weo-${new Date().getFullYear()}`,
|
||||
recordCount: (data) => Object.keys(data?.countries ?? {}).length,
|
||||
// Empty/short result = real upstream failure (floor is 180 countries).
|
||||
// Without this, a single transient fetch glitch refreshes seed-meta and
|
||||
// locks the bundle out for 30 days (see log 2026-04-13).
|
||||
emptyDataIsFailure: true,
|
||||
}).catch((err) => {
|
||||
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : '';
|
||||
console.error('FATAL:', (err.message || err) + _cause);
|
||||
|
||||
@@ -151,6 +151,7 @@ if (process.argv[1]?.endsWith('seed-imf-growth.mjs')) {
|
||||
ttlSeconds: CACHE_TTL,
|
||||
sourceVersion: `imf-sdmx-weo-${new Date().getFullYear()}`,
|
||||
recordCount: (data) => Object.keys(data?.countries ?? {}).length,
|
||||
emptyDataIsFailure: true,
|
||||
}).catch((err) => {
|
||||
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : '';
|
||||
console.error('FATAL:', (err.message || err) + _cause);
|
||||
|
||||
@@ -93,6 +93,7 @@ if (process.argv[1]?.endsWith('seed-imf-labor.mjs')) {
|
||||
ttlSeconds: CACHE_TTL,
|
||||
sourceVersion: `imf-sdmx-weo-${new Date().getFullYear()}`,
|
||||
recordCount: (data) => Object.keys(data?.countries ?? {}).length,
|
||||
emptyDataIsFailure: true,
|
||||
}).catch((err) => {
|
||||
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : '';
|
||||
console.error('FATAL:', (err.message || err) + _cause);
|
||||
|
||||
@@ -108,6 +108,7 @@ if (process.argv[1]?.endsWith('seed-imf-macro.mjs')) {
|
||||
ttlSeconds: CACHE_TTL,
|
||||
sourceVersion: `imf-sdmx-weo-${new Date().getFullYear()}`,
|
||||
recordCount: (data) => Object.keys(data?.countries ?? {}).length,
|
||||
emptyDataIsFailure: true,
|
||||
}).catch((err) => {
|
||||
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : '';
|
||||
console.error('FATAL:', (err.message || err) + _cause);
|
||||
|
||||
130
tests/seed-empty-data-is-failure.test.mjs
Normal file
130
tests/seed-empty-data-is-failure.test.mjs
Normal file
@@ -0,0 +1,130 @@
|
||||
import { describe, it, before, after, beforeEach } from 'node:test';
|
||||
import { strict as assert } from 'node:assert';
|
||||
|
||||
// Behavioral regression for PR #3078: strict-floor IMF seeders must not
|
||||
// poison seed-meta on empty/invalid upstream responses. Without the opt-in
|
||||
// flag, a single transient empty fetch refreshes fetchedAt →
|
||||
// _bundle-runner skips the bundle for the full intervalMs (30 days for
|
||||
// imf-external; Railway log 2026-04-13).
|
||||
//
|
||||
// Stubs the Upstash REST layer (all Redis calls go through globalThis.fetch)
|
||||
// plus process.exit, then drives runSeed through both branches and asserts
|
||||
// on the actual commands sent to Redis and the process exit code.
|
||||
|
||||
process.env.UPSTASH_REDIS_REST_URL = 'https://fake-upstash.local';
|
||||
process.env.UPSTASH_REDIS_REST_TOKEN = 'fake-token';
|
||||
|
||||
const { runSeed } = await import('../scripts/_seed-utils.mjs');
|
||||
|
||||
/** @type {Array<{url: string, body: any}>} */
|
||||
let fetchCalls = [];
|
||||
let originalFetch;
|
||||
let originalExit;
|
||||
let originalLog;
|
||||
let originalWarn;
|
||||
let originalError;
|
||||
|
||||
before(() => {
|
||||
originalFetch = globalThis.fetch;
|
||||
originalExit = process.exit;
|
||||
originalLog = console.log;
|
||||
originalWarn = console.warn;
|
||||
originalError = console.error;
|
||||
});
|
||||
|
||||
after(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
process.exit = originalExit;
|
||||
console.log = originalLog;
|
||||
console.warn = originalWarn;
|
||||
console.error = originalError;
|
||||
});
|
||||
|
||||
beforeEach(() => {
|
||||
fetchCalls = [];
|
||||
// Silence seed noise during tests; uncomment for debugging.
|
||||
console.log = () => {};
|
||||
console.warn = () => {};
|
||||
console.error = () => {};
|
||||
globalThis.fetch = async (url, init = {}) => {
|
||||
const body = init.body ? JSON.parse(init.body) : null;
|
||||
fetchCalls.push({ url: String(url), body });
|
||||
// Lock acquire (SET ... NX PX) must succeed; everything else OK too.
|
||||
return {
|
||||
ok: true,
|
||||
status: 200,
|
||||
json: async () => ({ result: 'OK' }),
|
||||
text: async () => 'OK',
|
||||
};
|
||||
};
|
||||
});
|
||||
|
||||
class ExitCalled extends Error {
|
||||
constructor(code) { super(`exit(${code})`); this.code = code; }
|
||||
}
|
||||
|
||||
function stubExit() {
|
||||
process.exit = (code) => { throw new ExitCalled(code ?? 0); };
|
||||
}
|
||||
|
||||
function metaWrites() {
|
||||
// writeFreshnessMetadata POSTs ['SET', 'seed-meta:<domain>:<res>', payload, 'EX', ttl]
|
||||
// to the base URL. Identifies any seed-meta write regardless of helper.
|
||||
return fetchCalls.filter(c =>
|
||||
Array.isArray(c.body) &&
|
||||
c.body[0] === 'SET' &&
|
||||
typeof c.body[1] === 'string' &&
|
||||
c.body[1].startsWith('seed-meta:')
|
||||
);
|
||||
}
|
||||
|
||||
describe('runSeed emptyDataIsFailure branch (behavioral)', () => {
|
||||
const domain = 'test';
|
||||
const resource = 'strict-floor';
|
||||
const canonicalKey = 'test:strict-floor:v1';
|
||||
// validateFn rejects everything → forces atomicPublish's skipped branch.
|
||||
const alwaysInvalid = () => false;
|
||||
|
||||
it('emptyDataIsFailure:true — does NOT write seed-meta and exits non-zero', async () => {
|
||||
stubExit();
|
||||
let exitCode = null;
|
||||
try {
|
||||
await runSeed(domain, resource, canonicalKey, async () => ({ countries: {} }), {
|
||||
validateFn: alwaysInvalid,
|
||||
ttlSeconds: 3600,
|
||||
emptyDataIsFailure: true,
|
||||
});
|
||||
} catch (err) {
|
||||
if (!(err instanceof ExitCalled)) throw err;
|
||||
exitCode = err.code;
|
||||
}
|
||||
|
||||
assert.equal(exitCode, 1, 'strict-floor path must exit(1) so _bundle-runner counts failed++');
|
||||
assert.equal(metaWrites().length, 0,
|
||||
`expected zero seed-meta writes under emptyDataIsFailure:true, got: ${JSON.stringify(metaWrites())}`);
|
||||
// Must still extend TTL (pipeline EXPIRE) to preserve the existing cache.
|
||||
const pipelineCalls = fetchCalls.filter(c => c.url.endsWith('/pipeline'));
|
||||
assert.ok(pipelineCalls.length >= 1, 'extendExistingTtl pipeline call missing — cache TTL would drop');
|
||||
});
|
||||
|
||||
it('emptyDataIsFailure:false (default) — DOES write seed-meta and exits zero', async () => {
|
||||
stubExit();
|
||||
let exitCode = null;
|
||||
try {
|
||||
await runSeed(domain, resource, canonicalKey, async () => ({ countries: {} }), {
|
||||
validateFn: alwaysInvalid,
|
||||
ttlSeconds: 3600,
|
||||
// emptyDataIsFailure omitted — default quiet-period behavior
|
||||
});
|
||||
} catch (err) {
|
||||
if (!(err instanceof ExitCalled)) throw err;
|
||||
exitCode = err.code;
|
||||
}
|
||||
|
||||
assert.equal(exitCode, 0, 'default path exits(0) — quiet-period seeders must not spam bundle failures');
|
||||
const metas = metaWrites();
|
||||
assert.equal(metas.length, 1,
|
||||
`default path must write exactly one seed-meta (fresh fetchedAt for health check), got ${metas.length}`);
|
||||
assert.equal(metas[0].body[1], `seed-meta:${domain}:${resource}`);
|
||||
});
|
||||
});
|
||||
101
tests/seed-utils-empty-data-failure.test.mjs
Normal file
101
tests/seed-utils-empty-data-failure.test.mjs
Normal file
@@ -0,0 +1,101 @@
|
||||
// Regression test for PR #3078: strict-floor validators must not poison
|
||||
// seed-meta on validation failure when opts.emptyDataIsFailure is set.
|
||||
//
|
||||
// Without this guarantee, a single transient empty fetch would refresh
|
||||
// seed-meta with fetchedAt=now, locking bundle runners out of retry for a
|
||||
// full interval (30 days for the IMF extended bundle).
|
||||
|
||||
import { test, beforeEach, afterEach } from 'node:test';
|
||||
import assert from 'node:assert/strict';
|
||||
|
||||
import { runSeed } from '../scripts/_seed-utils.mjs';
|
||||
|
||||
const ORIGINAL_FETCH = globalThis.fetch;
|
||||
const ORIGINAL_EXIT = process.exit;
|
||||
const ORIGINAL_ENV = {
|
||||
UPSTASH_REDIS_REST_URL: process.env.UPSTASH_REDIS_REST_URL,
|
||||
UPSTASH_REDIS_REST_TOKEN: process.env.UPSTASH_REDIS_REST_TOKEN,
|
||||
};
|
||||
|
||||
let recordedCalls;
|
||||
|
||||
beforeEach(() => {
|
||||
process.env.UPSTASH_REDIS_REST_URL = 'https://fake-upstash.example.com';
|
||||
process.env.UPSTASH_REDIS_REST_TOKEN = 'fake-token';
|
||||
recordedCalls = [];
|
||||
|
||||
globalThis.fetch = async (url, opts = {}) => {
|
||||
const body = opts?.body ? (() => { try { return JSON.parse(opts.body); } catch { return opts.body; } })() : null;
|
||||
recordedCalls.push({ url: String(url), method: opts?.method || 'GET', body });
|
||||
// Lock acquire: SET NX returns OK. Pipeline (EXPIRE) returns array. Default: OK.
|
||||
if (Array.isArray(body) && Array.isArray(body[0])) {
|
||||
return new Response(JSON.stringify(body.map(() => ({ result: 0 }))), { status: 200 });
|
||||
}
|
||||
return new Response(JSON.stringify({ result: 'OK' }), { status: 200 });
|
||||
};
|
||||
|
||||
// runSeed's skipped path calls process.exit(0). Convert to a throw so the
|
||||
// test can proceed after the seed "finishes" and inspect recorded calls.
|
||||
process.exit = (code) => {
|
||||
const e = new Error(`__test_exit__:${code}`);
|
||||
e.exitCode = code;
|
||||
throw e;
|
||||
};
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
globalThis.fetch = ORIGINAL_FETCH;
|
||||
process.exit = ORIGINAL_EXIT;
|
||||
if (ORIGINAL_ENV.UPSTASH_REDIS_REST_URL == null) delete process.env.UPSTASH_REDIS_REST_URL;
|
||||
else process.env.UPSTASH_REDIS_REST_URL = ORIGINAL_ENV.UPSTASH_REDIS_REST_URL;
|
||||
if (ORIGINAL_ENV.UPSTASH_REDIS_REST_TOKEN == null) delete process.env.UPSTASH_REDIS_REST_TOKEN;
|
||||
else process.env.UPSTASH_REDIS_REST_TOKEN = ORIGINAL_ENV.UPSTASH_REDIS_REST_TOKEN;
|
||||
});
|
||||
|
||||
function countMetaSets(resourceSuffix) {
|
||||
return recordedCalls.filter(c =>
|
||||
Array.isArray(c.body)
|
||||
&& c.body[0] === 'SET'
|
||||
&& typeof c.body[1] === 'string'
|
||||
&& c.body[1] === `seed-meta:test:${resourceSuffix}`,
|
||||
).length;
|
||||
}
|
||||
|
||||
async function runWithExitTrap(fn) {
|
||||
try {
|
||||
await fn();
|
||||
} catch (err) {
|
||||
if (!String(err.message).startsWith('__test_exit__:')) throw err;
|
||||
}
|
||||
}
|
||||
|
||||
test('validation failure with emptyDataIsFailure:true does NOT refresh seed-meta', async () => {
|
||||
await runWithExitTrap(() =>
|
||||
runSeed('test', 'empty-fail', 'test:empty-fail:v1', async () => ({ items: [] }), {
|
||||
validateFn: (d) => d?.items?.length >= 10, // always fails for empty
|
||||
emptyDataIsFailure: true,
|
||||
ttlSeconds: 3600,
|
||||
}),
|
||||
);
|
||||
|
||||
assert.equal(
|
||||
countMetaSets('empty-fail'), 0,
|
||||
'seed-meta must NOT be SET on validation-fail when emptyDataIsFailure is true; ' +
|
||||
'refreshing fetchedAt here would mask outages and block bundle retries',
|
||||
);
|
||||
});
|
||||
|
||||
test('validation failure WITHOUT emptyDataIsFailure DOES refresh seed-meta (quiet-period feeds)', async () => {
|
||||
await runWithExitTrap(() =>
|
||||
runSeed('test', 'empty-legacy', 'test:empty-legacy:v1', async () => ({ items: [] }), {
|
||||
validateFn: (d) => d?.items?.length >= 10,
|
||||
ttlSeconds: 3600,
|
||||
}),
|
||||
);
|
||||
|
||||
assert.ok(
|
||||
countMetaSets('empty-legacy') >= 1,
|
||||
'legacy behavior for quiet-period feeds (news, events) must still write ' +
|
||||
'seed-meta count=0 so health does not false-positive STALE_SEED',
|
||||
);
|
||||
});
|
||||
Reference in New Issue
Block a user