fix(seeds): strict-floor validators must not poison seed-meta on empty (#3078)

* fix(seeds): strict-floor validators must not poison seed-meta on empty

When `runSeed`'s validateFn rejected (empty/short data), seed-meta was
refreshed with `fetchedAt=now, recordCount=0`. Bundle runners read
`fetchedAt` to decide skip — so one transient empty fetch locked the
IMF-extended bundle (30-day cadence) out for a full month.

Adds opt-in `emptyDataIsFailure` flag that skips the meta refresh on
validation failure, letting the bundle retry next cron fire and health
flip to STALE_SEED. Wires it on all four IMF/WEO seeders (floor 150-190
countries), which structurally can't have legitimate empty results.

Default behavior unchanged for quiet-period feeds (news, events) where
empty is normal.

Observed: Railway log 2026-04-13 18:58 — imf-external validation fail;
next fire 8h later skipped "483min ago / interval 43200min".

* test(seeds): regression coverage for emptyDataIsFailure branch

Static-analysis guard against the PR #3078 regression reintroducing itself:
- Asserts runSeed gates writeFreshnessMetadata on opts.emptyDataIsFailure
  and that extendExistingTtl still runs in both branches (cache preserved).
- Asserts the four strict-floor IMF seeders (external/growth/labor/macro)
  pass emptyDataIsFailure: true.

Prevents silent bundle-lockout if someone removes the gate or adds a new
strict-floor seeder without the flag.

* fix(seeds): strict-floor failure must exit(1) + behavioral test

P2 (surfacing upstream failures in bundle summary):
Strict-floor seeders with emptyDataIsFailure:true now process.exit(1)
after logging FAILURE. _bundle-runner's spawnSeed wraps execFile, so
non-zero exit rejects → failed++ increments → bundle itself exits 1.
Before: bundle logged 'Done' and ran++ on a poisoned upstream, hiding
30-day outages from Railway monitoring.

P3 (behavioral regression coverage, replacing static source-shape test):
Stubs globalThis.fetch (Upstash REST) + process.exit to drive runSeed
through both branches. Asserts on actual Redis commands:
- strict path: zero seed-meta SET, pipeline EXPIRE still called, exit(1)
- default path: exactly one seed-meta SET, exit(0)
Catches future regressions where writeFreshnessMetadata is reintroduced
indirectly, and is immune to cosmetic refactors of _seed-utils.mjs.

* test(seeds): regression for emptyDataIsFailure meta-refresh gate

Proves that validation failure with opts.emptyDataIsFailure:true does NOT
write seed-meta (strict-floor seeders) while the default behavior DOES
write count=0 meta (quiet-period feeds). Addresses PR #3078 review.
This commit is contained in:
Elie Habib
2026-04-14 09:02:21 +04:00
committed by GitHub
parent 1875531e2a
commit 9d27ff0d6a
7 changed files with 257 additions and 5 deletions

View File

@@ -746,13 +746,27 @@ export async function runSeed(domain, resource, canonicalKey, fetchFn, opts = {}
const keys = [canonicalKey, `seed-meta:${domain}:${resource}`];
if (extraKeys) keys.push(...extraKeys.map(ek => ek.key));
await extendExistingTtl(keys, ttlSeconds || 600);
// Always write seed-meta even when data is empty so health checks can
// distinguish "seeder ran but nothing to publish" from "seeder stopped".
await writeFreshnessMetadata(domain, resource, 0, opts.sourceVersion, ttlSeconds);
console.log(` SKIPPED: validation failed (empty data) — seed-meta refreshed, existing cache TTL extended`);
const strictFailure = Boolean(opts.emptyDataIsFailure);
if (strictFailure) {
// Strict-floor seeders (e.g. IMF-External, floor=180 countries) treat
// empty data as a real upstream failure. Do NOT refresh seed-meta —
// letting fetchedAt stay stale lets bundles retry on their next cron
// fire and lets health flip to STALE_SEED. Writing fresh meta here
// caused imf-external to skip for the full 30-day interval after a
// single transient failure (Railway log 2026-04-13).
console.error(` FAILURE: validation failed (empty data) — seed-meta NOT refreshed; bundle will retry next cycle`);
} else {
// Write seed-meta even when data is empty so health can distinguish
// "seeder ran but nothing to publish" from "seeder stopped" (quiet-
// period feeds: news, events, sparse indicators).
await writeFreshnessMetadata(domain, resource, 0, opts.sourceVersion, ttlSeconds);
console.log(` SKIPPED: validation failed (empty data) — seed-meta refreshed, existing cache TTL extended`);
}
console.log(`\n=== Done (${Math.round(durationMs)}ms, no write) ===`);
await releaseLock(`${domain}:${resource}`, runId);
process.exit(0);
// Strict path exits non-zero so _bundle-runner counts it as failed++
// (otherwise the bundle summary hides upstream outages behind ran++).
process.exit(strictFailure ? 1 : 0);
}
const { payloadBytes } = publishResult;
const topicArticleCount = Array.isArray(data?.topics)

View File

@@ -115,6 +115,10 @@ if (process.argv[1]?.endsWith('seed-imf-external.mjs')) {
ttlSeconds: CACHE_TTL,
sourceVersion: `imf-sdmx-weo-${new Date().getFullYear()}`,
recordCount: (data) => Object.keys(data?.countries ?? {}).length,
// Empty/short result = real upstream failure (floor is 180 countries).
// Without this, a single transient fetch glitch refreshes seed-meta and
// locks the bundle out for 30 days (see log 2026-04-13).
emptyDataIsFailure: true,
}).catch((err) => {
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : '';
console.error('FATAL:', (err.message || err) + _cause);

View File

@@ -151,6 +151,7 @@ if (process.argv[1]?.endsWith('seed-imf-growth.mjs')) {
ttlSeconds: CACHE_TTL,
sourceVersion: `imf-sdmx-weo-${new Date().getFullYear()}`,
recordCount: (data) => Object.keys(data?.countries ?? {}).length,
emptyDataIsFailure: true,
}).catch((err) => {
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : '';
console.error('FATAL:', (err.message || err) + _cause);

View File

@@ -93,6 +93,7 @@ if (process.argv[1]?.endsWith('seed-imf-labor.mjs')) {
ttlSeconds: CACHE_TTL,
sourceVersion: `imf-sdmx-weo-${new Date().getFullYear()}`,
recordCount: (data) => Object.keys(data?.countries ?? {}).length,
emptyDataIsFailure: true,
}).catch((err) => {
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : '';
console.error('FATAL:', (err.message || err) + _cause);

View File

@@ -108,6 +108,7 @@ if (process.argv[1]?.endsWith('seed-imf-macro.mjs')) {
ttlSeconds: CACHE_TTL,
sourceVersion: `imf-sdmx-weo-${new Date().getFullYear()}`,
recordCount: (data) => Object.keys(data?.countries ?? {}).length,
emptyDataIsFailure: true,
}).catch((err) => {
const _cause = err.cause ? ` (cause: ${err.cause.message || err.cause.code || err.cause})` : '';
console.error('FATAL:', (err.message || err) + _cause);

View File

@@ -0,0 +1,130 @@
import { describe, it, before, after, beforeEach } from 'node:test';
import { strict as assert } from 'node:assert';
// Behavioral regression for PR #3078: strict-floor IMF seeders must not
// poison seed-meta on empty/invalid upstream responses. Without the opt-in
// flag, a single transient empty fetch refreshes fetchedAt →
// _bundle-runner skips the bundle for the full intervalMs (30 days for
// imf-external; Railway log 2026-04-13).
//
// Stubs the Upstash REST layer (all Redis calls go through globalThis.fetch)
// plus process.exit, then drives runSeed through both branches and asserts
// on the actual commands sent to Redis and the process exit code.
process.env.UPSTASH_REDIS_REST_URL = 'https://fake-upstash.local';
process.env.UPSTASH_REDIS_REST_TOKEN = 'fake-token';
const { runSeed } = await import('../scripts/_seed-utils.mjs');
/** @type {Array<{url: string, body: any}>} */
let fetchCalls = [];
let originalFetch;
let originalExit;
let originalLog;
let originalWarn;
let originalError;
before(() => {
originalFetch = globalThis.fetch;
originalExit = process.exit;
originalLog = console.log;
originalWarn = console.warn;
originalError = console.error;
});
after(() => {
globalThis.fetch = originalFetch;
process.exit = originalExit;
console.log = originalLog;
console.warn = originalWarn;
console.error = originalError;
});
beforeEach(() => {
fetchCalls = [];
// Silence seed noise during tests; uncomment for debugging.
console.log = () => {};
console.warn = () => {};
console.error = () => {};
globalThis.fetch = async (url, init = {}) => {
const body = init.body ? JSON.parse(init.body) : null;
fetchCalls.push({ url: String(url), body });
// Lock acquire (SET ... NX PX) must succeed; everything else OK too.
return {
ok: true,
status: 200,
json: async () => ({ result: 'OK' }),
text: async () => 'OK',
};
};
});
class ExitCalled extends Error {
constructor(code) { super(`exit(${code})`); this.code = code; }
}
function stubExit() {
process.exit = (code) => { throw new ExitCalled(code ?? 0); };
}
function metaWrites() {
// writeFreshnessMetadata POSTs ['SET', 'seed-meta:<domain>:<res>', payload, 'EX', ttl]
// to the base URL. Identifies any seed-meta write regardless of helper.
return fetchCalls.filter(c =>
Array.isArray(c.body) &&
c.body[0] === 'SET' &&
typeof c.body[1] === 'string' &&
c.body[1].startsWith('seed-meta:')
);
}
describe('runSeed emptyDataIsFailure branch (behavioral)', () => {
const domain = 'test';
const resource = 'strict-floor';
const canonicalKey = 'test:strict-floor:v1';
// validateFn rejects everything → forces atomicPublish's skipped branch.
const alwaysInvalid = () => false;
it('emptyDataIsFailure:true — does NOT write seed-meta and exits non-zero', async () => {
stubExit();
let exitCode = null;
try {
await runSeed(domain, resource, canonicalKey, async () => ({ countries: {} }), {
validateFn: alwaysInvalid,
ttlSeconds: 3600,
emptyDataIsFailure: true,
});
} catch (err) {
if (!(err instanceof ExitCalled)) throw err;
exitCode = err.code;
}
assert.equal(exitCode, 1, 'strict-floor path must exit(1) so _bundle-runner counts failed++');
assert.equal(metaWrites().length, 0,
`expected zero seed-meta writes under emptyDataIsFailure:true, got: ${JSON.stringify(metaWrites())}`);
// Must still extend TTL (pipeline EXPIRE) to preserve the existing cache.
const pipelineCalls = fetchCalls.filter(c => c.url.endsWith('/pipeline'));
assert.ok(pipelineCalls.length >= 1, 'extendExistingTtl pipeline call missing — cache TTL would drop');
});
it('emptyDataIsFailure:false (default) — DOES write seed-meta and exits zero', async () => {
stubExit();
let exitCode = null;
try {
await runSeed(domain, resource, canonicalKey, async () => ({ countries: {} }), {
validateFn: alwaysInvalid,
ttlSeconds: 3600,
// emptyDataIsFailure omitted — default quiet-period behavior
});
} catch (err) {
if (!(err instanceof ExitCalled)) throw err;
exitCode = err.code;
}
assert.equal(exitCode, 0, 'default path exits(0) — quiet-period seeders must not spam bundle failures');
const metas = metaWrites();
assert.equal(metas.length, 1,
`default path must write exactly one seed-meta (fresh fetchedAt for health check), got ${metas.length}`);
assert.equal(metas[0].body[1], `seed-meta:${domain}:${resource}`);
});
});

View File

@@ -0,0 +1,101 @@
// Regression test for PR #3078: strict-floor validators must not poison
// seed-meta on validation failure when opts.emptyDataIsFailure is set.
//
// Without this guarantee, a single transient empty fetch would refresh
// seed-meta with fetchedAt=now, locking bundle runners out of retry for a
// full interval (30 days for the IMF extended bundle).
import { test, beforeEach, afterEach } from 'node:test';
import assert from 'node:assert/strict';
import { runSeed } from '../scripts/_seed-utils.mjs';
const ORIGINAL_FETCH = globalThis.fetch;
const ORIGINAL_EXIT = process.exit;
const ORIGINAL_ENV = {
UPSTASH_REDIS_REST_URL: process.env.UPSTASH_REDIS_REST_URL,
UPSTASH_REDIS_REST_TOKEN: process.env.UPSTASH_REDIS_REST_TOKEN,
};
let recordedCalls;
beforeEach(() => {
process.env.UPSTASH_REDIS_REST_URL = 'https://fake-upstash.example.com';
process.env.UPSTASH_REDIS_REST_TOKEN = 'fake-token';
recordedCalls = [];
globalThis.fetch = async (url, opts = {}) => {
const body = opts?.body ? (() => { try { return JSON.parse(opts.body); } catch { return opts.body; } })() : null;
recordedCalls.push({ url: String(url), method: opts?.method || 'GET', body });
// Lock acquire: SET NX returns OK. Pipeline (EXPIRE) returns array. Default: OK.
if (Array.isArray(body) && Array.isArray(body[0])) {
return new Response(JSON.stringify(body.map(() => ({ result: 0 }))), { status: 200 });
}
return new Response(JSON.stringify({ result: 'OK' }), { status: 200 });
};
// runSeed's skipped path calls process.exit(0). Convert to a throw so the
// test can proceed after the seed "finishes" and inspect recorded calls.
process.exit = (code) => {
const e = new Error(`__test_exit__:${code}`);
e.exitCode = code;
throw e;
};
});
afterEach(() => {
globalThis.fetch = ORIGINAL_FETCH;
process.exit = ORIGINAL_EXIT;
if (ORIGINAL_ENV.UPSTASH_REDIS_REST_URL == null) delete process.env.UPSTASH_REDIS_REST_URL;
else process.env.UPSTASH_REDIS_REST_URL = ORIGINAL_ENV.UPSTASH_REDIS_REST_URL;
if (ORIGINAL_ENV.UPSTASH_REDIS_REST_TOKEN == null) delete process.env.UPSTASH_REDIS_REST_TOKEN;
else process.env.UPSTASH_REDIS_REST_TOKEN = ORIGINAL_ENV.UPSTASH_REDIS_REST_TOKEN;
});
function countMetaSets(resourceSuffix) {
return recordedCalls.filter(c =>
Array.isArray(c.body)
&& c.body[0] === 'SET'
&& typeof c.body[1] === 'string'
&& c.body[1] === `seed-meta:test:${resourceSuffix}`,
).length;
}
async function runWithExitTrap(fn) {
try {
await fn();
} catch (err) {
if (!String(err.message).startsWith('__test_exit__:')) throw err;
}
}
test('validation failure with emptyDataIsFailure:true does NOT refresh seed-meta', async () => {
await runWithExitTrap(() =>
runSeed('test', 'empty-fail', 'test:empty-fail:v1', async () => ({ items: [] }), {
validateFn: (d) => d?.items?.length >= 10, // always fails for empty
emptyDataIsFailure: true,
ttlSeconds: 3600,
}),
);
assert.equal(
countMetaSets('empty-fail'), 0,
'seed-meta must NOT be SET on validation-fail when emptyDataIsFailure is true; ' +
'refreshing fetchedAt here would mask outages and block bundle retries',
);
});
test('validation failure WITHOUT emptyDataIsFailure DOES refresh seed-meta (quiet-period feeds)', async () => {
await runWithExitTrap(() =>
runSeed('test', 'empty-legacy', 'test:empty-legacy:v1', async () => ({ items: [] }), {
validateFn: (d) => d?.items?.length >= 10,
ttlSeconds: 3600,
}),
);
assert.ok(
countMetaSets('empty-legacy') >= 1,
'legacy behavior for quiet-period feeds (news, events) must still write ' +
'seed-meta count=0 so health does not false-positive STALE_SEED',
);
});