Files
worldmonitor/tests/seed-utils.test.mjs
Elie Habib 56103684c6 fix(seed-utils): payloadBytes>0 fallback for runSeed recordCount auto-detect (#3087)
* fix(seed-utils): payloadBytes>0 fallback for runSeed recordCount auto-detect

Phantom EMPTY_DATA in /api/health: 16 of 21 failing health checks were
caused by seeders publishing custom payload shapes without passing
opts.recordCount. The auto-detect chain in runSeed only matches a hardcoded
list of shapes; anything else falls through to recordCount=0 and triggers
EMPTY_DATA in /api/health even though the payload is fully populated and
verified in Redis.

Smoking-gun log signature from Railway 2026-04-14:
  [BLS-Series] recordCount:0, payloadBytes:6093, Verified: data present
  [VPD-Tracker] recordCount:0, payloadBytes:3068853, Verified: data present
  [Disease-Outbreaks] recordCount:0, payloadBytes:92684, Verified: data present

Fix:
- Extract recordCount logic into pure exported computeRecordCount() for
  unit testability.
- Add payloadBytes>0 → 1 fallback at the end of the resolution chain. When
  triggered, console.warn names the seeder so the author can add an
  explicit opts.recordCount for accurate dashboards.
- Resolution order unchanged for existing callers: opts.recordCount wins,
  then known-shape auto-detect, then the new payloadBytes fallback, then 0.
  Explicit opts.recordCount=0 still wins (test covers it).

Effect: clears 16 phantom CRITs on the next bundle cycle. Per-seeder warns
will surface in logs so we can add accurate opts.recordCount in follow-up.

Tests: 11 new computeRecordCount cases (opts precedence, auto-detect shapes,
fallback behavior, no-spurious-warn, explicit-zero precedence).
seed-utils.test.mjs 18/18 + seed-utils-empty-data-failure.test.mjs 2/2 +
typecheck clean.

* test(seed-utils): address Greptile P2 — replace it.each mutation, add empty-known-shape edge case

Greptile review on PR #3087 caught two minor test issues:

1. `it.each = undefined` mutated the imported `it` function (ES module
   live binding). Replaced with a plain comment.

2. Missing edge case: `data: { events: [] }` with payloadBytes > 0 should
   NOT trigger the payloadBytes fallback because detectedFromShape resolves
   to a real 0 (not undefined). Without this guard, a future regression
   could collapse the !=null check and silently mask genuine empty
   upstream cycles as "1 record". Test added.

Tests: 19/19 (was 18). No production code change.
2026-04-14 13:28:00 +04:00

171 lines
5.8 KiB
JavaScript

import assert from 'node:assert/strict';
import { describe, it } from 'node:test';
import { isTransientRedisError, computeRecordCount } from '../scripts/_seed-utils.mjs';
describe('seed utils redis error handling', () => {
  // Builds an outer 'fetch failed' error whose `cause` carries a network
  // error code — the shape each transient case below hands to
  // isTransientRedisError.
  const fetchFailure = (OuterError, causeMessage, causeCode) => {
    const outer = new OuterError('fetch failed');
    outer.cause = new Error(causeMessage);
    outer.cause.code = causeCode;
    return outer;
  };

  // [test title, outer error constructor, cause message, cause code]
  const transientCases = [
    ['treats undici connect timeout as transient', TypeError, 'Connect Timeout Error', 'UND_ERR_CONNECT_TIMEOUT'],
    ['treats ECONNRESET as transient', Error, 'read ECONNRESET', 'ECONNRESET'],
    ['treats DNS lookup failure as transient', Error, 'getaddrinfo EAI_AGAIN redis-host', 'EAI_AGAIN'],
    ['treats ETIMEDOUT as transient', Error, 'connect ETIMEDOUT', 'ETIMEDOUT'],
  ];

  for (const [title, OuterError, causeMessage, causeCode] of transientCases) {
    it(title, () => {
      const failure = fetchFailure(OuterError, causeMessage, causeCode);
      assert.equal(isTransientRedisError(failure), true);
    });
  }

  // [test title, error message] — plain errors with no `cause` attached.
  const nonTransientCases = [
    ['does not treat Redis HTTP 403 as transient', 'Redis command failed: HTTP 403'],
    ['does not treat generic validation errors as transient', 'validation failed'],
    ['does not treat payload size errors as transient', 'Payload too large: 6.2MB > 5MB limit'],
  ];

  for (const [title, message] of nonTransientCases) {
    it(title, () => {
      assert.equal(isTransientRedisError(new Error(message)), false);
    });
  }
});
describe('computeRecordCount', () => {
  // Calls computeRecordCount with a spy wired to onPhantomFallback and reports
  // both the computed count and whether the spy was invoked.
  const countWithFallbackSpy = (args) => {
    let fallbackFired = false;
    const count = computeRecordCount({
      ...args,
      onPhantomFallback: () => { fallbackFired = true; },
    });
    return { count, fallbackFired };
  };

  it('uses opts.recordCount as a number when provided', () => {
    const count = computeRecordCount({
      opts: { recordCount: 42 },
      data: { foo: 'bar' },
      payloadBytes: 1000,
    });
    assert.equal(count, 42);
  });

  it('uses opts.recordCount as a function when provided', () => {
    const payload = { items: [1, 2, 3, 4, 5] };
    const count = computeRecordCount({
      opts: { recordCount: (seeded) => seeded.items.length * 2 },
      data: payload,
      payloadBytes: 100,
    });
    assert.equal(count, 10);
  });

  it('respects opts.recordCount=0 even when payload has bytes (explicit zero)', () => {
    // An explicit "0 records" from the seeder is authoritative — seeders such
    // as seed-owid-energy-mix never have a meaningful count to report.
    const count = computeRecordCount({
      opts: { recordCount: 0 },
      data: { stuff: 1 },
      payloadBytes: 500,
    });
    assert.equal(count, 0);
  });

  // node:test has no it.each, so the known-shape cases are driven by a table:
  // [test title, computeRecordCount argument, expected count]
  const knownShapeCases = [
    [
      'auto-detects array length when data is an array',
      { data: [1, 2, 3, 4], payloadBytes: 100 },
      4,
    ],
    [
      'auto-detects data.events.length',
      { data: { events: [{}, {}, {}] }, payloadBytes: 50 },
      3,
    ],
    [
      'auto-detects data.predictions.length',
      { data: { predictions: [{}, {}] }, payloadBytes: 30 },
      2,
    ],
    [
      'auto-detects topicArticleCount when topics shape',
      { data: { topics: [{}] }, topicArticleCount: 17, payloadBytes: 200 },
      17,
    ],
  ];

  for (const [title, args, expected] of knownShapeCases) {
    it(title, () => {
      assert.equal(computeRecordCount(args), expected);
    });
  }

  it('does NOT fire fallback when known shape returns 0 (empty array, payloadBytes>0)', () => {
    // Regression guard: a seeder publishing {events: []} (genuinely nothing
    // upstream) still serializes to a non-empty payload (about a dozen bytes
    // for {"events":[]}). The known-shape detection yields a real 0, so the
    // chain must stop there and report 0 rather than fall through to the
    // payloadBytes>0 fallback — otherwise genuinely empty upstream cycles
    // would be masked as "1 record" and the SKIPPED/EMPTY signal would break.
    const { count, fallbackFired } = countWithFallbackSpy({
      data: { events: [] },
      payloadBytes: 12,
    });
    assert.equal(count, 0);
    assert.equal(fallbackFired, false, 'no fallback when known shape is present but empty');
  });

  it('FALLBACK: returns 1 when payloadBytes>0 and shape unknown (phantom EMPTY_DATA fix)', () => {
    // The proven-payload fallback. Without it, a seeder publishing
    // {score, inputs} (e.g. seed-fear-greed) would write recordCount=0 to
    // seed-meta and trigger a phantom EMPTY_DATA in /api/health even though
    // the panel renders fine.
    const { count, fallbackFired } = countWithFallbackSpy({
      data: { score: 42, inputs: { foo: 'bar' } }, // shape not auto-detected
      payloadBytes: 6093,
    });
    assert.equal(count, 1);
    assert.equal(fallbackFired, true, 'expected onPhantomFallback to fire');
  });

  it('returns 0 when neither known shape nor payloadBytes > 0', () => {
    const { count, fallbackFired } = countWithFallbackSpy({
      data: { unknownShape: true },
      payloadBytes: 0,
    });
    assert.equal(count, 0);
    assert.equal(fallbackFired, false, 'no fallback warn when payload is empty');
  });

  it('does not fire fallback when shape matches (no spurious warn)', () => {
    // Only the spy is checked here; the returned count is not asserted.
    const { fallbackFired } = countWithFallbackSpy({
      data: [1, 2, 3],
      payloadBytes: 100,
    });
    assert.equal(fallbackFired, false);
  });

  it('opts.recordCount=0 from a function suppresses fallback (explicit-zero precedence)', () => {
    const { count, fallbackFired } = countWithFallbackSpy({
      opts: { recordCount: () => 0 },
      data: { mystery: true },
      payloadBytes: 9999,
    });
    assert.equal(count, 0);
    assert.equal(fallbackFired, false);
  });
});