mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
chore(bundle-runner): emit reliable per-section summary line on parent stdout (#3320)
* chore(bundle-runner): emit reliable per-section summary line on parent stdout Fixes observability asymmetry in Railway bundle service logs where some seeders appeared to skip lines like \`Run ID\`, \`Mode\`, or the structured \`seed_complete\` JSON event. Root cause is Railway's log ingestion dropping child-stdout lines when multiple seeders emit at similar timestamps — observed in the PR #3294 launch run where Pipelines-Gas was missing its \`=== Seed ===\` banner, Pipelines-Oil had \`Key:\` emitted BEFORE the banner, Storage-Facilities and Energy- Disruptions were missing Run ID + Mode + seed_complete entirely, despite identical code paths. All child processes emit the same lines; Railway just loses some. Fix is to piggy-back on the observation that bundle-level lines (\`[Bundle:X] Starting\`, \`[Bundle:X] Finished\`) ARE reliably captured — they come from the parent process's single stdout stream. Changes in scripts/_bundle-runner.mjs: - spawnSeed now captures the child's \`{"event":"seed_complete",...}\` JSON line while streaming stdout, parses it, and attaches to the settle result. - Main loop emits one bundle-level summary line per section after child exit: [Bundle:X] section=NAME status=OK durationMs=1237 records=15 state=OK (or \`status=FAILED elapsed=...s reason=...\` for failures). - Summary line survives Railway's log ingestion even when per-section child lines drop, giving monitors a reliable event to key off. Observability consumers (log-based alerts, seed telemetry scrapers) should now key off the bundle-level summary rather than per-section child lines which remain best-effort. The per-section child lines stay as-is for interactive debugging. Verification: parse logic sanity-checked against the exact seed_complete line format. Node syntax check clean. No schema changes. * fix(bundle-runner): emit FAILED summary line to stderr, not stdout The prior commit introduced a bundle-level structured summary line per section. On success that correctly goes to stdout; on FAILED it was also going to stdout — but that broke tests/bundle-runner.test.mjs test 140 ("timeout emits terminal reason BEFORE SIGTERM/SIGKILL grace"). The test concatenates stdout+stderr and asserts that `SIGKILL` appears AFTER `Failed after` in the combined string (verifying the kill-decision log line is emitted BEFORE the 10s SIGTERM→SIGKILL grace window, so it survives container termination). My new FAILED summary line — which includes the reason string `timeout after 1s (signal SIGKILL)` — landed on stdout, which comes first in the concatenation, and its `SIGKILL` substring matched before the stderr-side `Did not exit on SIGTERM...SIGKILL` line. Ordering assertion failed. Fix: route the FAILED summary line through console.error (same stream as the pre-kill `Failed after ... sending SIGTERM` and the grace-window `Did not exit...SIGKILL` lines). Chronological ordering in combined output is preserved; test passes. OK summary lines stay on stdout — they're observability data, not error diagnostics, and belong on the normal stream alongside the bundle Starting/Finished lines. Local: `node --test tests/bundle-runner.test.mjs` — 4/4 pass including the previously-failing ordering test.
This commit is contained in:
@@ -110,12 +110,28 @@ function streamLines(stream, onLine) {
|
||||
function spawnSeed(scriptPath, { timeoutMs, label }) {
|
||||
return new Promise((resolve) => {
|
||||
const t0 = Date.now();
|
||||
// Capture the child's structured `seed_complete` event if emitted, so
|
||||
// the parent can re-emit the key fields on a single bundle-level line.
|
||||
// Railway log ingestion drops child-stdout lines when many seeders log
|
||||
// at similar timestamps (observed across Storage-Facilities /
|
||||
// Energy-Disruptions / Pipelines-Gas in PR #3294 launch run: each
|
||||
// dropped a different subset of Run ID / Mode / seed_complete lines
|
||||
// despite identical code paths). Bundle-level lines survive reliably.
|
||||
let lastSeedComplete = null;
|
||||
const child = spawn(process.execPath, [scriptPath], {
|
||||
env: process.env,
|
||||
stdio: ['ignore', 'pipe', 'pipe'],
|
||||
});
|
||||
|
||||
streamLines(child.stdout, (line) => console.log(` [${label}] ${line}`));
|
||||
streamLines(child.stdout, (line) => {
|
||||
console.log(` [${label}] ${line}`);
|
||||
const idx = line.indexOf('{"event":"seed_complete"');
|
||||
if (idx >= 0) {
|
||||
try {
|
||||
lastSeedComplete = JSON.parse(line.slice(idx));
|
||||
} catch { /* malformed JSON — keep previous */ }
|
||||
}
|
||||
});
|
||||
streamLines(child.stderr, (line) => console.warn(` [${label}] ${line}`));
|
||||
|
||||
let settled = false;
|
||||
@@ -156,7 +172,7 @@ function spawnSeed(scriptPath, { timeoutMs, label }) {
|
||||
// Terminal reason already logged by softKill — just record the outcome.
|
||||
settle({ elapsed, ok: false, reason: `timeout after ${Math.round(timeoutMs / 1000)}s (signal ${signal || 'SIGTERM'})`, alreadyLogged: true });
|
||||
} else if (code === 0) {
|
||||
settle({ elapsed, ok: true });
|
||||
settle({ elapsed, ok: true, seedComplete: lastSeedComplete });
|
||||
} else {
|
||||
settle({ elapsed, ok: false, reason: `exit ${code ?? 'null'}${signal ? ` (signal ${signal})` : ''}` });
|
||||
}
|
||||
@@ -215,11 +231,30 @@ export async function runBundle(label, sections, opts = {}) {
|
||||
const result = await spawnSeed(scriptPath, { timeoutMs: timeout, label: section.label });
|
||||
if (result.ok) {
|
||||
console.log(` [${section.label}] Done (${result.elapsed}s)`);
|
||||
// Bundle-level per-section summary — emitted from parent stdout so
|
||||
// Railway log ingestion captures it reliably even when child lines
|
||||
// drop. Observability tools should key off this line, not per-section
|
||||
// Run ID / Mode / seed_complete lines which are best-effort only.
|
||||
const sc = result.seedComplete;
|
||||
if (sc && typeof sc === 'object') {
|
||||
console.log(`[Bundle:${label}] section=${section.label} status=OK durationMs=${sc.durationMs ?? ''} records=${sc.recordCount ?? ''} state=${sc.state || 'OK'}`);
|
||||
} else {
|
||||
// Seeder didn't emit seed_complete (legacy non-contract seeders, or
|
||||
// the child's event line was dropped before parsing).
|
||||
console.log(`[Bundle:${label}] section=${section.label} status=OK elapsed=${result.elapsed}s`);
|
||||
}
|
||||
ran++;
|
||||
} else {
|
||||
if (!result.alreadyLogged) {
|
||||
console.error(` [${section.label}] Failed after ${result.elapsed}s: ${result.reason}`);
|
||||
}
|
||||
// Emit the FAILED summary to stderr (same stream as the Failed line
|
||||
// and SIGKILL escalation log) so chronological ordering in combined
|
||||
// output is preserved. If we went to stdout here, the line would
|
||||
// appear before those stderr lines when consumers concatenate
|
||||
// stdout+stderr, breaking tests (and log readers) that rely on
|
||||
// signal-escalation ordering.
|
||||
console.error(`[Bundle:${label}] section=${section.label} status=FAILED elapsed=${result.elapsed}s reason=${(result.reason || 'unknown').replace(/\s+/g, ' ')}`);
|
||||
failed++;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user