fix(forecast): improve trace enrichment diagnostics (#1797)

* fix(forecast): avoid duplicate prior world-state read

* feat(forecast): record llm enrichment failure reasons

* fix(forecast): preserve latest pointer continuity fallback
This commit is contained in:
Elie Habib
2026-03-18 09:55:28 +04:00
committed by GitHub
parent 827cf886a5
commit 527002f873
2 changed files with 44 additions and 5 deletions

View File

@@ -3155,8 +3155,10 @@ async function writeForecastTraceArtifacts(data, context = {}) {
const traceCap = getTraceCapLog(predictionCount);
console.log(` Trace cap: raw=${traceCap.raw ?? 'default'} resolved=${traceCap.resolved} total=${traceCap.totalForecasts}`);
// Run both reads in parallel; derive priorWorldState from history head to avoid
// a redundant R2 GET (TRACE_RUNS_KEY[0] and TRACE_LATEST_KEY normally point to the same object).
// Keep TRACE_LATEST_KEY as a fallback because writeForecastTracePointer() updates
// the latest pointer and history list in separate Redis calls. If SET succeeds
// but LPUSH/LTRIM fails or the history list is stale, continuity should still
// see the most recent prior world state.
const [priorWorldStates, priorWorldStateFallback] = await Promise.all([
readForecastWorldStateHistory(storageConfig, WORLD_STATE_HISTORY_LIMIT),
readPreviousForecastWorldState(storageConfig),
@@ -3654,6 +3656,14 @@ function validateScenarios(scenarios, predictions) {
});
}
/**
 * Classify why an LLM enrichment pass produced nothing usable.
 *
 * @param {object} args
 * @param {*} args.result - Raw provider call result; falsy means the call itself failed.
 * @param {*} args.raw - Parsed output; null/undefined means parsing failed.
 * @param {number} [args.scenarios=0] - Count of validated scenario items.
 * @param {number} [args.perspectives=0] - Count of validated perspective items.
 * @param {number} [args.cases=0] - Count of validated case items.
 * @returns {string} One of 'call_failed' | 'parse_failed' | 'empty_output' |
 *   'validation_failed', or '' when at least one validated item survived.
 */
function getEnrichmentFailureReason({ result, raw, scenarios = 0, perspectives = 0, cases = 0 }) {
  // Check failure modes in pipeline order: call -> parse -> output -> validation.
  if (!result) {
    return 'call_failed';
  }
  // `== null` intentionally matches both null and undefined.
  if (raw == null) {
    return 'parse_failed';
  }
  if (Array.isArray(raw) && raw.length === 0) {
    return 'empty_output';
  }
  const validatedTotal = scenarios + perspectives + cases;
  return validatedTotal === 0 ? 'validation_failed' : '';
}
async function callForecastLLM(systemPrompt, userPrompt, options = {}) {
const stage = options.stage || 'default';
const providers = resolveForecastLlmProviders(options);
@@ -3880,6 +3890,8 @@ async function enrichScenariosWithLLM(predictions) {
scenarios: 0,
perspectives: 0,
cases: 0,
rawItemCount: 0,
failureReason: '',
succeeded: false,
},
scenario: {
@@ -3889,6 +3901,8 @@ async function enrichScenariosWithLLM(predictions) {
model: '',
scenarios: 0,
cases: 0,
rawItemCount: 0,
failureReason: '',
succeeded: false,
},
};
@@ -3911,6 +3925,7 @@ async function enrichScenariosWithLLM(predictions) {
enrichmentMeta.combined.scenarios = cached.items.filter(item => item.scenario).length;
enrichmentMeta.combined.perspectives = cached.items.filter(item => item.strategic || item.regional || item.contrarian).length;
enrichmentMeta.combined.cases = cached.items.filter(item => item.baseCase || item.escalatoryCase || item.contrarianCase).length;
enrichmentMeta.combined.rawItemCount = cached.items.length;
for (const item of cached.items) {
if (item.index >= 0 && item.index < topWithPerspectives.length) {
applyTraceMeta(topWithPerspectives[item.index], {
@@ -3944,10 +3959,18 @@ async function enrichScenariosWithLLM(predictions) {
enrichmentMeta.combined.source = 'live';
enrichmentMeta.combined.provider = result.provider;
enrichmentMeta.combined.model = result.model;
enrichmentMeta.combined.rawItemCount = Array.isArray(raw) ? raw.length : 0;
enrichmentMeta.combined.scenarios = validScenarios.length;
enrichmentMeta.combined.perspectives = validPerspectives.length;
enrichmentMeta.combined.cases = validCases.length;
enrichmentMeta.combined.succeeded = validScenarios.length > 0 || validPerspectives.length > 0 || validCases.length > 0;
enrichmentMeta.combined.failureReason = getEnrichmentFailureReason({
result,
raw,
scenarios: validScenarios.length,
perspectives: validPerspectives.length,
cases: validCases.length,
});
for (const s of validScenarios) {
applyTraceMeta(topWithPerspectives[s.index], {
@@ -3989,12 +4012,15 @@ async function enrichScenariosWithLLM(predictions) {
console.log(JSON.stringify({
event: 'llm_combined', provider: result.provider, model: result.model,
hash, count: topWithPerspectives.length,
rawItems: Array.isArray(raw) ? raw.length : 0,
scenarios: validScenarios.length, perspectives: validPerspectives.length, cases: validCases.length,
failureReason: enrichmentMeta.combined.failureReason || '',
latencyMs: Math.round(Date.now() - t0), cached: false,
}));
if (items.length > 0) await redisSet(url, token, cacheKey, { items }, 3600);
} else {
enrichmentMeta.combined.failureReason = 'call_failed';
console.warn(' [LLM:combined] call failed');
}
}
@@ -4013,6 +4039,7 @@ async function enrichScenariosWithLLM(predictions) {
enrichmentMeta.scenario.model = 'cache';
enrichmentMeta.scenario.scenarios = cached.scenarios.filter(item => item.scenario).length;
enrichmentMeta.scenario.cases = cached.scenarios.filter(item => item.baseCase || item.escalatoryCase || item.contrarianCase).length;
enrichmentMeta.scenario.rawItemCount = cached.scenarios.length;
for (const s of cached.scenarios) {
if (s.index >= 0 && s.index < scenarioOnly.length && s.scenario) {
applyTraceMeta(scenarioOnly[s.index], {
@@ -4044,9 +4071,16 @@ async function enrichScenariosWithLLM(predictions) {
enrichmentMeta.scenario.source = 'live';
enrichmentMeta.scenario.provider = result.provider;
enrichmentMeta.scenario.model = result.model;
enrichmentMeta.scenario.rawItemCount = Array.isArray(raw) ? raw.length : 0;
enrichmentMeta.scenario.scenarios = valid.length;
enrichmentMeta.scenario.cases = validCases.length;
enrichmentMeta.scenario.succeeded = valid.length > 0 || validCases.length > 0;
enrichmentMeta.scenario.failureReason = getEnrichmentFailureReason({
result,
raw,
scenarios: valid.length,
cases: validCases.length,
});
for (const s of valid) {
applyTraceMeta(scenarioOnly[s.index], {
narrativeSource: 'llm_scenario',
@@ -4068,7 +4102,8 @@ async function enrichScenariosWithLLM(predictions) {
console.log(JSON.stringify({
event: 'llm_scenario', provider: result.provider, model: result.model,
hash, count: scenarioOnly.length, scenarios: valid.length, cases: validCases.length,
hash, count: scenarioOnly.length, rawItems: Array.isArray(raw) ? raw.length : 0, scenarios: valid.length, cases: validCases.length,
failureReason: enrichmentMeta.scenario.failureReason || '',
latencyMs: Math.round(Date.now() - t0), cached: false,
}));
@@ -4099,6 +4134,7 @@ async function enrichScenariosWithLLM(predictions) {
await redisSet(url, token, cacheKey, { scenarios }, 3600);
}
} else {
enrichmentMeta.scenario.failureReason = 'call_failed';
console.warn(' [LLM:scenario] call failed');
}
}

View File

@@ -176,8 +176,8 @@ describe('forecast trace artifact builder', () => {
predictions: [a, b],
enrichmentMeta: {
selection: { candidateCount: 2, readinessEligibleCount: 2, selectedCombinedCount: 1, selectedScenarioCount: 1, reservedScenarioDomains: ['market'] },
combined: { requested: 1, source: 'live', provider: 'openrouter', model: 'google/gemini-2.5-flash', scenarios: 1, perspectives: 1, cases: 1, succeeded: true },
scenario: { requested: 1, source: 'cache', provider: 'cache', model: 'cache', scenarios: 0, cases: 0, succeeded: true },
combined: { requested: 1, source: 'live', provider: 'openrouter', model: 'google/gemini-2.5-flash', scenarios: 1, perspectives: 1, cases: 1, rawItemCount: 2, failureReason: '', succeeded: true },
scenario: { requested: 1, source: 'cache', provider: 'cache', model: 'cache', scenarios: 0, cases: 0, rawItemCount: 1, failureReason: '', succeeded: true },
},
},
{ runId: 'run-quality' },
@@ -194,6 +194,9 @@ describe('forecast trace artifact builder', () => {
assert.ok(artifacts.summary.quality.traced.topSuppressionSignals.length >= 1);
assert.equal(artifacts.summary.quality.enrichment.selection.selectedCombinedCount, 1);
assert.equal(artifacts.summary.quality.enrichment.combined.provider, 'openrouter');
assert.equal(artifacts.summary.quality.enrichment.combined.rawItemCount, 2);
assert.equal(artifacts.summary.quality.enrichment.scenario.rawItemCount, 1);
assert.equal(artifacts.summary.quality.enrichment.combined.failureReason, '');
});
});