fix(forecast): improve trace enrichment diagnostics (#1797)

* fix(forecast): avoid duplicate prior world-state read

* feat(forecast): record llm enrichment failure reasons

* fix(forecast): preserve latest pointer continuity fallback
This commit is contained in:
Elie Habib
2026-03-18 09:55:28 +04:00
committed by GitHub
parent 827cf886a5
commit 527002f873
2 changed files with 44 additions and 5 deletions

View File

@@ -3155,8 +3155,10 @@ async function writeForecastTraceArtifacts(data, context = {}) {
const traceCap = getTraceCapLog(predictionCount);
console.log(` Trace cap: raw=${traceCap.raw ?? 'default'} resolved=${traceCap.resolved} total=${traceCap.totalForecasts}`);
// Run both reads in parallel; derive priorWorldState from history head to avoid
// a redundant R2 GET (TRACE_RUNS_KEY[0] and TRACE_LATEST_KEY normally point to the same object).
// Keep TRACE_LATEST_KEY as a fallback because writeForecastTracePointer() updates
// the latest pointer and history list in separate Redis calls. If SET succeeds
// but LPUSH/LTRIM fails or the history list is stale, continuity should still
// see the most recent prior world state.
const [priorWorldStates, priorWorldStateFallback] = await Promise.all([
readForecastWorldStateHistory(storageConfig, WORLD_STATE_HISTORY_LIMIT),
readPreviousForecastWorldState(storageConfig),
@@ -3654,6 +3656,14 @@ function validateScenarios(scenarios, predictions) {
});
}
/**
 * Classify why an LLM enrichment pass produced nothing usable.
 *
 * @param {object} args
 * @param {*} args.result - Raw provider call result; falsy means the call itself failed.
 * @param {*} args.raw - Parsed output; null/undefined means parsing failed.
 * @param {number} [args.scenarios=0] - Count of validated scenario items.
 * @param {number} [args.perspectives=0] - Count of validated perspective items.
 * @param {number} [args.cases=0] - Count of validated case items.
 * @returns {string} One of 'call_failed' | 'parse_failed' | 'empty_output' |
 *   'validation_failed', or '' when at least one validated item survived.
 */
function getEnrichmentFailureReason({ result, raw, scenarios = 0, perspectives = 0, cases = 0 }) {
  // Check failure modes in pipeline order: call -> parse -> output -> validation.
  if (!result) {
    return 'call_failed';
  }
  // `== null` intentionally matches both null and undefined.
  if (raw == null) {
    return 'parse_failed';
  }
  if (Array.isArray(raw) && raw.length === 0) {
    return 'empty_output';
  }
  const validatedTotal = scenarios + perspectives + cases;
  return validatedTotal === 0 ? 'validation_failed' : '';
}
async function callForecastLLM(systemPrompt, userPrompt, options = {}) {
const stage = options.stage || 'default';
const providers = resolveForecastLlmProviders(options);
@@ -3880,6 +3890,8 @@ async function enrichScenariosWithLLM(predictions) {
scenarios: 0,
perspectives: 0,
cases: 0,
rawItemCount: 0,
failureReason: '',
succeeded: false,
},
scenario: {
@@ -3889,6 +3901,8 @@ async function enrichScenariosWithLLM(predictions) {
model: '',
scenarios: 0,
cases: 0,
rawItemCount: 0,
failureReason: '',
succeeded: false,
},
};
@@ -3911,6 +3925,7 @@ async function enrichScenariosWithLLM(predictions) {
enrichmentMeta.combined.scenarios = cached.items.filter(item => item.scenario).length;
enrichmentMeta.combined.perspectives = cached.items.filter(item => item.strategic || item.regional || item.contrarian).length;
enrichmentMeta.combined.cases = cached.items.filter(item => item.baseCase || item.escalatoryCase || item.contrarianCase).length;
enrichmentMeta.combined.rawItemCount = cached.items.length;
for (const item of cached.items) {
if (item.index >= 0 && item.index < topWithPerspectives.length) {
applyTraceMeta(topWithPerspectives[item.index], {
@@ -3944,10 +3959,18 @@ async function enrichScenariosWithLLM(predictions) {
enrichmentMeta.combined.source = 'live';
enrichmentMeta.combined.provider = result.provider;
enrichmentMeta.combined.model = result.model;
enrichmentMeta.combined.rawItemCount = Array.isArray(raw) ? raw.length : 0;
enrichmentMeta.combined.scenarios = validScenarios.length;
enrichmentMeta.combined.perspectives = validPerspectives.length;
enrichmentMeta.combined.cases = validCases.length;
enrichmentMeta.combined.succeeded = validScenarios.length > 0 || validPerspectives.length > 0 || validCases.length > 0;
enrichmentMeta.combined.failureReason = getEnrichmentFailureReason({
result,
raw,
scenarios: validScenarios.length,
perspectives: validPerspectives.length,
cases: validCases.length,
});
for (const s of validScenarios) {
applyTraceMeta(topWithPerspectives[s.index], {
@@ -3989,12 +4012,15 @@ async function enrichScenariosWithLLM(predictions) {
console.log(JSON.stringify({
event: 'llm_combined', provider: result.provider, model: result.model,
hash, count: topWithPerspectives.length,
rawItems: Array.isArray(raw) ? raw.length : 0,
scenarios: validScenarios.length, perspectives: validPerspectives.length, cases: validCases.length,
failureReason: enrichmentMeta.combined.failureReason || '',
latencyMs: Math.round(Date.now() - t0), cached: false,
}));
if (items.length > 0) await redisSet(url, token, cacheKey, { items }, 3600);
} else {
enrichmentMeta.combined.failureReason = 'call_failed';
console.warn(' [LLM:combined] call failed');
}
}
@@ -4013,6 +4039,7 @@ async function enrichScenariosWithLLM(predictions) {
enrichmentMeta.scenario.model = 'cache';
enrichmentMeta.scenario.scenarios = cached.scenarios.filter(item => item.scenario).length;
enrichmentMeta.scenario.cases = cached.scenarios.filter(item => item.baseCase || item.escalatoryCase || item.contrarianCase).length;
enrichmentMeta.scenario.rawItemCount = cached.scenarios.length;
for (const s of cached.scenarios) {
if (s.index >= 0 && s.index < scenarioOnly.length && s.scenario) {
applyTraceMeta(scenarioOnly[s.index], {
@@ -4044,9 +4071,16 @@ async function enrichScenariosWithLLM(predictions) {
enrichmentMeta.scenario.source = 'live';
enrichmentMeta.scenario.provider = result.provider;
enrichmentMeta.scenario.model = result.model;
enrichmentMeta.scenario.rawItemCount = Array.isArray(raw) ? raw.length : 0;
enrichmentMeta.scenario.scenarios = valid.length;
enrichmentMeta.scenario.cases = validCases.length;
enrichmentMeta.scenario.succeeded = valid.length > 0 || validCases.length > 0;
enrichmentMeta.scenario.failureReason = getEnrichmentFailureReason({
result,
raw,
scenarios: valid.length,
cases: validCases.length,
});
for (const s of valid) {
applyTraceMeta(scenarioOnly[s.index], {
narrativeSource: 'llm_scenario',
@@ -4068,7 +4102,8 @@ async function enrichScenariosWithLLM(predictions) {
console.log(JSON.stringify({
event: 'llm_scenario', provider: result.provider, model: result.model,
hash, count: scenarioOnly.length, scenarios: valid.length, cases: validCases.length,
hash, count: scenarioOnly.length, rawItems: Array.isArray(raw) ? raw.length : 0, scenarios: valid.length, cases: validCases.length,
failureReason: enrichmentMeta.scenario.failureReason || '',
latencyMs: Math.round(Date.now() - t0), cached: false,
}));
@@ -4099,6 +4134,7 @@ async function enrichScenariosWithLLM(predictions) {
await redisSet(url, token, cacheKey, { scenarios }, 3600);
}
} else {
enrichmentMeta.scenario.failureReason = 'call_failed';
console.warn(' [LLM:scenario] call failed');
}
}

View File

@@ -176,8 +176,8 @@ describe('forecast trace artifact builder', () => {
predictions: [a, b],
enrichmentMeta: {
selection: { candidateCount: 2, readinessEligibleCount: 2, selectedCombinedCount: 1, selectedScenarioCount: 1, reservedScenarioDomains: ['market'] },
combined: { requested: 1, source: 'live', provider: 'openrouter', model: 'google/gemini-2.5-flash', scenarios: 1, perspectives: 1, cases: 1, succeeded: true },
scenario: { requested: 1, source: 'cache', provider: 'cache', model: 'cache', scenarios: 0, cases: 0, succeeded: true },
combined: { requested: 1, source: 'live', provider: 'openrouter', model: 'google/gemini-2.5-flash', scenarios: 1, perspectives: 1, cases: 1, rawItemCount: 2, failureReason: '', succeeded: true },
scenario: { requested: 1, source: 'cache', provider: 'cache', model: 'cache', scenarios: 0, cases: 0, rawItemCount: 1, failureReason: '', succeeded: true },
},
},
{ runId: 'run-quality' },
@@ -194,6 +194,9 @@ describe('forecast trace artifact builder', () => {
assert.ok(artifacts.summary.quality.traced.topSuppressionSignals.length >= 1);
assert.equal(artifacts.summary.quality.enrichment.selection.selectedCombinedCount, 1);
assert.equal(artifacts.summary.quality.enrichment.combined.provider, 'openrouter');
assert.equal(artifacts.summary.quality.enrichment.combined.rawItemCount, 2);
assert.equal(artifacts.summary.quality.enrichment.scenario.rawItemCount, 1);
assert.equal(artifacts.summary.quality.enrichment.combined.failureReason, '');
});
});