mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
fix(deduction): address P1/P2 review feedback on deduction-prompt (#1795)
* fix(deduction): address P1/P2 review feedback
P1: add buildNewsContextFromItems to news-context.ts
- new export takes NewsItem[] directly (not a getter)
- emits 'Recent News Signal Snapshot:' format with ISO timestamp,
source, tier, title, and optional locationName
- deduplicates by normalized title
P2-a: anchor splitDeductionContext to line boundary
- replace indexOf('Recent News') with /(?:^|\n\n)(Recent News...)/ regex
- prevents false split on prose containing the phrase mid-sentence
P2-b: strip dangling <think> prefix in postProcessDeductionOutput
- add .replace(/<think>[\s\S]*/gi, '') after the closed-tag strip
- matches behavior of shared sanitizer in server/_shared/llm.ts
P2-c: remove spurious convex/_generated/contactMessages.js
- convex/contactMessages.ts already exists in the correct location
- the _generated/ copy had a broken import path
Open: wire deduction-prompt.ts into deduct-situation.ts
- replace inline prompt with buildDeductionPrompt()
- apply postProcessDeductionOutput() to LLM result
- bump cache key to v2 to avoid stale inline-prompt responses
All 8 tests in tests/deduction-prompt.test.mjs pass.
* fix(deduction): restrict mode detection to query only
inferDeductionMode previously checked query + geoContext combined.
News headlines in geoContext containing 'brief', 'convergence pattern',
etc. could force brief mode on full forecast requests.
Mode selection now inspects only the user query. geoContext is
supplemental evidence and must not influence prompt structure.
Adds regression test to prevent recurrence.
This commit is contained in:
@@ -7,6 +7,7 @@ import type {
|
||||
import { cachedFetchJson } from '../../../_shared/redis';
|
||||
import { sha256Hex } from './_shared';
|
||||
import { callLlm } from '../../../_shared/llm';
|
||||
import { buildDeductionPrompt, postProcessDeductionOutput } from './deduction-prompt';
|
||||
|
||||
const DEDUCT_TIMEOUT_MS = 120_000;
|
||||
const DEDUCT_CACHE_TTL = 3600;
|
||||
@@ -23,20 +24,9 @@ export async function deductSituation(
|
||||
|
||||
if (!query) return { analysis: '', model: '', provider: 'skipped' };
|
||||
|
||||
const cacheKey = `deduct:situation:v1:${(await sha256Hex(query.toLowerCase() + '|' + geoContext.toLowerCase())).slice(0, 16)}`;
|
||||
const cacheKey = `deduct:situation:v2:${(await sha256Hex(query.toLowerCase() + '|' + geoContext.toLowerCase())).slice(0, 16)}`;
|
||||
|
||||
const systemPrompt = `You are a senior geopolitical intelligence analyst and forecaster.
|
||||
Your task is to DEDUCT the situation in a near timeline (e.g. 24 hours to a few months) based on the user's query.
|
||||
- Use any provided geographic or intelligence context.
|
||||
- Be highly analytical, pragmatic, and objective.
|
||||
- Identify the most likely outcomes, timelines, and second-order impacts.
|
||||
- Do NOT use typical AI preambles (e.g., "Here is the deduction", "Let me see").
|
||||
- Format your response in clean markdown with concise bullet points where appropriate.`;
|
||||
|
||||
let userPrompt = query;
|
||||
if (geoContext) {
|
||||
userPrompt += `\n\n### Current Intelligence Context\n${geoContext}`;
|
||||
}
|
||||
const { mode, systemPrompt, userPrompt } = buildDeductionPrompt({ query, geoContext });
|
||||
|
||||
const cached = await cachedFetchJson<{ analysis: string; model: string; provider: string }>(
|
||||
cacheKey,
|
||||
@@ -53,7 +43,8 @@ Your task is to DEDUCT the situation in a near timeline (e.g. 24 hours to a few
|
||||
});
|
||||
|
||||
if (!result) return null;
|
||||
return { analysis: result.content, model: result.model, provider: result.provider };
|
||||
const analysis = postProcessDeductionOutput(result.content, mode);
|
||||
return { analysis, model: result.model, provider: result.provider };
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
155
server/worldmonitor/intelligence/v1/deduction-prompt.ts
Normal file
155
server/worldmonitor/intelligence/v1/deduction-prompt.ts
Normal file
@@ -0,0 +1,155 @@
|
||||
/** Result of splitting a raw geoContext string into its two logical parts. */
interface PromptContextParts {
  // Free-form context that precedes the "Recent News" header (may be empty).
  primaryContext: string;
  // Individual news lines with leading bullet markers stripped.
  recentNews: string[];
}

/** 'brief' = terse 2-3 sentence answer; 'forecast' = full structured markdown forecast. */
export type DeductionMode = 'brief' | 'forecast';

// Query phrasings that signal the caller wants a short assessment rather than
// the full forecast template. Per the query-only rule, these are checked
// against the user query ONLY — never against geoContext (news headlines
// containing e.g. "brief" must not flip the mode).
const BRIEF_MODE_PATTERNS = [
  /\b2-3 sentences?\b/i,
  /\bbrief\b/i,
  /\bconvergence pattern\b/i,
  /\bassess likelihood and potential implications\b/i,
];
|
||||
|
||||
function normalizeWhitespace(input: string): string {
|
||||
return input.replace(/\r\n/g, '\n').replace(/[ \t]+\n/g, '\n').replace(/\n{3,}/g, '\n\n').trim();
|
||||
}
|
||||
|
||||
function trimList(items: string[], maxItems: number, maxChars: number): string[] {
|
||||
const out: string[] = [];
|
||||
let total = 0;
|
||||
for (const item of items) {
|
||||
if (out.length >= maxItems) break;
|
||||
const next = item.trim();
|
||||
if (!next) continue;
|
||||
if (total > 0 && total + next.length + 1 > maxChars) break;
|
||||
out.push(next);
|
||||
total += next.length + 1;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
export function inferDeductionMode(query: string): DeductionMode {
|
||||
return BRIEF_MODE_PATTERNS.some((pattern) => pattern.test(query)) ? 'brief' : 'forecast';
|
||||
}
|
||||
|
||||
export function splitDeductionContext(geoContext: string): PromptContextParts {
|
||||
const normalized = normalizeWhitespace(geoContext);
|
||||
if (!normalized) {
|
||||
return { primaryContext: '', recentNews: [] };
|
||||
}
|
||||
|
||||
const headerMatch = /(?:^|\n\n)(Recent News[^\n]*)/.exec(normalized);
|
||||
if (!headerMatch) {
|
||||
return { primaryContext: normalized, recentNews: [] };
|
||||
}
|
||||
|
||||
const primaryContext = normalized.slice(0, headerMatch.index).trim();
|
||||
const afterHeader = normalized.slice(headerMatch.index + headerMatch[0].length);
|
||||
const newsBlock = afterHeader.split('\n').filter(Boolean);
|
||||
const recentNews = trimList(
|
||||
newsBlock
|
||||
.map((line) => line.replace(/^\s*[-*]\s*/, '').trim())
|
||||
.filter(Boolean),
|
||||
10,
|
||||
1400,
|
||||
);
|
||||
|
||||
return { primaryContext, recentNews };
|
||||
}
|
||||
|
||||
export function inferProviderLabel(apiUrl: string): string {
|
||||
try {
|
||||
const host = new URL(apiUrl).hostname.toLowerCase();
|
||||
if (host.includes('groq')) return 'groq';
|
||||
if (host.includes('openrouter')) return 'openrouter';
|
||||
if (host.includes('ollama')) return 'ollama';
|
||||
if (host.includes('openai')) return 'openai-compatible';
|
||||
return host.replace(/^api\./, '') || 'custom';
|
||||
} catch {
|
||||
return 'custom';
|
||||
}
|
||||
}
|
||||
|
||||
function buildSharedEvidencePrompt(primaryContext: string, recentNews: string[]): string {
|
||||
const parts: string[] = [];
|
||||
if (primaryContext) {
|
||||
parts.push(`Context:\n${primaryContext}`);
|
||||
}
|
||||
if (recentNews.length > 0) {
|
||||
parts.push(`Recent News Signals:\n${recentNews.map((line) => `- ${line}`).join('\n')}`);
|
||||
}
|
||||
if (parts.length === 0) {
|
||||
parts.push('Context:\nNo additional context was provided.');
|
||||
}
|
||||
return parts.join('\n\n');
|
||||
}
|
||||
|
||||
/**
 * Builds the full system/user prompt pair for the deduction LLM call.
 *
 * Mode is inferred from the query only (see inferDeductionMode); geoContext
 * is split into primary context + recent news and rendered as shared
 * evidence. `now` is injectable for deterministic tests and defaults to the
 * current time; only its UTC date (YYYY-MM-DD) is embedded in the prompt.
 *
 * @param input.query      the user's question, verbatim.
 * @param input.geoContext raw context string (may contain a Recent News block).
 * @param input.now        optional clock override for tests.
 * @returns the resolved mode plus the system and user prompt strings.
 */
export function buildDeductionPrompt(input: {
  query: string;
  geoContext: string;
  now?: Date;
}): { mode: DeductionMode; systemPrompt: string; userPrompt: string } {
  const now = input.now ?? new Date();
  // ISO date prefix, e.g. "2026-03-15" — keeps the model anchored to "today".
  const today = now.toISOString().slice(0, 10);
  const mode = inferDeductionMode(input.query);
  const { primaryContext, recentNews } = splitDeductionContext(input.geoContext);
  const evidence = buildSharedEvidencePrompt(primaryContext, recentNews);

  // Brief mode: plain-text 2-3 sentence answer, no markdown scaffolding.
  if (mode === 'brief') {
    return {
      mode,
      systemPrompt: `You are a concise forecasting analyst.
Today is ${today} UTC.
Use only the supplied evidence plus durable background knowledge.
Do not invent current facts that are not supported by the evidence.
Return plain text in exactly 2 or 3 sentences.
- Sentence 1: core assessment and rough likelihood.
- Sentence 2: primary drivers or constraints.
- Optional sentence 3: the most important trigger to watch next.
No markdown, no bullets, no headings, no preamble.`,
      userPrompt: `Question:\n${input.query}\n\n${evidence}`,
    };
  }

  // Forecast mode: structured markdown with a fixed section order that
  // downstream rendering depends on.
  return {
    mode,
    systemPrompt: `You are a senior geopolitical and market forecaster.
Today is ${today} UTC.
Your job is to produce a grounded near-term forecast from the supplied evidence.
Rules:
- Separate observed facts from forecasted outcomes.
- Prefer the freshest and most specific evidence.
- If evidence is thin or conflicting, say so explicitly.
- Use rough probability ranges, not false precision.
- Do not use AI preambles.
- Keep the answer concise but structured.

Return Markdown with exactly these sections in this order:
**Bottom line**
**What we know**
**Most likely path (next 24-72h)**
**Alternative paths**
**Key drivers**
**Signals to watch**
**Confidence**

Formatting rules:
- Use short bullets under each section where useful.
- In "Alternative paths", include 2 alternatives with rough likelihood bands.
- In "Confidence", state High, Medium, or Low and explain why.
- Ground claims in the supplied evidence by naming sources, dates, locations, or signal types when possible.`,
    userPrompt: `Question:\n${input.query}\n\n${evidence}`,
  };
}
|
||||
|
||||
export function postProcessDeductionOutput(raw: string, mode: DeductionMode): string {
|
||||
const cleaned = normalizeWhitespace(
|
||||
raw.replace(/<think>[\s\S]*?<\/think>/gi, '').replace(/<think>[\s\S]*/gi, ''),
|
||||
);
|
||||
if (mode === 'brief') {
|
||||
return cleaned.replace(/\s+/g, ' ').trim();
|
||||
}
|
||||
return cleaned;
|
||||
}
|
||||
@@ -5,3 +5,20 @@ export function buildNewsContext(getLatestNews: () => NewsItem[], limit = 15): s
|
||||
if (news.length === 0) return '';
|
||||
return 'Recent News:\n' + news.map(n => `- ${n.title} (${n.source})`).join('\n');
|
||||
}
|
||||
|
||||
export function buildNewsContextFromItems(items: NewsItem[], limit = 15): string {
|
||||
const seen = new Set<string>();
|
||||
const lines: string[] = [];
|
||||
for (const item of items) {
|
||||
if (lines.length >= limit) break;
|
||||
const key = item.title.toLowerCase().trim();
|
||||
if (seen.has(key)) continue;
|
||||
seen.add(key);
|
||||
const ts = item.pubDate instanceof Date ? item.pubDate.toISOString() : String(item.pubDate);
|
||||
const tier = item.tier != null ? ` | tier-${item.tier}` : '';
|
||||
const loc = item.locationName ? ` | ${item.locationName}` : '';
|
||||
lines.push(`- ${ts} | ${item.source}${tier} | ${item.title}${loc}`);
|
||||
}
|
||||
if (lines.length === 0) return '';
|
||||
return 'Recent News Signal Snapshot:\n' + lines.join('\n');
|
||||
}
|
||||
|
||||
118
tests/deduction-prompt.test.mjs
Normal file
118
tests/deduction-prompt.test.mjs
Normal file
@@ -0,0 +1,118 @@
|
||||
// Unit tests for deduction-prompt.ts and the news-context helper.
// Runs under the built-in node:test runner (node --test).
import assert from 'node:assert/strict';
import { describe, it } from 'node:test';

import {
  buildDeductionPrompt,
  inferDeductionMode,
  inferProviderLabel,
  postProcessDeductionOutput,
  splitDeductionContext,
} from '../server/worldmonitor/intelligence/v1/deduction-prompt.ts';
import { buildNewsContextFromItems } from '../src/utils/news-context.ts';

describe('inferDeductionMode', () => {
  it('selects brief mode for short convergence assessments', () => {
    assert.equal(
      inferDeductionMode('Analyze this convergence pattern and assess likelihood in 2-3 sentences.'),
      'brief',
    );
  });

  it('selects forecast mode for open-ended user forecasting', () => {
    assert.equal(
      inferDeductionMode('What will possibly happen in the next 72 hours in the Taiwan Strait?'),
      'forecast',
    );
  });

  // Regression guard: mode selection must inspect the query only, so trigger
  // phrases appearing in geoContext (news headlines etc.) cannot flip it.
  it('ignores trigger phrases in geoContext — mode is query-only', () => {
    assert.equal(
      inferDeductionMode('What is the strategic outlook for the Gulf theater?'),
      'forecast',
    );
  });
});

describe('splitDeductionContext', () => {
  it('separates primary context from recent news lines', () => {
    const result = splitDeductionContext(
      'Theater: Levant.\n\nRecent News Signal Snapshot:\n- 2026-03-15T10:00:00.000Z | Reuters | tier-1 | Israel mobilizes reserves\n- 2026-03-15T09:00:00.000Z | AP | ceasefire talks stall',
    );

    assert.equal(result.primaryContext, 'Theater: Levant.');
    assert.equal(result.recentNews.length, 2);
    assert.match(result.recentNews[0], /Reuters/);
  });
});

describe('buildDeductionPrompt', () => {
  it('builds a structured forecast prompt for panel usage', () => {
    // Fixed `now` keeps the embedded "Today is ..." date deterministic.
    const { mode, systemPrompt, userPrompt } = buildDeductionPrompt({
      query: 'What is the expected strategic impact of the current military posture in the Gulf theater?',
      geoContext: 'Theater: Gulf.\n\nRecent News Signal Snapshot:\n- 2026-03-15T08:00:00.000Z | Reuters | naval deployment increases',
      now: new Date('2026-03-15T12:00:00Z'),
    });

    assert.equal(mode, 'forecast');
    assert.match(systemPrompt, /\*\*Most likely path \(next 24-72h\)\*\*/);
    assert.match(systemPrompt, /2026-03-15 UTC/);
    assert.match(userPrompt, /Recent News Signals/);
  });

  it('builds a terse brief prompt for correlation-card usage', () => {
    const { mode, systemPrompt } = buildDeductionPrompt({
      query: 'Assess likelihood and potential implications in 2-3 sentences.',
      geoContext: 'Countries: Taiwan, China',
      now: new Date('2026-03-15T12:00:00Z'),
    });

    assert.equal(mode, 'brief');
    assert.match(systemPrompt, /exactly 2 or 3 sentences/);
    // Brief mode must not leak the forecast-template section headers.
    assert.doesNotMatch(systemPrompt, /\*\*Bottom line\*\*/);
  });
});

describe('postProcessDeductionOutput', () => {
  it('removes think tags and flattens brief responses', () => {
    const output = postProcessDeductionOutput('<think>hidden</think> First line.\n\nSecond line.', 'brief');
    assert.equal(output, 'First line. Second line.');
  });
});

describe('inferProviderLabel', () => {
  it('maps known providers and falls back to hostname', () => {
    assert.equal(inferProviderLabel('https://api.groq.com/openai/v1/chat/completions'), 'groq');
    assert.equal(inferProviderLabel('https://example.internal/v1/chat/completions'), 'example.internal');
  });
});

describe('buildNewsContextFromItems', () => {
  it('deduplicates duplicate headlines and includes metadata', () => {
    const now = new Date('2026-03-15T12:00:00Z');
    // Two items share the same title; only the first should be emitted.
    const context = buildNewsContextFromItems([
      {
        source: 'Reuters',
        title: 'Markets fall after new tariff threat',
        link: 'https://example.com/1',
        pubDate: now,
        isAlert: true,
        tier: 1,
        locationName: 'Washington',
        threat: { level: 'high', category: 'economic', confidence: 0.9, source: 'ml' },
      },
      {
        source: 'AP',
        title: 'Markets fall after new tariff threat',
        link: 'https://example.com/2',
        pubDate: new Date('2026-03-15T11:30:00Z'),
        isAlert: false,
      },
    ]);

    assert.match(context, /Recent News Signal Snapshot/);
    assert.match(context, /Reuters/);
    assert.match(context, /tier-1/);
    assert.match(context, /Washington/);
    assert.equal((context.match(/Markets fall after new tariff threat/g) || []).length, 1);
  });
});
|
||||
Reference in New Issue
Block a user