Files
worldmonitor/api/chat-analyst.ts
Elie Habib bcdd508b69 feat(analyst): topic-aware digest search fixes hallucination on niche queries (#2677)
* feat(analyst): topic-aware digest search for WM Analyst

The analyst was answering topic-specific questions from forecast probabilities and model knowledge instead of from actual ingested news articles.

Root cause: 200 RSS feed articles flow into news:digest:v1:full:en but only 8 top-scored stories make it to news:insights:v1. Topic-relevant articles were silently discarded at the clustering step.

Three gaps fixed:
- The GDELT live-headlines query now appends up to 3 keywords from the user query to surface topic-relevant live articles
- Full digest corpus is now keyword-searched per query; top 8 matched articles injected first in context as 'Matched News Articles'
- Fallback prompt no longer invites model speculation; replaced with explicit prohibition

Keyword extraction runs once in assembleAnalystContext and is shared by both GDELT and digest search (zero added latency).

TODO: fan out digest search to multi-language keys when available.

* fix(analyst): multi-turn retrieval continuity and word-boundary keyword matching

P1: prepend last user turn to retrieval query for follow-up topic continuity
P2: preserve 2-char acronyms (US/UK/EU); use tokenizeForMatch+findMatchingKeywords for word-boundary-safe scoring instead of String.includes

* fix(analyst): prioritize current-turn keywords in retrieval query

extractKeywords processes tokens left-to-right and caps at 8 distinct
terms. Building the retrieval string as prevTurn + currentQuery let a
long prior question fill the cap before the pivot term in the follow-up
(e.g. 'germany' in 'What about Germany?') was ever seen.

Swapped to currentQuery + prevTurn so current-turn keywords always win
the available slots; prior-turn terms backfill what remains for topic
continuity.

* fix(analyst): preserve 2-char acronyms case-insensitively in keyword extraction

Previous guard (/^[A-Z]{2}$/) only matched uppercase input, so common
lowercase queries like 'us sanctions', 'uk energy', 'ai chip exports'
still dropped the key term before retrieval.

Added KNOWN_2CHAR_ACRONYMS set (us, uk, eu, un, ai) checked against the
lowercased token, so the preservation path triggers regardless of how the
user typed the query.

* test(analyst): cover extractKeywords edge cases and retrieval priority ordering

Fills the coverage gap noted in review: existing tests only exercised
prompt text, leaving keyword extraction and retrieval assembly untested.

- Export extractKeywords() to make it unit-testable
- Fix emptyCtx/fullCtx fixtures to include relevantArticles field
- extractKeywords suite: stopword filtering, deduplication, 8-keyword cap,
  known 2-char acronyms (us/uk/eu/un/ai) case-insensitive, non-acronym
  2-char drop, empty-result path
- Retrieval priority suite: verifies current-turn pivot appears first in
  keyword list when query+prevTurn is combined, prior-turn backfills
  remaining slots, long prior turns cannot crowd out current-turn pivot
2026-04-04 15:11:50 +04:00

176 lines
6.4 KiB
TypeScript

/**
* Streaming chat analyst edge function — Pro only.
*
* POST /api/chat-analyst
* Body: { history?: {role,content}[], query: string, domainFocus?: string, geoContext?: string }
*
* Returns text/event-stream SSE:
* data: {"meta":{"sources":["Brief","Risk",...],"degraded":false}} — always first event
* data: {"action":{"type":"suggest-widget","label":"...","prefill":"..."}} — optional, visual queries only
* data: {"delta":"..."} — one per content token
* data: {"done":true} — terminal event
* data: {"error":"..."} — on auth/llm failure
*/
/**
 * Deployment settings exported alongside the handler: the runtime kind and
 * the list of region identifiers.
 * NOTE(review): presumably read by the hosting platform at build time — confirm.
 */
export const config = {
  runtime: 'edge',
  regions: ['iad1', 'lhr1', 'fra1', 'sfo1'],
};
// @ts-expect-error — JS module, no declaration file
import { getCorsHeaders } from './_cors.js';
import { isCallerPremium } from '../server/_shared/premium-check';
import { checkRateLimit } from '../server/_shared/rate-limit';
import { assembleAnalystContext } from '../server/worldmonitor/intelligence/v1/chat-analyst-context';
import { buildAnalystSystemPrompt } from '../server/worldmonitor/intelligence/v1/chat-analyst-prompt';
import { buildActionEvents } from '../server/worldmonitor/intelligence/v1/chat-analyst-actions';
import { callLlmReasoningStream } from '../server/_shared/llm';
import { sanitizeForPrompt } from '../server/_shared/llm-sanitize.js';
// ---- Request validation limits --------------------------------------------

/** The trimmed `query` string is cut to this many characters before sanitizing. */
const MAX_QUERY_LEN = 500;

/** Only the most recent N valid history entries are kept. */
const MAX_HISTORY_MESSAGES = 20;

/** Each history entry's content is cut to this many characters before sanitizing. */
const MAX_MESSAGE_CHARS = 800;

/**
 * `geoContext` is upper-cased and cut to this many characters.
 * NOTE(review): presumably a two-letter country code — confirm against the caller.
 */
const MAX_GEO_LEN = 2;

/** Accepted `domainFocus` values; anything else falls back to `'all'`. */
const VALID_DOMAINS = new Set<string>([
  'all',
  'geo',
  'market',
  'military',
  'economic',
]);
/**
 * One validated conversation turn, as forwarded to the LLM.
 * History entries that do not match this shape are dropped by the handler.
 */
interface ChatMessage {
  /** Text of the turn. */
  content: string;
  /** Who produced the turn. */
  role: 'user' | 'assistant';
}
/**
 * Shape of the parsed POST body before validation.
 * Every field is `unknown` and optional because the payload is untrusted;
 * the handler narrows each one individually.
 */
interface ChatAnalystRequestBody {
  /** The user's question; must narrow to a non-empty string. */
  query?: unknown;
  /** Prior conversation turns; non-array values are treated as empty. */
  history?: unknown[];
  /** Requested domain; checked against the allowed domain set. */
  domainFocus?: unknown;
  /** Geographic hint; used only when it is a string. */
  geoContext?: unknown;
}
/**
 * Builds a JSON `Response`.
 *
 * @param body   Value serialized with `JSON.stringify` as the response body.
 * @param status HTTP status code.
 * @param cors   Extra headers merged after `Content-Type`, so a same-named
 *               key in `cors` takes precedence.
 * @returns The constructed response.
 */
function json(body: unknown, status: number, cors: Record<string, string>): Response {
  const payload = JSON.stringify(body);
  const headers = { 'Content-Type': 'application/json', ...cors };
  return new Response(payload, { status, headers });
}
/**
 * Returns a stream that first emits each of `events` as an SSE frame
 * (`data: <json>\n\n`) and then forwards every chunk of `stream` unchanged.
 *
 * The inner stream is read from `pull()`, i.e. only when the consumer has
 * room for more data, so a slow or idle client no longer causes the whole
 * upstream to be drained into memory. Cancelling the returned stream cancels
 * the inner stream with the same reason.
 *
 * @param events Objects serialized with `JSON.stringify`, one SSE frame each, in order.
 * @param stream Upstream byte stream; it is locked as soon as this function is called.
 * @returns A byte stream: the encoded frames followed by the upstream chunks.
 */
function prependSseEvents(events: Array<Record<string, unknown>>, stream: ReadableStream<Uint8Array>): ReadableStream<Uint8Array> {
  const enc = new TextEncoder();
  const prefixes = events.map((e) => enc.encode(`data: ${JSON.stringify(e)}\n\n`));
  const innerReader = stream.getReader();
  return new ReadableStream<Uint8Array>({
    start(controller) {
      // Prefix frames are queued up front so they always precede upstream data.
      for (const p of prefixes) controller.enqueue(p);
    },
    async pull(controller) {
      // One upstream read per pull; a rejected read errors the outer stream.
      const { done, value } = await innerReader.read();
      if (done) { controller.close(); return; }
      controller.enqueue(value);
    },
    // Returning the promise lets the consumer observe upstream cancel failures.
    cancel(reason) { return innerReader.cancel(reason); },
  });
}
/**
 * Edge handler for the streaming chat analyst endpoint.
 *
 * Order of checks: CORS preflight, HTTP method, premium gate, rate limit,
 * then body validation. On success the validated input is used to assemble
 * context, build the system prompt, and start the LLM stream, which is
 * returned as `text/event-stream` with a leading `meta` event and optional
 * `action` events.
 *
 * @param req Incoming request.
 * @returns 204 for `OPTIONS`; a JSON error with 405 (wrong method), 403 (not
 *          premium) or 400 (unparseable body / missing query); the response
 *          produced by `checkRateLimit` when it returns one; otherwise a 200
 *          SSE stream.
 */
export default async function handler(req: Request): Promise<Response> {
  const corsHeaders = getCorsHeaders(req) as Record<string, string>;

  if (req.method === 'OPTIONS') {
    return new Response(null, {
      status: 204,
      headers: {
        ...corsHeaders,
        'Access-Control-Allow-Methods': 'POST, OPTIONS',
        'Access-Control-Allow-Headers': 'Content-Type, Authorization, X-WorldMonitor-Key',
      },
    });
  }

  if (req.method !== 'POST') {
    return json({ error: 'Method not allowed' }, 405, corsHeaders);
  }

  const isPremium = await isCallerPremium(req);
  if (!isPremium) {
    return json({ error: 'Pro subscription required' }, 403, corsHeaders);
  }

  const rateLimitResponse = await checkRateLimit(req, corsHeaders);
  if (rateLimitResponse) return rateLimitResponse;

  let body: ChatAnalystRequestBody;
  try {
    const parsed: unknown = await req.json();
    // A literal `null` is valid JSON but has no properties; reading `.query`
    // from it would throw outside this try/catch. Treat any non-object
    // payload as an empty body so it fails the `query` check with a 400.
    body = parsed !== null && typeof parsed === 'object' ? (parsed as ChatAnalystRequestBody) : {};
  } catch {
    return json({ error: 'Invalid JSON body' }, 400, corsHeaders);
  }

  const rawQuery = typeof body.query === 'string' ? body.query.trim().slice(0, MAX_QUERY_LEN) : '';
  if (!rawQuery) return json({ error: 'query is required' }, 400, corsHeaders);

  // Sanitizing may leave nothing behind, so the emptiness check is repeated.
  const query = sanitizeForPrompt(rawQuery);
  if (!query) return json({ error: 'query is required' }, 400, corsHeaders);

  // Validate domainFocus against the fixed domain set to prevent prompt injection
  const rawDomain = typeof body.domainFocus === 'string' ? body.domainFocus.trim() : '';
  const domainFocus = VALID_DOMAINS.has(rawDomain) ? rawDomain : 'all';

  const geoContext = typeof body.geoContext === 'string'
    ? body.geoContext.trim().toUpperCase().slice(0, MAX_GEO_LEN)
    : undefined;

  // Keep only well-formed turns, cap to the most recent ones, truncate and
  // sanitize each, then drop any turn whose content sanitized to empty.
  const rawHistory = Array.isArray(body.history) ? body.history : [];
  const history: ChatMessage[] = rawHistory
    .filter((m): m is ChatMessage => {
      if (!m || typeof m !== 'object') return false;
      const msg = m as Record<string, unknown>;
      return (msg.role === 'user' || msg.role === 'assistant') && typeof msg.content === 'string';
    })
    .slice(-MAX_HISTORY_MESSAGES)
    .map((m) => {
      const sanitized = sanitizeForPrompt(m.content.slice(0, MAX_MESSAGE_CHARS)) ?? '';
      return { role: m.role, content: sanitized };
    })
    .filter((m) => m.content.length > 0);

  // Build retrieval query with current turn FIRST so its keywords fill the
  // extraction cap before prior-turn terms. This ensures pivot words like
  // "Germany" in "What about Germany?" are never crowded out by a long
  // previous question. Prior turn backfills remaining slots for topic continuity.
  const prevUserTurn = history.filter((m) => m.role === 'user').slice(-1)[0]?.content ?? '';
  const retrievalQuery = prevUserTurn ? `${query} ${prevUserTurn}` : query;

  const context = await assembleAnalystContext(geoContext, domainFocus, retrievalQuery);
  const systemPrompt = buildAnalystSystemPrompt(context, domainFocus);

  // The LLM sees only the current query as the final user turn; the combined
  // retrieval query is used for context assembly alone.
  const messages = [
    { role: 'system', content: systemPrompt },
    ...history,
    { role: 'user', content: query },
  ];

  const llmStream = callLlmReasoningStream({
    messages,
    maxTokens: 600,
    temperature: 0.35,
    timeoutMs: 25_000,
    signal: req.signal,
  });

  // Always prepend a meta event so the client knows which sources are live
  // and whether context is degraded — before the first token arrives.
  // Optionally follows with an action event for visual/chart queries.
  const stream = prependSseEvents(
    [
      { meta: { sources: context.activeSources, degraded: context.degraded } },
      ...buildActionEvents(query).map((a) => ({ action: a })),
    ],
    llmStream,
  );

  return new Response(stream, {
    status: 200,
    headers: {
      'Content-Type': 'text/event-stream',
      'Cache-Control': 'no-cache, no-store',
      'X-Accel-Buffering': 'no',
      ...corsHeaders,
    },
  });
}