mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
* feat(analyst): topic-aware digest search for WM Analyst
The analyst was answering topic-specific questions from forecast probabilities and model knowledge instead of from actual ingested news articles.
Root cause: 200 RSS feed articles flow into news:digest:v1:full:en but only 8 top-scored stories make it to news:insights:v1. Topic-relevant articles were silently discarded at the clustering step.
Three gaps fixed:
- GDELT live headlines now append up to 3 user query keywords to surface topic-relevant live articles
- Full digest corpus is now keyword-searched per query; top 8 matched articles injected first in context as 'Matched News Articles'
- Fallback prompt no longer invites model speculation; replaced with explicit prohibition
Keyword extraction runs once in assembleAnalystContext and is shared by both GDELT and digest search (zero added latency).
TODO: fan out digest search to multi-language keys when available.
* fix(analyst): multi-turn retrieval continuity and word-boundary keyword matching
P1: prepend last user turn to retrieval query for follow-up topic continuity
P2: preserve 2-char acronyms (US/UK/EU); use tokenizeForMatch+findMatchingKeywords for word-boundary-safe scoring instead of String.includes
* fix(analyst): prioritize current-turn keywords in retrieval query
extractKeywords processes tokens left-to-right and caps at 8 distinct
terms. Building the retrieval string as prevTurn + currentQuery let a
long prior question fill the cap before the pivot term in the follow-up
(e.g. 'germany' in 'What about Germany?') was ever seen.
Swapped to currentQuery + prevTurn so current-turn keywords always win
the available slots; prior-turn terms backfill what remains for topic
continuity.
* fix(analyst): preserve 2-char acronyms case-insensitively in keyword extraction
Previous guard (/^[A-Z]{2}$/) only matched uppercase input, so common
lowercase queries like 'us sanctions', 'uk energy', 'ai chip exports'
still dropped the key term before retrieval.
Added KNOWN_2CHAR_ACRONYMS set (us, uk, eu, un, ai) checked against the
lowercased token, so the preservation path triggers regardless of how the
user typed the query.
* test(analyst): cover extractKeywords edge cases and retrieval priority ordering
Fills the coverage gap noted in review: existing tests only exercised
prompt text, leaving keyword extraction and retrieval assembly untested.
- Export extractKeywords() to make it unit-testable
- Fix emptyCtx/fullCtx fixtures to include relevantArticles field
- extractKeywords suite: stopword filtering, deduplication, 8-keyword cap,
known 2-char acronyms (us/uk/eu/un/ai) case-insensitive, non-acronym
2-char drop, empty-result path
- Retrieval priority suite: verifies current-turn pivot appears first in
keyword list when query+prevTurn is combined, prior-turn backfills
remaining slots, long prior turns cannot crowd out current-turn pivot
176 lines
6.4 KiB
TypeScript
/**
 * Streaming chat analyst edge function — Pro only.
 *
 * POST /api/chat-analyst
 * Body: { history: {role,content}[], query: string, domainFocus?: string, geoContext?: string }
 *
 * Returns text/event-stream SSE:
 *   data: {"meta":{"sources":["Brief","Risk",...],"degraded":false}} — always first event
 *   data: {"action":{"type":"suggest-widget","label":"...","prefill":"..."}} — optional, visual queries only
 *   data: {"delta":"..."} — one per content token
 *   data: {"done":true} — terminal event
 *   data: {"error":"..."} — on auth/llm failure
 */
|
|
|
|
export const config = { runtime: 'edge', regions: ['iad1', 'lhr1', 'fra1', 'sfo1'] };
|
|
|
|
// @ts-expect-error — JS module, no declaration file
|
|
import { getCorsHeaders } from './_cors.js';
|
|
import { isCallerPremium } from '../server/_shared/premium-check';
|
|
import { checkRateLimit } from '../server/_shared/rate-limit';
|
|
import { assembleAnalystContext } from '../server/worldmonitor/intelligence/v1/chat-analyst-context';
|
|
import { buildAnalystSystemPrompt } from '../server/worldmonitor/intelligence/v1/chat-analyst-prompt';
|
|
import { buildActionEvents } from '../server/worldmonitor/intelligence/v1/chat-analyst-actions';
|
|
import { callLlmReasoningStream } from '../server/_shared/llm';
|
|
import { sanitizeForPrompt } from '../server/_shared/llm-sanitize.js';
|
|
|
|
const MAX_QUERY_LEN = 500;
|
|
const MAX_HISTORY_MESSAGES = 20;
|
|
const MAX_MESSAGE_CHARS = 800;
|
|
const MAX_GEO_LEN = 2;
|
|
const VALID_DOMAINS = new Set(['all', 'geo', 'market', 'military', 'economic']);
|
|
|
|
interface ChatMessage {
|
|
role: 'user' | 'assistant';
|
|
content: string;
|
|
}
|
|
|
|
interface ChatAnalystRequestBody {
|
|
history?: unknown[];
|
|
query?: unknown;
|
|
domainFocus?: unknown;
|
|
geoContext?: unknown;
|
|
}
|
|
|
|
function json(body: unknown, status: number, cors: Record<string, string>): Response {
|
|
return new Response(JSON.stringify(body), {
|
|
status,
|
|
headers: { 'Content-Type': 'application/json', ...cors },
|
|
});
|
|
}
|
|
|
|
function prependSseEvents(events: Array<Record<string, unknown>>, stream: ReadableStream<Uint8Array>): ReadableStream<Uint8Array> {
|
|
const enc = new TextEncoder();
|
|
const prefixes = events.map((e) => enc.encode(`data: ${JSON.stringify(e)}\n\n`));
|
|
let innerReader: ReadableStreamDefaultReader<Uint8Array> | null = null;
|
|
return new ReadableStream<Uint8Array>({
|
|
async start(controller) {
|
|
for (const p of prefixes) controller.enqueue(p);
|
|
innerReader = stream.getReader();
|
|
while (true) {
|
|
const { done, value } = await innerReader.read();
|
|
if (done) { controller.close(); return; }
|
|
controller.enqueue(value);
|
|
}
|
|
},
|
|
cancel() { innerReader?.cancel(); },
|
|
});
|
|
}
|
|
|
|
export default async function handler(req: Request): Promise<Response> {
|
|
const corsHeaders = getCorsHeaders(req) as Record<string, string>;
|
|
|
|
if (req.method === 'OPTIONS') {
|
|
return new Response(null, {
|
|
status: 204,
|
|
headers: {
|
|
...corsHeaders,
|
|
'Access-Control-Allow-Methods': 'POST, OPTIONS',
|
|
'Access-Control-Allow-Headers': 'Content-Type, Authorization, X-WorldMonitor-Key',
|
|
},
|
|
});
|
|
}
|
|
|
|
if (req.method !== 'POST') {
|
|
return json({ error: 'Method not allowed' }, 405, corsHeaders);
|
|
}
|
|
|
|
const isPremium = await isCallerPremium(req);
|
|
if (!isPremium) {
|
|
return json({ error: 'Pro subscription required' }, 403, corsHeaders);
|
|
}
|
|
|
|
const rateLimitResponse = await checkRateLimit(req, corsHeaders);
|
|
if (rateLimitResponse) return rateLimitResponse;
|
|
|
|
let body: ChatAnalystRequestBody;
|
|
try {
|
|
body = (await req.json()) as ChatAnalystRequestBody;
|
|
} catch {
|
|
return json({ error: 'Invalid JSON body' }, 400, corsHeaders);
|
|
}
|
|
|
|
const rawQuery = typeof body.query === 'string' ? body.query.trim().slice(0, MAX_QUERY_LEN) : '';
|
|
if (!rawQuery) return json({ error: 'query is required' }, 400, corsHeaders);
|
|
|
|
const query = sanitizeForPrompt(rawQuery);
|
|
if (!query) return json({ error: 'query is required' }, 400, corsHeaders);
|
|
|
|
// Validate domainFocus against the fixed domain set to prevent prompt injection
|
|
const rawDomain = typeof body.domainFocus === 'string' ? body.domainFocus.trim() : '';
|
|
const domainFocus = VALID_DOMAINS.has(rawDomain) ? rawDomain : 'all';
|
|
|
|
const geoContext = typeof body.geoContext === 'string'
|
|
? body.geoContext.trim().toUpperCase().slice(0, MAX_GEO_LEN)
|
|
: undefined;
|
|
|
|
const rawHistory = Array.isArray(body.history) ? body.history : [];
|
|
const history: ChatMessage[] = rawHistory
|
|
.filter((m): m is ChatMessage => {
|
|
if (!m || typeof m !== 'object') return false;
|
|
const msg = m as Record<string, unknown>;
|
|
return (msg.role === 'user' || msg.role === 'assistant') && typeof msg.content === 'string';
|
|
})
|
|
.slice(-MAX_HISTORY_MESSAGES)
|
|
.map((m) => {
|
|
const sanitized = sanitizeForPrompt(m.content.slice(0, MAX_MESSAGE_CHARS)) ?? '';
|
|
return { role: m.role, content: sanitized };
|
|
})
|
|
.filter((m) => m.content.length > 0);
|
|
|
|
// Build retrieval query with current turn FIRST so its keywords fill the
|
|
// extraction cap before prior-turn terms. This ensures pivot words like
|
|
// "Germany" in "What about Germany?" are never crowded out by a long
|
|
// previous question. Prior turn backfills remaining slots for topic continuity.
|
|
const prevUserTurn = history.filter((m) => m.role === 'user').slice(-1)[0]?.content ?? '';
|
|
const retrievalQuery = prevUserTurn ? `${query} ${prevUserTurn}` : query;
|
|
|
|
const context = await assembleAnalystContext(geoContext, domainFocus, retrievalQuery);
|
|
const systemPrompt = buildAnalystSystemPrompt(context, domainFocus);
|
|
|
|
const messages = [
|
|
{ role: 'system', content: systemPrompt },
|
|
...history,
|
|
{ role: 'user', content: query },
|
|
];
|
|
|
|
const llmStream = callLlmReasoningStream({
|
|
messages,
|
|
maxTokens: 600,
|
|
temperature: 0.35,
|
|
timeoutMs: 25_000,
|
|
signal: req.signal,
|
|
});
|
|
|
|
// Always prepend a meta event so the client knows which sources are live
|
|
// and whether context is degraded — before the first token arrives.
|
|
// Optionally follows with an action event for visual/chart queries.
|
|
const stream = prependSseEvents(
|
|
[
|
|
{ meta: { sources: context.activeSources, degraded: context.degraded } },
|
|
...buildActionEvents(query).map((a) => ({ action: a })),
|
|
],
|
|
llmStream,
|
|
);
|
|
|
|
return new Response(stream, {
|
|
status: 200,
|
|
headers: {
|
|
'Content-Type': 'text/event-stream',
|
|
'Cache-Control': 'no-cache, no-store',
|
|
'X-Accel-Buffering': 'no',
|
|
...corsHeaders,
|
|
},
|
|
});
|
|
}
|