mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
* fix(intelligence): include framework/systemAppend hash in cache keys (todos 041, 045, 051) * fix(intelligence): gate framework/systemAppend on server-side PRO check (todo 042) * fix(skills): exact hostname allowlist + redirect:manual to prevent SSRF (todos 043, 054) * fix(intelligence): sanitize systemAppend against prompt injection before LLM (todo 044) * fix(intelligence): use framework field in DeductionPanel, fix InsightsPanel double increment (todos 046, 047) * fix(intelligence): settings export, hot-path cache, country-brief debounce (todos 048, 049, 050) * fix(intelligence): i18n, FrameworkSelector note, stripThinkingTags dedup, UUID IDs (todos 052, 055, 056, 057) - i18n Analysis Frameworks settings section (en + fr locales, replace all hardcoded English strings with t() calls) - FrameworkSelector: replace panelId==='insights' hardcode with note? option; both InsightsPanel and DailyMarketBriefPanel pass note - stripThinkingTags: remove inline duplicate in summarize-article.ts, import from _shared/llm; add Strip unterminated comment so tests can locate the section - Replace Date.now() IDs for imported frameworks with crypto.randomUUID() - Drop 'not supported in phase 1' phrasing to 'not supported' - test: fix summarize-reasoning Fix 2 suite to read from llm.ts - test: add premium-check-stub and wire into redis-caching country intel brief importPatchedTsModule so test can resolve the new import * fix(security): address P1 review findings from PR #2386 - premium-check: require `required: true` from validateApiKey so trusted browser origins (worldmonitor.app, Vercel previews, localhost) are not treated as PRO callers; fixes free-user bypass of framework/systemAppend gate - llm: replace weak sanitizeSystemAppend with sanitizeForPrompt from llm-sanitize.js; all callLlm callers now get model-delimiter and control-char stripping, not just phrase blocklist - get-country-intel-brief: apply sanitizeForPrompt to contextSnapshot before injecting into user prompt; fixes unsanitized query-param injection Closes todos 060, 061, 062 (P1 — blocked merge of #2386). * chore(todos): mark P1 todos 060-062 complete * fix(agentskills): address Greptile P2 review comments - hoist ALLOWED_AGENTSKILLS_HOSTS Set to module scope (was reallocated per-request) - add res.type === 'opaqueredirect' check alongside the 3xx guard; Edge Runtime returns status=0 for opaque redirects so the status range check alone is dead code
261 lines
10 KiB
JavaScript
261 lines
10 KiB
JavaScript
/**
|
|
* Tests for Ollama thinking token / reasoning leak fixes.
|
|
*
|
|
* Verifies:
|
|
* - message.reasoning fallback is removed (Fix 1)
|
|
* - Multiple thinking tag formats are stripped (Fix 2)
|
|
* - Plain-text reasoning preambles are detected (Fix 3)
|
|
* - Mode guard only applies to brief/analysis (Fix 3)
|
|
* - Cache version bumped to v5 (Fix 4)
|
|
*/
|
|
|
|
import { describe, it } from 'node:test';
|
|
import assert from 'node:assert/strict';
|
|
import { readFileSync } from 'node:fs';
|
|
import { dirname, resolve } from 'node:path';
|
|
import { fileURLToPath } from 'node:url';
|
|
|
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
const root = resolve(__dirname, '..');
|
|
|
|
const readSrc = (relPath) => readFileSync(resolve(root, relPath), 'utf-8');
|
|
|
|
// ========================================================================
|
|
// Fix 1: message.reasoning fallback removed
|
|
// ========================================================================
|
|
|
|
describe('Fix 1: message.reasoning fallback', () => {
|
|
const src = readSrc('server/worldmonitor/news/v1/summarize-article.ts');
|
|
|
|
it('does NOT fall back to message.reasoning', () => {
|
|
assert.doesNotMatch(src, /message\?\.reasoning/,
|
|
'Should not reference message.reasoning — reasoning tokens must never be used as summary');
|
|
});
|
|
|
|
it('only uses message.content', () => {
|
|
assert.match(src, /message\?\.content/,
|
|
'Should extract content from message.content');
|
|
});
|
|
});
|
|
|
|
// ========================================================================
|
|
// Fix 2: Extended tag stripping
|
|
// ========================================================================
|
|
|
|
describe('Fix 2: thinking tag stripping formats', () => {
|
|
// stripThinkingTags was consolidated into server/_shared/llm.ts (todo 056 dedup)
|
|
const src = readSrc('server/_shared/llm.ts');
|
|
|
|
it('strips <think> tags', () => {
|
|
assert.match(src, /<think>/i, 'Should handle <think> tags');
|
|
});
|
|
|
|
it('strips <|thinking|> tags', () => {
|
|
assert.ok(src.includes('\\|thinking\\|'), 'Should handle <|thinking|> tags');
|
|
});
|
|
|
|
it('strips <reasoning> tags', () => {
|
|
assert.match(src, /<reasoning>/, 'Should handle <reasoning> tags');
|
|
});
|
|
|
|
it('strips <reflection> tags', () => {
|
|
assert.match(src, /<reflection>/, 'Should handle <reflection> tags');
|
|
});
|
|
|
|
it('handles unterminated <think> blocks', () => {
|
|
// Second .replace block should have <think> without closing tag pattern
|
|
const lines = src.split('\n');
|
|
const unterminatedSection = lines.findIndex(l => l.includes('Strip unterminated'));
|
|
assert.ok(unterminatedSection > -1, 'Should have unterminated block stripping section');
|
|
const sectionSlice = lines.slice(unterminatedSection, unterminatedSection + 8).join('\n');
|
|
assert.ok(sectionSlice.includes('<think>'), 'Should strip unterminated <think>');
|
|
});
|
|
|
|
it('handles unterminated <|thinking|> blocks', () => {
|
|
const lines = src.split('\n');
|
|
const unterminatedSection = lines.findIndex(l => l.includes('Strip unterminated'));
|
|
const sectionSlice = lines.slice(unterminatedSection, unterminatedSection + 8).join('\n');
|
|
assert.ok(sectionSlice.includes('\\|thinking\\|'), 'Should strip unterminated <|thinking|>');
|
|
});
|
|
|
|
it('handles unterminated <reasoning> blocks', () => {
|
|
const lines = src.split('\n');
|
|
const unterminatedSection = lines.findIndex(l => l.includes('Strip unterminated'));
|
|
const sectionSlice = lines.slice(unterminatedSection, unterminatedSection + 8).join('\n');
|
|
assert.ok(sectionSlice.includes('<reasoning>'), 'Should strip unterminated <reasoning>');
|
|
});
|
|
|
|
it('handles unterminated <reflection> blocks', () => {
|
|
const lines = src.split('\n');
|
|
const unterminatedSection = lines.findIndex(l => l.includes('Strip unterminated'));
|
|
const sectionSlice = lines.slice(unterminatedSection, unterminatedSection + 8).join('\n');
|
|
assert.ok(sectionSlice.includes('<reflection>'), 'Should strip unterminated <reflection>');
|
|
});
|
|
|
|
it('strips <|begin_of_thought|> tags (terminated)', () => {
|
|
assert.ok(src.includes('begin_of_thought'), 'Should handle <|begin_of_thought|> tags');
|
|
});
|
|
|
|
it('strips <|begin_of_thought|> tags (unterminated)', () => {
|
|
const lines = src.split('\n');
|
|
const unterminatedSection = lines.findIndex(l => l.includes('Strip unterminated'));
|
|
const sectionSlice = lines.slice(unterminatedSection, unterminatedSection + 10).join('\n');
|
|
assert.ok(sectionSlice.includes('begin_of_thought'), 'Should strip unterminated <|begin_of_thought|>');
|
|
});
|
|
});
|
|
|
|
// ========================================================================
|
|
// Fix 3: Reasoning preamble detection
|
|
// ========================================================================
|
|
|
|
describe('Fix 3: hasReasoningPreamble', () => {
|
|
const src = readSrc('server/worldmonitor/news/v1/summarize-article.ts');
|
|
|
|
// Extract production regexes from source to avoid drift between test and implementation.
|
|
// Pattern: `export const TASK_NARRATION = /.../.../;`
|
|
function extractRegex(source, name) {
|
|
const match = source.match(new RegExp(`export const ${name}\\s*=\\s*(/[^;]+/[gimsuy]*)`));
|
|
if (!match) throw new Error(`Could not extract ${name} from source`);
|
|
const [, full] = match;
|
|
const lastSlash = full.lastIndexOf('/');
|
|
const pattern = full.slice(1, lastSlash);
|
|
const flags = full.slice(lastSlash + 1);
|
|
return new RegExp(pattern, flags);
|
|
}
|
|
|
|
const TASK_NARRATION = extractRegex(src, 'TASK_NARRATION');
|
|
const PROMPT_ECHO = extractRegex(src, 'PROMPT_ECHO');
|
|
|
|
function hasReasoningPreamble(text) {
|
|
const trimmed = text.trim();
|
|
return TASK_NARRATION.test(trimmed) || PROMPT_ECHO.test(trimmed);
|
|
}
|
|
|
|
// Task narration patterns
|
|
it('detects "We need to summarize..."', () => {
|
|
assert.ok(hasReasoningPreamble('We need to summarize the top story.'));
|
|
});
|
|
|
|
it('detects "Let me analyze..."', () => {
|
|
assert.ok(hasReasoningPreamble('Let me analyze these headlines.'));
|
|
});
|
|
|
|
it('detects "I should lead with..."', () => {
|
|
assert.ok(hasReasoningPreamble('I should lead with what happened.'));
|
|
});
|
|
|
|
it('detects "So we need to..."', () => {
|
|
assert.ok(hasReasoningPreamble('So we need to lead with WHAT happened and WHERE.'));
|
|
});
|
|
|
|
it('detects "Okay, I\'ll summarize..."', () => {
|
|
assert.ok(hasReasoningPreamble("Okay, I'll summarize the top story."));
|
|
});
|
|
|
|
it('detects "Okay, let me..."', () => {
|
|
assert.ok(hasReasoningPreamble('Okay, let me analyze these headlines.'));
|
|
});
|
|
|
|
it('detects "Sure, here is..."', () => {
|
|
assert.ok(hasReasoningPreamble('Sure, here is the summary.'));
|
|
});
|
|
|
|
it('passes "Okay" without reasoning continuation', () => {
|
|
assert.ok(!hasReasoningPreamble('Okay results from the summit are mixed.'));
|
|
});
|
|
|
|
// Prompt echo patterns
|
|
it('detects "Summarize the top story..."', () => {
|
|
assert.ok(hasReasoningPreamble('Summarize the top story: the headline says...'));
|
|
});
|
|
|
|
it('detects "The top story is likely..."', () => {
|
|
assert.ok(hasReasoningPreamble('The top story is likely the first headline about Russia.'));
|
|
});
|
|
|
|
it('detects "Rules:" echo', () => {
|
|
assert.ok(hasReasoningPreamble('Rules: lead with WHAT happened'));
|
|
});
|
|
|
|
// Valid summaries that must NOT be detected
|
|
it('passes clean geopolitical summary', () => {
|
|
assert.ok(!hasReasoningPreamble("Iran's nuclear program faces increased international scrutiny."));
|
|
});
|
|
|
|
it('passes clean event summary', () => {
|
|
assert.ok(!hasReasoningPreamble('The US Treasury announced new sanctions against Russian entities.'));
|
|
});
|
|
|
|
it('passes clean protest summary', () => {
|
|
assert.ok(!hasReasoningPreamble('Protests erupted in multiple cities across France.'));
|
|
});
|
|
|
|
it('passes summary starting with country name', () => {
|
|
assert.ok(!hasReasoningPreamble('Russia has escalated its nuclear rhetoric.'));
|
|
});
|
|
|
|
// New patterns: first/to summarize/my task/step
|
|
it('detects "First, I need to..."', () => {
|
|
assert.ok(hasReasoningPreamble('First, I need to identify the most important headline.'));
|
|
});
|
|
|
|
it('detects "First, let me..."', () => {
|
|
assert.ok(hasReasoningPreamble('First, let me analyze these headlines.'));
|
|
});
|
|
|
|
it('detects "To summarize the headlines..."', () => {
|
|
assert.ok(hasReasoningPreamble('To summarize the headlines, we look at the key events.'));
|
|
});
|
|
|
|
it('detects "My task is to..."', () => {
|
|
assert.ok(hasReasoningPreamble('My task is to summarize the top story.'));
|
|
});
|
|
|
|
it('detects "Step 1: analyze..."', () => {
|
|
assert.ok(hasReasoningPreamble('Step 1: analyze the most important headline.'));
|
|
});
|
|
|
|
// Negative cases — legitimate summaries that start similarly
|
|
it('passes "First responders arrived..."', () => {
|
|
assert.ok(!hasReasoningPreamble('First responders arrived at the scene of the earthquake.'));
|
|
});
|
|
|
|
it('passes "To summarize, the summit concluded..."', () => {
|
|
assert.ok(!hasReasoningPreamble('To summarize, the summit concluded with a joint statement.'));
|
|
});
|
|
|
|
it('passes "My task force deployed..."', () => {
|
|
assert.ok(!hasReasoningPreamble('My task force deployed to the border region.'));
|
|
});
|
|
|
|
// Mode guard
|
|
it('is gated to brief and analysis modes in source', () => {
|
|
assert.match(src, /\['brief',\s*'analysis'\]\.includes\(mode\)/,
|
|
'Reasoning preamble check must be gated to brief/analysis modes');
|
|
});
|
|
|
|
it('does not apply to translate mode', () => {
|
|
assert.doesNotMatch(src, /mode\s*!==\s*'translate'.*hasReasoningPreamble/,
|
|
'Should NOT use negation-based mode guard');
|
|
});
|
|
|
|
// Min-length gate
|
|
it('has mode-scoped min-length gate for brief/analysis', () => {
|
|
assert.match(src, /\['brief',\s*'analysis'\]\.includes\(mode\)\s*&&\s*rawContent\.length\s*<\s*20/,
|
|
'Should reject outputs shorter than 20 chars in brief/analysis modes');
|
|
});
|
|
});
|
|
|
|
// ========================================================================
|
|
// Fix 4: Cache version bump
|
|
// ========================================================================
|
|
|
|
describe('Fix 4: cache version bump', () => {
|
|
const src = readSrc('src/utils/summary-cache-key.ts');
|
|
|
|
it('CACHE_VERSION is v5', () => {
|
|
assert.match(src, /CACHE_VERSION\s*=\s*'v5'/,
|
|
'CACHE_VERSION must be v5 to invalidate entries from old conflating prompts');
|
|
});
|
|
});
|