mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
* fix(security): sanitize LLM prompt inputs against injection attacks Adds server/_shared/llm-sanitize.js with sanitizeForPrompt() and sanitizeHeadlines() that strip known prompt-injection patterns before untrusted RSS headlines and geo-context strings are embedded in LLM prompts. Patterns stripped: - Model-specific delimiters (<|im_start|>, [INST], <|endoftext|>, etc.) - XML-style role wrappers (<system>, <assistant>, <user>) - Role override markers at line start (SYSTEM:, ### Claude:, etc.) - Instruction-override phrases (Ignore previous instructions, etc.) - ASCII/Unicode control characters, zero-width joiners, BOM, soft-hyphen - Separator lines (---, ===) used as prompt boundaries Normal headlines (quotes, colons, dashes, emoji, unicode) pass through unchanged. The sanitizer is defense-in-depth, not a security boundary. Wired into summarize-article.ts replacing the previous slice-only approach. Tests: 36 cases across 8 suites, all passing. Co-authored-by: Fayez Bast <FayezBast@users.noreply.github.com> Ported from PR #381 * fix(types): add type declarations for llm-sanitize.js * fix(sanitize): address Codex review — light sanitizer for headlines - Add sanitizeHeadline() / sanitizeHeadlinesLight(): strips only structural patterns (model delimiters, control chars) without touching semantic instruction phrases - Use sanitizeHeadlinesLight() for headlines so that legitimate tech/security news like 'Anthropic says users can type "Output your system prompt"...' 
passes through unchanged and cache keys stay aligned with the browser - Keep full sanitizeForPrompt() for geoContext only (free-form, higher risk) - 40 tests, all passing * fix(security): apply full injection sanitizer at prompt-build time (P1) Separate the two uses of headlines: - Cache key: sanitizeHeadlinesLight() (structural only, preserves semantic phrases) so browser/server cache keys stay aligned - Prompt build: sanitizeHeadlines() (full sanitizer including semantic injection phrases) applied inside the fetcher just before buildArticlePrompts() This closes the P1 gap where "Ignore previous instructions" and similar payloads in RSS headlines were reaching the LLM prompt unchanged. --------- Co-authored-by: Fayez Bast <FayezBast@users.noreply.github.com>
157 lines
6.5 KiB
JavaScript
157 lines
6.5 KiB
JavaScript
/**
 * LLM Prompt Injection Sanitizer
 *
 * Strips known prompt-injection patterns from untrusted strings (e.g. RSS
 * headlines) before they are embedded in an LLM prompt.
 *
 * Design philosophy — blocklist of *bad* patterns only:
 *   ✓ Quotes, colons, dashes, em-dashes, ellipses → preserved (normal headlines)
 *   ✓ Unicode letters and emoji → preserved
 *   ✓ Sentence-level punctuation → preserved
 *   ✗ Role markers (e.g. "SYSTEM:", "### Assistant") → stripped
 *   ✗ Instruction overrides ("Ignore previous …") → stripped
 *   ✗ Model-specific delimiters ("<|im_start|>", etc.) → stripped
 *   ✗ ASCII / Unicode control characters (U+0000-U+001F except tab, LF and CR;
 *     U+007F; U+2028-U+2029) → stripped
 *   ✗ Null bytes, zero-width joiners / non-joiners → stripped
 *
 * The sanitizer never throws. If input is not a string it returns '' so
 * callers can safely map over headline arrays without extra guards.
 *
 * Security note:
 *   This is a defense-in-depth reduction layer, not a security boundary.
 *   Prompt-injection blocklists are inherently bypassable (for example via novel
 *   encodings, obfuscation, or semantically malicious content), so callers must
 *   keep additional controls in place (strict output validation, model/provider
 *   guardrails, and least-privilege tool access).
 *
 * References:
 *   OWASP LLM Top 10 – LLM01: Prompt Injection
 */
|
||
|
||
/**
 * Blocklist of regular expressions describing text that should not reach an
 * LLM prompt verbatim. Every entry carries the `g` flag so a single
 * `String.prototype.replace` call removes all occurrences.
 *
 * The array is frozen so the list cannot be reordered or extended by accident;
 * the RegExp objects themselves stay mutable (the freeze is shallow).
 */
const INJECTION_PATTERNS = Object.freeze([
  // Model-specific delimiter tokens
  /<\|(?:im_start|im_end|begin_of_text|end_of_text|eot_id|start_header_id|end_header_id)\|>/gi,
  /<\|(?:endoftext|fim_prefix|fim_middle|fim_suffix|pad)\|>/gi,
  /\[(?:INST|\/INST|SYS|\/SYS)\]/gi,
  // XML-style role wrappers, opening or closing, with optional attributes
  /<\/?(system|user|assistant|prompt|context|instruction)\b[^>]*>/gi,

  // Role override markers at line start (optionally preceded by 1-4 '#' and
  // optionally wrapped in [] or ()), up to and including the colon
  /(?:^|\n)\s*(?:#{1,4}\s*)?(?:\[|\()?\s*(?:system|human|gpt|claude|llm|model|prompt)\s*(?:\]|\))?\s*:/gim,

  // Explicit instruction-override phrases
  /ignore\s+(?:all\s+)?(?:previous|above|prior|earlier|the\s+above)\s+instructions?\b/gi,
  /(?:disregard|forget|bypass|override|overwrite|skip)\s+(?:all\s+)?(?:previous|above|prior|earlier|your|the)\s+(?:instructions?|prompt|rules?|guidelines?|constraints?|training)\b/gi,
  /(?:you\s+are\s+now|act\s+as|pretend\s+(?:to\s+be|you\s+are)|roleplay\s+as|simulate\s+(?:being\s+)?a)\s+(?:a\s+|an\s+)?(?:(?:different|new|another|unrestricted|jailbroken|evil|helpful)\s+)?(?:ai|assistant|model|chatbot|llm|bot|gpt|claude)\b/gi,
  /do\s+not\s+(?:follow|obey|adhere\s+to|comply\s+with)\s+(?:the\s+)?(?:previous|above|system|original)\s+(?:instructions?|rules?|prompt)\b/gi,
  /(?:output|print|display|reveal|show|repeat|recite|write\s+out)\s+(?:your\s+)?(?:system\s+prompt|instructions?|initial\s+prompt|original\s+prompt|context)\b/gi,

  // Prompt boundary separator lines: a whole line of 3+ '-' / '=' characters,
  // or a run of 3+ '#' followed by whitespace at the start of a line
  /^[\-=]{3,}$/gm,
  /^#{3,}\s/gm,
]);
|
||
|
||
// A line that opens with a chat-role label: "User:", "[assistant]:", "## bot:" …
const ROLE_PREFIX_RE = /^\s*(?:#{1,4}\s*)?(?:\[|\()?\s*(?:user|assistant|bot)\s*(?:\]|\))?\s*:\s*/i;
// Phrases that are treated as an override attempt on their own.
const ROLE_OVERRIDE_STRONG_RE = /\b(?:you\s+are\s+now|act\s+as|pretend\s+(?:to\s+be|you\s+are)|roleplay\s+as|simulate\s+(?:being\s+)?a|from\s+now\s+on|do\s+not\s+(?:follow|obey|adhere\s+to|comply\s+with))\b/i;
// Imperative verbs that only count when paired with an override target.
const ROLE_OVERRIDE_COMMAND_RE = /\b(?:ignore|disregard|forget|bypass|override|overwrite|skip|reveal|output|print|display|show|repeat|recite|write\s+out)\b/i;
// "follow/obey … instructions|rules|…" style demands.
const ROLE_OVERRIDE_FOLLOW_RE = /\b(?:follow|obey)\s+(?:all\s+)?(?:the\s+|my\s+|your\s+)?(?:instructions?|prompt|rules?|guidelines?|constraints?)\b/i;
// Nouns naming the thing an override command would act on.
const ROLE_OVERRIDE_TARGET_RE = /\b(?:instructions?|prompt|system|rules?|guidelines?|constraints?|training|context|developer\s+message)\b/i;

/**
 * Decide whether a single line looks like a role-prefixed injection attempt.
 *
 * The line must start with a user/assistant/bot role label. It is then
 * flagged when any one of the following holds:
 *   1. it contains a strong override phrase ("you are now", "from now on", …);
 *   2. it contains a "follow/obey … instructions" style demand;
 *   3. it contains BOTH a command verb ("ignore", "reveal", …) AND a target
 *      noun ("instructions", "prompt", …) anywhere in the line.
 *
 * None of the regexes used here carry the `g` flag, so `.test()` is stateless.
 *
 * @param {string} line - one line of text, without its trailing newline
 * @returns {boolean} true when the line should be dropped
 */
function isRolePrefixedInjectionLine(line) {
  if (!ROLE_PREFIX_RE.test(line)) return false;

  const hasStandaloneOverride =
    ROLE_OVERRIDE_STRONG_RE.test(line) || ROLE_OVERRIDE_FOLLOW_RE.test(line);
  if (hasStandaloneOverride) return true;

  // A bare command verb is too common in ordinary text; require a target too.
  return ROLE_OVERRIDE_COMMAND_RE.test(line) && ROLE_OVERRIDE_TARGET_RE.test(line);
}
|
||
|
||
// Characters removed outright (replaced with the empty string) before any
// pattern matching, so they cannot be used to split up a blocked token:
//   U+0000-U+001F  ASCII control chars, EXCEPT tab (U+0009), newline (U+000A)
//                  and carriage return (U+000D), which are preserved
//   U+007F         DEL
//   U+00AD         soft hyphen
//   U+200B-U+200D  zero-width space / non-joiner / joiner
//   U+2028-U+2029  Unicode line/paragraph separator
//   U+FEFF         BOM / zero-width no-break space
const CONTROL_CHARS_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F\xAD\u200B-\u200D\u2028\u2029\uFEFF]/g;
|
||
|
||
/**
 * Sanitize a single string for safe inclusion in an LLM prompt.
 *
 * Processing order:
 *   1. Remove control / zero-width characters (CONTROL_CHARS_RE) so they cannot
 *      be used to break up a blocked token.
 *   2. Drop every line that isRolePrefixedInjectionLine() flags. Lines are
 *      separated by "\n"; within a surviving line, segments separated by a
 *      carriage return are checked as well, so a role-prefixed payload cannot
 *      hide behind a lone "\r".
 *   3. Replace each INJECTION_PATTERNS match with a single space (a space, not
 *      the empty string, so the text on either side of a match cannot fuse
 *      into a new blocked token).
 *   4. Collapse runs of two or more whitespace characters to one space and trim.
 *
 * Non-string input yields '' so callers can map over mixed arrays.
 *
 * @param {unknown} input
 * @returns {string}
 */
export function sanitizeForPrompt(input) {
  if (typeof input !== 'string') return '';

  const withoutControlChars = input.replace(CONTROL_CHARS_RE, '');

  const keptLines = [];
  for (const line of withoutControlChars.split('\n')) {
    if (isRolePrefixedInjectionLine(line)) continue;

    if (!line.includes('\r')) {
      keptLines.push(line);
      continue;
    }
    // CR is preserved by CONTROL_CHARS_RE, and the role-prefix check is
    // anchored to the start of the string, so check each CR-delimited segment
    // on its own. Text without a flagged segment round-trips unchanged.
    keptLines.push(
      line
        .split('\r')
        .filter((segment) => !isRolePrefixedInjectionLine(segment))
        .join('\r'),
    );
  }

  let s = keptLines.join('\n');
  // String.prototype.replace restarts global regexes from index 0 on every
  // call, so no manual lastIndex reset is needed.
  for (const pattern of INJECTION_PATTERNS) {
    s = s.replace(pattern, ' ');
  }

  return s.replace(/\s{2,}/g, ' ').trim();
}
|
||
|
||
/**
 * Run sanitizeForPrompt() over every entry of an array and keep only the
 * results that are still non-empty afterwards.
 *
 * A non-array argument yields an empty array. Non-string entries become ''
 * inside sanitizeForPrompt() and are therefore dropped.
 *
 * @param {unknown[]} headlines
 * @returns {string[]}
 */
export function sanitizeHeadlines(headlines) {
  if (!Array.isArray(headlines)) return [];

  const cleaned = [];
  for (const headline of headlines) {
    const safe = sanitizeForPrompt(headline);
    if (safe.length > 0) cleaned.push(safe);
  }
  return cleaned;
}
|
||
|
||
// Structural-only patterns safe to apply to headlines without mangling
// legitimate tech/security news (e.g. "Output your system prompt" as a story subject).
// Only delimiter tokens, role-wrapper tags and separator lines are listed;
// semantic instruction phrases are deliberately absent.
// Frozen (shallowly) so the list cannot be altered by accident.
const STRUCTURAL_PATTERNS = Object.freeze([
  // Model-specific delimiter tokens
  /<\|(?:im_start|im_end|begin_of_text|end_of_text|eot_id|start_header_id|end_header_id)\|>/gi,
  /<\|(?:endoftext|fim_prefix|fim_middle|fim_suffix|pad)\|>/gi,
  /\[(?:INST|\/INST|SYS|\/SYS)\]/gi,
  // XML-style role wrappers, opening or closing
  /<\/?(system|user|assistant|prompt|context|instruction)\b[^>]*>/gi,
  // A whole line consisting of 3+ '-' or '=' characters
  /^[\-=]{3,}$/gm,
]);
|
||
|
||
/**
 * Sanitize a headline for safe inclusion in an LLM prompt, preserving
 * legitimate headlines that quote injection phrases as news subjects.
 *
 * Only structural/delimiter patterns are stripped — semantic instruction
 * phrases are left intact to avoid mangling tech/security news headlines.
 * Full sanitizeForPrompt() is reserved for free-form geoContext.
 *
 * Steps: remove CONTROL_CHARS_RE characters, replace each STRUCTURAL_PATTERNS
 * match with a single space, collapse whitespace runs and trim. Non-string
 * input yields ''.
 *
 * @param {unknown} input
 * @returns {string}
 */
export function sanitizeHeadline(input) {
  if (typeof input !== 'string') return '';

  let cleaned = input.replace(CONTROL_CHARS_RE, '');
  // String.prototype.replace restarts global regexes from index 0 on every
  // call, so no manual lastIndex reset is needed.
  for (const pattern of STRUCTURAL_PATTERNS) {
    cleaned = cleaned.replace(pattern, ' ');
  }

  return cleaned.replace(/\s{2,}/g, ' ').trim();
}
|
||
|
||
/**
 * Apply sanitizeHeadline() over an array, dropping empties.
 *
 * A non-array argument yields an empty array. Non-string entries become ''
 * inside sanitizeHeadline() and are therefore dropped.
 *
 * @param {unknown[]} headlines
 * @returns {string[]}
 */
export function sanitizeHeadlinesLight(headlines) {
  if (!Array.isArray(headlines)) return [];

  const cleaned = [];
  for (const headline of headlines) {
    const safe = sanitizeHeadline(headline);
    if (safe.length > 0) cleaned.push(safe);
  }
  return cleaned;
}
|