Files
worldmonitor/api/classify-batch.js
Elie Habib a9224254a5 fix: security hardening — CORS, auth bypass, origin validation & bump v2.2.7
- Tighten CORS regex to block worldmonitorEVIL.vercel.app spoofing
- Move sidecar /api/local-env-update behind token auth + add key allowlist
- Add postMessage origin/source validation in LiveNewsPanel
- Replace postMessage wildcard '*' targetOrigin with specific origin
- Add isDisallowedOrigin() check to 25 API endpoints missing it
- Migrate gdelt-geo & EIA from custom CORS to shared _cors.js
- Add CORS to firms-fires, stock-index, youtube/live endpoints
- Tighten youtube/embed.js ALLOWED_ORIGINS regex
- Remove 'unsafe-inline' from CSP script-src
- Add iframe sandbox attribute to YouTube embed
- Validate meta-tags URL query params with regex allowlist
2026-02-15 20:33:20 +04:00

199 lines
6.7 KiB
JavaScript

import { getCachedJson, setCachedJson, mget, hashString } from './_upstash-cache.js';
import { getCorsHeaders, isDisallowedOrigin } from './_cors.js';
export const config = {
runtime: 'edge',
};
const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions';
const MODEL = 'llama-3.1-8b-instant';
const CACHE_TTL_SECONDS = 86400;
const CACHE_VERSION = 'v1';
const MAX_BATCH_SIZE = 20;
const VALID_LEVELS = ['critical', 'high', 'medium', 'low', 'info'];
const VALID_CATEGORIES = [
'conflict', 'protest', 'disaster', 'diplomatic', 'economic',
'terrorism', 'cyber', 'health', 'environmental', 'military',
'crime', 'infrastructure', 'tech', 'general',
];
export default async function handler(request) {
const corsHeaders = getCorsHeaders(request, 'POST, OPTIONS');
if (request.method === 'OPTIONS') {
return new Response(null, { status: 204, headers: corsHeaders });
}
if (request.method !== 'POST') {
return new Response(JSON.stringify({ error: 'Method not allowed' }), {
status: 405,
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
});
}
if (isDisallowedOrigin(request)) {
return new Response(JSON.stringify({ error: 'Origin not allowed' }), {
status: 403,
headers: { 'Content-Type': 'application/json' },
});
}
const apiKey = process.env.GROQ_API_KEY;
if (!apiKey) {
return new Response(JSON.stringify({ results: [], fallback: true, skipped: true, reason: 'GROQ_API_KEY not configured' }), {
status: 200,
headers: { 'Content-Type': 'application/json' },
});
}
const contentLength = parseInt(request.headers.get('content-length') || '0', 10);
if (contentLength > 51200) {
return new Response(JSON.stringify({ error: 'Payload too large' }), {
status: 413,
headers: { ...corsHeaders, 'Content-Type': 'application/json' },
});
}
let body;
try {
body = await request.json();
} catch {
return new Response(JSON.stringify({ error: 'Invalid JSON body' }), {
status: 400,
headers: { 'Content-Type': 'application/json' },
});
}
const { titles, variant = 'full' } = body;
if (!Array.isArray(titles) || titles.length === 0) {
return new Response(JSON.stringify({ error: 'titles array required' }), {
status: 400,
headers: { 'Content-Type': 'application/json' },
});
}
const batch = titles.slice(0, MAX_BATCH_SIZE);
const results = new Array(batch.length).fill(null);
const uncachedIndices = [];
const cacheKeys = batch.map(
(t) => `classify:${CACHE_VERSION}:${hashString(t.toLowerCase() + ':' + variant)}`
);
const cached = await mget(...cacheKeys);
for (let i = 0; i < cached.length; i++) {
const val = cached[i];
if (val && typeof val === 'object' && val.level) {
results[i] = { level: val.level, category: val.category, cached: true };
} else {
uncachedIndices.push(i);
}
}
if (uncachedIndices.length === 0) {
return new Response(JSON.stringify({ results }), {
status: 200,
headers: { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=600' },
});
}
const uncachedTitles = uncachedIndices.map((i) => batch[i]);
const isTech = variant === 'tech';
const numberedList = uncachedTitles.map((t, i) => `${i + 1}. ${t}`).join('\n');
const systemPrompt = `You classify news headlines into threat level and category. Return ONLY a valid JSON array, no other text.
Levels: critical, high, medium, low, info
Categories: conflict, protest, disaster, diplomatic, economic, terrorism, cyber, health, environmental, military, crime, infrastructure, tech, general
${isTech ? 'Focus: technology, startups, AI, cybersecurity. Most tech news is "low" or "info" unless it involves outages, breaches, or major disruptions.' : 'Focus: geopolitical events, conflicts, disasters, diplomacy. Classify by real-world severity and impact.'}
Return a JSON array with one object per headline in order: [{"level":"...","category":"..."},...]`;
try {
const response = await fetch(GROQ_API_URL, {
method: 'POST',
headers: {
'Authorization': `Bearer ${apiKey}`,
'Content-Type': 'application/json',
},
body: JSON.stringify({
model: MODEL,
messages: [
{ role: 'system', content: systemPrompt },
{ role: 'user', content: numberedList },
],
temperature: 0,
max_tokens: uncachedTitles.length * 60,
}),
});
if (!response.ok) {
console.error('[ClassifyBatch] Groq error:', response.status);
return new Response(JSON.stringify({ results, fallback: true }), {
status: response.status,
headers: { 'Content-Type': 'application/json' },
});
}
const data = await response.json();
const raw = data.choices?.[0]?.message?.content?.trim();
if (!raw) {
return new Response(JSON.stringify({ results, fallback: true }), {
status: 200,
headers: { 'Content-Type': 'application/json' },
});
}
let parsed;
try {
parsed = JSON.parse(raw);
} catch {
const match = raw.match(/\[[\s\S]*\]/);
if (match) {
try { parsed = JSON.parse(match[0]); } catch { /* fall through */ }
}
}
if (!Array.isArray(parsed)) {
return new Response(JSON.stringify({ results, fallback: true }), {
status: 200,
headers: { 'Content-Type': 'application/json' },
});
}
const cacheWrites = [];
for (let i = 0; i < uncachedIndices.length; i++) {
const classification = parsed[i];
if (!classification) continue;
const level = VALID_LEVELS.includes(classification.level) ? classification.level : null;
const category = VALID_CATEGORIES.includes(classification.category) ? classification.category : null;
if (!level || !category) continue;
const idx = uncachedIndices[i];
results[idx] = { level, category, cached: false };
const cacheKey = `classify:${CACHE_VERSION}:${hashString(batch[idx].toLowerCase() + ':' + variant)}`;
cacheWrites.push(
setCachedJson(cacheKey, { level, category, timestamp: Date.now() }, CACHE_TTL_SECONDS)
);
}
if (cacheWrites.length > 0) {
await Promise.allSettled(cacheWrites);
}
return new Response(JSON.stringify({ results }), {
status: 200,
headers: { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=3600, s-maxage=3600, stale-while-revalidate=600' },
});
} catch (error) {
console.error('[ClassifyBatch] Error:', error.message);
return new Response(JSON.stringify({ results, fallback: true }), {
status: 500,
headers: { 'Content-Type': 'application/json' },
});
}
}