Files
worldmonitor/api/classify-batch.js
Elie Habib 871af119a3 Batch AI classification and Railway-direct AIS routing
- Add /api/classify-batch endpoint: classifies up to 20 headlines per Groq call
  (reduces 182 individual API calls to ~10 batched calls, 90% rate limit savings)
- Update threat-classifier.ts: collect headlines in batch queue, flush every 500ms
  or when batch reaches 20 items
- Route AIS snapshot through Railway directly when VITE_WS_RELAY_URL is set,
  falling back to Vercel — eliminates 503 when WS_RELAY_URL not configured on Vercel
2026-02-13 20:38:20 +04:00

218 lines
6.9 KiB
JavaScript

import { Redis } from '@upstash/redis';
// Route configuration: this handler declares the 'edge' runtime.
export const config = { runtime: 'edge' };

// Groq chat-completions endpoint and the model requested for classification.
const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions';
const MODEL = 'llama-3.1-8b-instant';

// Lifetime of a cached classification: 24 hours, in seconds.
const CACHE_TTL_SECONDS = 24 * 60 * 60;

// Embedded in every cache key (presumably bumped to invalidate old entries).
const CACHE_VERSION = 'v1';

// Headlines beyond this count are dropped from a request before classification.
const MAX_BATCH_SIZE = 20;
// Module-level Redis client, created on first successful getRedis() call.
let redis = null;
// Set once the client constructor has thrown, so later calls do not retry.
let redisInitFailed = false;

/**
 * Return the shared Redis client, creating it on first use.
 *
 * The client is built from UPSTASH_REDIS_REST_URL and
 * UPSTASH_REDIS_REST_TOKEN. A missing variable is not remembered, so a later
 * call can still succeed; a constructor failure is remembered and every
 * subsequent call returns null.
 *
 * @returns {Redis|null} The client, or null when caching is unavailable.
 */
function getRedis() {
  if (!redis && !redisInitFailed) {
    const restUrl = process.env.UPSTASH_REDIS_REST_URL;
    const restToken = process.env.UPSTASH_REDIS_REST_TOKEN;
    if (restUrl && restToken) {
      try {
        redis = new Redis({ url: restUrl, token: restToken });
      } catch (initError) {
        console.warn('[ClassifyBatch] Redis init failed:', initError.message);
        redisInitFailed = true;
      }
    }
  }
  // Still null when unconfigured or when construction failed.
  return redis;
}
/**
 * Hash a string to a short base-36 token.
 *
 * Folds each UTF-16 code unit into a signed 32-bit accumulator as
 * `acc * 31 + codeUnit`, then renders the absolute value in base 36.
 * Not cryptographic: distinct inputs can produce the same token.
 *
 * @param {string} str - Text to hash.
 * @returns {string} Base-36 digits of the hash magnitude ("0" for "").
 */
function hashString(str) {
  let acc = 0;
  let pos = 0;
  while (pos < str.length) {
    // Math.imul keeps the multiply in 32-bit space; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + str.charCodeAt(pos)) | 0;
    pos += 1;
  }
  const magnitude = acc < 0 ? -acc : acc;
  return magnitude.toString(36);
}
// Threat levels accepted from the model; any other value is discarded.
const VALID_LEVELS = [
  'critical',
  'high',
  'medium',
  'low',
  'info',
];

// Categories accepted from the model; any other value is discarded.
const VALID_CATEGORIES = [
  'conflict',
  'protest',
  'disaster',
  'diplomatic',
  'economic',
  'terrorism',
  'cyber',
  'health',
  'environmental',
  'military',
  'crime',
  'infrastructure',
  'tech',
  'general',
];
/** Serialize `payload` as a JSON response with the given status and optional extra headers. */
function jsonResponse(payload, status = 200, extraHeaders = {}) {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { 'Content-Type': 'application/json', ...extraHeaders },
  });
}

/** Redis key for one headline under a variant; the headline is lower-cased before hashing. */
function classificationCacheKey(title, variant) {
  const material = `${title.toLowerCase()}:${variant}`;
  return `classify:${CACHE_VERSION}:${hashString(material)}`;
}

/** Parse the model reply as JSON, retrying on its outermost [...] span; undefined when neither parses. */
function parseModelReply(raw) {
  try {
    return JSON.parse(raw);
  } catch {
    const bracketed = raw.match(/\[[\s\S]*\]/);
    if (!bracketed) return undefined;
    try {
      return JSON.parse(bracketed[0]);
    } catch {
      return undefined;
    }
  }
}

/**
 * POST /api/classify-batch — classify a batch of news headlines by threat
 * level and category with a single Groq chat-completion call.
 *
 * Request body: `{ titles: string[], variant?: string }`. Only the first
 * MAX_BATCH_SIZE titles are processed. `variant === 'tech'` switches the
 * prompt focus; any other value uses the geopolitical focus.
 *
 * Responses:
 * - 405 for non-POST requests.
 * - 503 `{ fallback: true }` when GROQ_API_KEY is not set.
 * - 400 for an unparsable body, a missing or empty `titles` array, or
 *   non-string titles.
 * - 200 `{ results }` where `results[i]` is `{ level, category, cached }` or
 *   `null` for headlines the model did not classify validly.
 * - `{ results, fallback: true }` when the model call or its output is
 *   unusable: 200 for empty or malformed output, the upstream status for a
 *   non-OK Groq response, 500 for an unexpected error. `results` still
 *   carries any cache hits.
 *
 * @param {Request} request - Incoming request.
 * @returns {Promise<Response>} JSON response as described above.
 */
export default async function handler(request) {
  if (request.method !== 'POST') {
    return jsonResponse({ error: 'Method not allowed' }, 405);
  }

  const apiKey = process.env.GROQ_API_KEY;
  if (!apiKey) {
    return jsonResponse({ fallback: true }, 503);
  }

  let body;
  try {
    body = await request.json();
  } catch {
    return jsonResponse({ error: 'Invalid JSON body' }, 400);
  }

  // `null` is valid JSON but cannot be destructured; treat it as an empty body.
  const { titles, variant = 'full' } = body ?? {};
  if (!Array.isArray(titles) || titles.length === 0) {
    return jsonResponse({ error: 'titles array required' }, 400);
  }

  const batch = titles.slice(0, MAX_BATCH_SIZE);
  // Reject bad input before spending a model call; cache keys need strings.
  if (!batch.every((title) => typeof title === 'string')) {
    return jsonResponse({ error: 'titles must be strings' }, 400);
  }

  const results = new Array(batch.length).fill(null);
  // Every headline counts as uncached unless the cache lookup proves otherwise.
  let uncachedIndices = Array.from(batch.keys());
  const redisClient = getRedis();

  if (redisClient) {
    try {
      const cacheKeys = batch.map((title) => classificationCacheKey(title, variant));
      const cached = await redisClient.mget(...cacheKeys);
      const misses = [];
      // Iterate over the batch, not the reply, so a short reply cannot drop headlines.
      for (let i = 0; i < batch.length; i++) {
        const hit = cached?.[i];
        if (hit && typeof hit === 'object' && hit.level) {
          results[i] = { level: hit.level, category: hit.category, cached: true };
        } else {
          misses.push(i);
        }
      }
      uncachedIndices = misses;
    } catch (e) {
      // Cache is best-effort: on failure every headline stays uncached.
      console.warn('[ClassifyBatch] Cache read error:', e.message);
    }
  }

  if (uncachedIndices.length === 0) {
    return jsonResponse({ results }, 200, { 'Cache-Control': 'public, max-age=3600' });
  }

  const fallback = (status = 200) => jsonResponse({ results, fallback: true }, status);

  const uncachedTitles = uncachedIndices.map((i) => batch[i]);
  const isTech = variant === 'tech';
  // Collapse whitespace so each headline is exactly one numbered line; an
  // embedded newline could otherwise forge extra entries in the list.
  const numberedList = uncachedTitles
    .map((title, i) => `${i + 1}. ${title.replace(/\s+/g, ' ').trim()}`)
    .join('\n');
  const focus = isTech
    ? 'Focus: technology, startups, AI, cybersecurity. Most tech news is "low" or "info" unless it involves outages, breaches, or major disruptions.'
    : 'Focus: geopolitical events, conflicts, disasters, diplomacy. Classify by real-world severity and impact.';
  // Levels and categories come from the validation lists so the two cannot drift.
  const systemPrompt = [
    'You classify news headlines into threat level and category. Return ONLY a valid JSON array, no other text.',
    `Levels: ${VALID_LEVELS.join(', ')}`,
    `Categories: ${VALID_CATEGORIES.join(', ')}`,
    focus,
    'Return a JSON array with one object per headline in order: [{"level":"...","category":"..."},...]',
  ].join('\n');

  try {
    const response = await fetch(GROQ_API_URL, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: MODEL,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: numberedList },
        ],
        temperature: 0,
        max_tokens: uncachedTitles.length * 60,
      }),
    });

    if (!response.ok) {
      console.error('[ClassifyBatch] Groq error:', response.status);
      return fallback(response.status);
    }

    const data = await response.json();
    const raw = data.choices?.[0]?.message?.content?.trim();
    if (!raw) {
      return fallback();
    }

    const parsed = parseModelReply(raw);
    if (!Array.isArray(parsed)) {
      return fallback();
    }

    // Results are matched to headlines by position only. If the counts differ
    // the alignment cannot be trusted, so do not assign or cache anything.
    if (parsed.length !== uncachedTitles.length) {
      console.warn(
        `[ClassifyBatch] Expected ${uncachedTitles.length} classifications, got ${parsed.length}`,
      );
      return fallback();
    }

    const cacheWrites = [];
    for (let i = 0; i < uncachedIndices.length; i++) {
      const classification = parsed[i];
      if (!classification) continue;

      const level = VALID_LEVELS.includes(classification.level) ? classification.level : null;
      const category = VALID_CATEGORIES.includes(classification.category)
        ? classification.category
        : null;
      // Entries outside the allowed vocabularies are left as null.
      if (!level || !category) continue;

      const idx = uncachedIndices[i];
      results[idx] = { level, category, cached: false };

      if (redisClient) {
        cacheWrites.push(
          redisClient
            .set(
              classificationCacheKey(batch[idx], variant),
              { level, category, timestamp: Date.now() },
              { ex: CACHE_TTL_SECONDS },
            )
            .catch((e) => console.warn('[ClassifyBatch] Cache write error:', e.message)),
        );
      }
    }

    if (cacheWrites.length > 0) {
      // Wait for the writes before returning; failures are already logged above.
      await Promise.allSettled(cacheWrites);
    }

    return jsonResponse({ results }, 200, { 'Cache-Control': 'public, max-age=3600' });
  } catch (error) {
    console.error('[ClassifyBatch] Error:', error.message);
    return fallback(500);
  }
}