worldmonitor/api/classify-batch.js

import { Redis } from '@upstash/redis';

export const config = {
  runtime: 'edge',
};

const GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions';
const MODEL = 'llama-3.1-8b-instant';
const CACHE_TTL_SECONDS = 86400;
const CACHE_VERSION = 'v1';
const MAX_BATCH_SIZE = 20;

let redis = null;
let redisInitFailed = false;
function getRedis() {
  if (redis) return redis;
  if (redisInitFailed) return null;
  const url = process.env.UPSTASH_REDIS_REST_URL;
  const token = process.env.UPSTASH_REDIS_REST_TOKEN;
  if (url && token) {
    try {
      redis = new Redis({ url, token });
    } catch (err) {
      console.warn('[ClassifyBatch] Redis init failed:', err.message);
      redisInitFailed = true;
      return null;
    }
  }
  return redis;
}

function hashString(str) {
  let hash = 0;
  for (let i = 0; i < str.length; i++) {
    hash = ((hash << 5) - hash) + str.charCodeAt(i);
    hash |= 0;
  }
  return Math.abs(hash).toString(36);
}

const VALID_LEVELS = ['critical', 'high', 'medium', 'low', 'info'];
const VALID_CATEGORIES = [
  'conflict', 'protest', 'disaster', 'diplomatic', 'economic',
  'terrorism', 'cyber', 'health', 'environmental', 'military',
  'crime', 'infrastructure', 'tech', 'general',
];

export default async function handler(request) {
  if (request.method !== 'POST') {
    return new Response(JSON.stringify({ error: 'Method not allowed' }), {
      status: 405,
      headers: { 'Content-Type': 'application/json' },
    });
  }

  const apiKey = process.env.GROQ_API_KEY;
  if (!apiKey) {
    return new Response(JSON.stringify({ fallback: true }), {
      status: 503,
      headers: { 'Content-Type': 'application/json' },
    });
  }

  let body;
  try {
    body = await request.json();
  } catch {
    return new Response(JSON.stringify({ error: 'Invalid JSON body' }), {
      status: 400,
      headers: { 'Content-Type': 'application/json' },
    });
  }

  const { titles, variant = 'full' } = body;
  if (!Array.isArray(titles) || titles.length === 0) {
    return new Response(JSON.stringify({ error: 'titles array required' }), {
      status: 400,
      headers: { 'Content-Type': 'application/json' },
    });
  }

  const batch = titles.slice(0, MAX_BATCH_SIZE);
  const results = new Array(batch.length).fill(null);
  const uncachedIndices = [];

  const redisClient = getRedis();
  if (redisClient) {
    try {
      const cacheKeys = batch.map(
        (t) => `classify:${CACHE_VERSION}:${hashString(t.toLowerCase() + ':' + variant)}`
      );
      const cached = await redisClient.mget(...cacheKeys);
      for (let i = 0; i < cached.length; i++) {
        const val = cached[i];
        if (val && typeof val === 'object' && val.level) {
          results[i] = { level: val.level, category: val.category, cached: true };
        } else {
          uncachedIndices.push(i);
        }
      }
    } catch (e) {
      console.warn('[ClassifyBatch] Cache read error:', e.message);
      for (let i = 0; i < batch.length; i++) uncachedIndices.push(i);
    }
  } else {
    for (let i = 0; i < batch.length; i++) uncachedIndices.push(i);
  }

  if (uncachedIndices.length === 0) {
    return new Response(JSON.stringify({ results }), {
      status: 200,
      headers: { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=3600' },
    });
  }

  const uncachedTitles = uncachedIndices.map((i) => batch[i]);
  const isTech = variant === 'tech';
  const numberedList = uncachedTitles.map((t, i) => `${i + 1}. ${t}`).join('\n');

  const systemPrompt = `You classify news headlines into threat level and category. Return ONLY a valid JSON array, no other text.

Levels: critical, high, medium, low, info
Categories: conflict, protest, disaster, diplomatic, economic, terrorism, cyber, health, environmental, military, crime, infrastructure, tech, general

${isTech ? 'Focus: technology, startups, AI, cybersecurity. Most tech news is "low" or "info" unless it involves outages, breaches, or major disruptions.' : 'Focus: geopolitical events, conflicts, disasters, diplomacy. Classify by real-world severity and impact.'}

Return a JSON array with one object per headline in order: [{"level":"...","category":"..."},...]`;

  try {
    const response = await fetch(GROQ_API_URL, {
      method: 'POST',
      headers: {
        'Authorization': `Bearer ${apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: MODEL,
        messages: [
          { role: 'system', content: systemPrompt },
          { role: 'user', content: numberedList },
        ],
        temperature: 0,
        max_tokens: uncachedTitles.length * 60,
      }),
    });

    if (!response.ok) {
      console.error('[ClassifyBatch] Groq error:', response.status);
      return new Response(JSON.stringify({ results, fallback: true }), {
        status: response.status,
        headers: { 'Content-Type': 'application/json' },
      });
    }

    const data = await response.json();
    const raw = data.choices?.[0]?.message?.content?.trim();
    if (!raw) {
      return new Response(JSON.stringify({ results, fallback: true }), {
        status: 200,
        headers: { 'Content-Type': 'application/json' },
      });
    }

    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch {
      const match = raw.match(/\[[\s\S]*\]/);
      if (match) {
        try { parsed = JSON.parse(match[0]); } catch { /* fall through */ }
      }
    }

    if (!Array.isArray(parsed)) {
      return new Response(JSON.stringify({ results, fallback: true }), {
        status: 200,
        headers: { 'Content-Type': 'application/json' },
      });
    }

    const cacheWrites = [];
    for (let i = 0; i < uncachedIndices.length; i++) {
      const classification = parsed[i];
      if (!classification) continue;

      const level = VALID_LEVELS.includes(classification.level) ? classification.level : null;
      const category = VALID_CATEGORIES.includes(classification.category) ? classification.category : null;
      if (!level || !category) continue;

      const idx = uncachedIndices[i];
      results[idx] = { level, category, cached: false };

      if (redisClient) {
        const cacheKey = `classify:${CACHE_VERSION}:${hashString(batch[idx].toLowerCase() + ':' + variant)}`;
        cacheWrites.push(
          redisClient.set(cacheKey, { level, category, timestamp: Date.now() }, { ex: CACHE_TTL_SECONDS })
            .catch((e) => console.warn('[ClassifyBatch] Cache write error:', e.message))
        );
      }
    }

    if (cacheWrites.length > 0) {
      await Promise.allSettled(cacheWrites);
    }

    return new Response(JSON.stringify({ results }), {
      status: 200,
      headers: { 'Content-Type': 'application/json', 'Cache-Control': 'public, max-age=3600' },
    });
  } catch (error) {
    console.error('[ClassifyBatch] Error:', error.message);
    return new Response(JSON.stringify({ results, fallback: true }), {
      status: 500,
      headers: { 'Content-Type': 'application/json' },
    });
  }
}