worldmonitor/scripts/_llm-json.mjs

/**
 * Shared LLM JSON extraction utilities.
 *
 * Handles the common failure modes of LLM JSON output:
 *   - C-style line and block comments (// ..., /* ... *\/)
 *   - Trailing commas before } and ]
 *   - Partial outputs (brace/bracket extraction as fallback)
 *
 * Note: cleanJsonText only strips comments and trailing commas that sit
 * outside double-quoted string literals, so values such as "https://..."
 * survive cleaning. A quote that has no closing quote on the same line is not
 * treated as a string; the text after it is cleaned best-effort.
 */

/**
 * Strip C-style comments and trailing commas from a JSON-like string.
 *
 * Complete double-quoted literals (closing quote on the same line, backslash
 * escapes honoured) are copied through unchanged, so comment markers and
 * `,}` / `,]` sequences inside string values are preserved.
 *
 * @param {string} text - Raw, possibly malformed JSON text.
 * @returns {string} The cleaned, trimmed text; '' when `text` is not a string.
 */
export function cleanJsonText(text) {
  // LLM responses can legitimately carry null/undefined content; treat that as empty.
  if (typeof text !== 'string') return '';

  // Pass 1: drop comments. The string alternative comes first so that a literal
  // is consumed as a whole and anything inside it can never start a comment.
  // Raw newlines are excluded from literals so that a stray quote in surrounding
  // prose cannot flip the string/non-string state for the rest of the text.
  const withoutComments = text.replace(
    /"(?:\\.|[^"\\\n])*"|\/\*[\s\S]*?\*\/|\/\/[^\n]*/g,
    (match) => (match.startsWith('"') ? match : '')
  );

  // Pass 2: drop trailing commas. This runs after pass 1 so that a comma
  // separated from its bracket only by a comment is also removed.
  return withoutComments
    .replace(
      /"(?:\\.|[^"\\\n])*"|,(\s*[}\]])/g,
      // `closer` is only defined when the trailing-comma alternative matched.
      (match, closer) => (closer === undefined ? match : closer)
    )
    .trim();
}

/**
 * Return the first balanced `open` ... `close` span found in `text`.
 *
 * The text is first passed through cleanJsonText. Scanning starts at the first
 * occurrence of `open`; delimiters inside double-quoted strings are ignored,
 * and a backslash causes the character after it to be skipped.
 *
 * @param {string} text - Raw text that may contain a JSON value.
 * @param {string} open - Opening delimiter character.
 * @param {string} close - Closing delimiter character.
 * @returns {string} The balanced span; '' when `open` never occurs; everything
 *   from `open` to the end of the cleaned text when the span is never closed.
 */
function extractFirstDelimited(text, open, close) {
  const source = cleanJsonText(text);
  const begin = source.indexOf(open);
  if (begin < 0) {
    return '';
  }

  let nesting = 0;
  let insideString = false;
  let skipNext = false;

  for (let idx = begin; idx < source.length; idx += 1) {
    const ch = source[idx];

    if (skipNext) {
      // Character following a backslash: consume it without interpretation.
      skipNext = false;
    } else if (ch === '\\') {
      skipNext = true;
    } else if (ch === '"') {
      insideString = !insideString;
    } else if (!insideString) {
      if (ch === open) {
        nesting += 1;
      }
      if (ch === close) {
        nesting -= 1;
        if (nesting === 0) {
          return source.slice(begin, idx + 1);
        }
      }
    }
  }

  // Never balanced (e.g. truncated output): hand back the partial tail.
  return source.slice(begin);
}

/**
 * Extract the first `{ ... }` span from `text` via extractFirstDelimited.
 *
 * @param {string} text - Raw text that may contain a JSON object.
 * @returns {string} The extracted span, or '' when `text` contains no `{`.
 */
export function extractFirstJsonObject(text) {
  return extractFirstDelimited(text, '{', '}');
}

/**
 * Extract the first `[ ... ]` span from `text` via extractFirstDelimited.
 *
 * @param {string} text - Raw text that may contain a JSON array.
 * @returns {string} The extracted span, or '' when `text` contains no `[`.
 */
export function extractFirstJsonArray(text) {
  return extractFirstDelimited(text, '[', ']');
}