Files
worldmonitor/server/_shared/llm.ts
Elie Habib fe67111dc9 feat: harness engineering P0 - linting, testing, architecture docs (#1587)
* feat: harness engineering P0 - linting, testing, architecture docs

Add foundational infrastructure for agent-first development:

- AGENTS.md: agent entry point with progressive disclosure to deeper docs
- ARCHITECTURE.md: 12-section system reference with source-file refs and ownership rule
- Biome 2.4.7 linter with project-tuned rules, CI workflow (lint-code.yml)
- Architectural boundary lint enforcing forward-only dependency direction (lint-boundaries.mjs)
- Unit test CI workflow (test.yml), all 1083 tests passing
- Fixed 9 pre-existing test failures (bootstrap sync, deploy-config headers, globe parity, redis mocks, geometry URL, import.meta.env null safety)
- Fixed 12 architectural boundary violations (types moved to proper layers)
- Added 3 missing cache tier entries in gateway.ts
- Synced cache-keys.ts with bootstrap.js
- Renamed docs/architecture.mdx to "Design Philosophy" with cross-references
- Deprecated legacy docs/Docs_To_Review/ARCHITECTURE.md
- Harness engineering roadmap tracking doc

* fix: address PR review feedback on harness-engineering-p0

- countries-geojson.test.mjs: skip gracefully when CDN unreachable
  instead of failing CI on network issues
- country-geometry-overrides.test.mts: relax timing assertion
  (250ms -> 2000ms) for constrained CI environments
- lint-boundaries.mjs: implement the documented api/ boundary check
  (was documented but missing, causing false green)

* fix(lint): scan api/ .ts files in boundary check

The api/ boundary check only scanned .js/.mjs files, missing the 25
sebuf RPC .ts edge functions. Now scans .ts files with correct rules:
- Legacy .js: fully self-contained (no server/ or src/ imports)
- RPC .ts: may import server/ and src/generated/ (bundled at deploy),
  but blocks imports from src/ application code

* fix(lint): detect import() type expressions in boundary lint

- Move AppContext back to app/app-context.ts (aggregate type that
  references components/services/utils belongs at the top, not types/)
- Move HappyContentCategory and TechHQ to types/ (simple enums/interfaces)
- Boundary lint now catches import('@/layer') expressions, not just
  from '@/layer' imports
- correlation-engine imports of AppContext marked boundary-ignore
  (type-only imports of top-level aggregate)
2026-03-14 21:29:21 +04:00

202 lines
5.5 KiB
TypeScript

import { CHROME_UA } from './constants';
import { isProviderAvailable } from './llm-health';
/**
 * Resolved connection details for one LLM provider's OpenAI-compatible
 * chat-completions endpoint, as assembled by getProviderCredentials().
 */
export interface ProviderCredentials {
/** Absolute URL of the chat-completions endpoint to POST to. */
apiUrl: string;
/** Model identifier sent in the request body. */
model: string;
/** HTTP headers for the request (content type plus auth when configured). */
headers: Record<string, string>;
/** Provider-specific fields spread into the JSON body (e.g. Ollama's `think: false`). */
extraBody?: Record<string, unknown>;
}
// Hostnames the Ollama provider may target when NOT running as a local
// deployment (see isLocalDeployment). Acts as an SSRF guard: a hosted server
// refuses to follow OLLAMA_API_URL to arbitrary hosts. Both '::1' and '[::1]'
// are listed because WHATWG URL.hostname keeps brackets on IPv6 literals.
const OLLAMA_HOST_ALLOWLIST = new Set([
'localhost', '127.0.0.1', '::1', '[::1]', 'host.docker.internal',
]);
/**
 * Whether the server is running alongside the user's machine (sidecar binary
 * or docker deployment), as signalled by the LOCAL_API_MODE environment
 * variable. Local deployments skip the Ollama hostname allowlist check.
 */
function isLocalDeployment(): boolean {
  if (typeof process === 'undefined') return false;
  const mode = process.env?.LOCAL_API_MODE ?? '';
  return ['sidecar', 'docker'].some((marker) => mode.includes(marker));
}
/**
 * Resolve endpoint URL, model, and headers for the named LLM provider.
 *
 * Returns null when the provider is unknown, its API key/URL is not
 * configured, or (for Ollama on non-local deployments) OLLAMA_API_URL points
 * at a host outside the loopback/docker allowlist or cannot be parsed.
 */
export function getProviderCredentials(provider: string): ProviderCredentials | null {
  switch (provider) {
    case 'ollama': {
      const baseUrl = process.env.OLLAMA_API_URL;
      if (!baseUrl) return null;
      // SSRF guard: hosted deployments may only reach allowlisted hosts.
      if (!isLocalDeployment()) {
        let hostname: string;
        try {
          hostname = new URL(baseUrl).hostname;
        } catch {
          return null; // malformed OLLAMA_API_URL
        }
        if (!OLLAMA_HOST_ALLOWLIST.has(hostname)) {
          console.warn(`[llm] Ollama blocked: hostname "${hostname}" not in allowlist`);
          return null;
        }
      }
      const headers: Record<string, string> = { 'Content-Type': 'application/json' };
      const apiKey = process.env.OLLAMA_API_KEY;
      if (apiKey) headers.Authorization = `Bearer ${apiKey}`;
      return {
        apiUrl: new URL('/v1/chat/completions', baseUrl).toString(),
        model: process.env.OLLAMA_MODEL || 'llama3.1:8b',
        headers,
        // Ollama-specific body flag to suppress "thinking" output.
        extraBody: { think: false },
      };
    }
    case 'groq': {
      const apiKey = process.env.GROQ_API_KEY;
      if (!apiKey) return null;
      return {
        apiUrl: 'https://api.groq.com/openai/v1/chat/completions',
        model: 'llama-3.1-8b-instant',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
      };
    }
    case 'openrouter': {
      const apiKey = process.env.OPENROUTER_API_KEY;
      if (!apiKey) return null;
      return {
        apiUrl: 'https://openrouter.ai/api/v1/chat/completions',
        model: 'openrouter/free',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
          'HTTP-Referer': 'https://worldmonitor.app',
          'X-Title': 'World Monitor',
        },
      };
    }
    default:
      return null;
  }
}
/**
 * Remove model "reasoning" blocks from LLM output.
 *
 * First pass deletes properly closed tag pairs; second pass truncates from any
 * dangling open tag to the end of the string (models sometimes emit an open
 * tag without closing it). Matching is case-insensitive and spans newlines.
 */
export function stripThinkingTags(text: string): string {
  // Fully closed open/close pairs, removed in place.
  const closedPairs = [
    /<think>[\s\S]*?<\/think>/gi,
    /<\|thinking\|>[\s\S]*?<\|\/thinking\|>/gi,
    /<reasoning>[\s\S]*?<\/reasoning>/gi,
    /<reflection>[\s\S]*?<\/reflection>/gi,
    /<\|begin_of_thought\|>[\s\S]*?<\|end_of_thought\|>/gi,
  ];
  // Unclosed open tags: drop everything from the tag to the end.
  const danglingOpens = [
    /<think>[\s\S]*/gi,
    /<\|thinking\|>[\s\S]*/gi,
    /<reasoning>[\s\S]*/gi,
    /<reflection>[\s\S]*/gi,
    /<\|begin_of_thought\|>[\s\S]*/gi,
  ];
  const afterPairs = closedPairs
    .reduce((acc, pattern) => acc.replace(pattern, ''), text)
    .trim();
  return danglingOpens
    .reduce((acc, pattern) => acc.replace(pattern, ''), afterPairs)
    .trim();
}
// Providers tried in this order by callLlm() when no provider is forced.
const PROVIDER_CHAIN = ['ollama', 'groq', 'openrouter'] as const;
/** Options accepted by callLlm(). */
export interface LlmCallOptions {
/** Chat messages in OpenAI format ({ role, content }). */
messages: Array<{ role: string; content: string }>;
/** Sampling temperature. Default 0.3. */
temperature?: number;
/** max_tokens for the completion. Default 1500. */
maxTokens?: number;
/** Per-request abort timeout in milliseconds. Default 25_000. */
timeoutMs?: number;
/** Force a single provider; on any failure callLlm returns null instead of falling back. */
provider?: string;
/** Strip <think>/<reasoning>/etc. blocks from the response. Default true. */
stripThinkingTags?: boolean;
/** Optional acceptance check; a rejected response triggers fallback to the next provider. */
validate?: (content: string) => boolean;
}
/** Successful result of callLlm(). */
export interface LlmCallResult {
/** Response text (thinking tags stripped unless disabled). */
content: string;
/** Model that produced the response. */
model: string;
/** Provider name that served the request ('ollama' | 'groq' | 'openrouter'). */
provider: string;
/** total_tokens reported by the provider, or 0 when absent. */
tokens: number;
}
/**
 * Call a chat-completions LLM, trying providers in PROVIDER_CHAIN order
 * (ollama -> groq -> openrouter) until one yields usable content.
 *
 * A provider is skipped — and the next one tried — when it is unconfigured,
 * fails the health check, returns a non-2xx status, returns empty content
 * (including content that is empty after thinking-tag stripping), or is
 * rejected by the caller's validate() hook. When opts.provider is set,
 * fallback is disabled: any of those failures returns null immediately.
 *
 * @returns The first successful result, or null when every provider fails.
 */
export async function callLlm(opts: LlmCallOptions): Promise<LlmCallResult | null> {
const {
messages,
temperature = 0.3,
maxTokens = 1500,
timeoutMs = 25_000,
provider: forcedProvider,
stripThinkingTags: shouldStrip = true,
validate,
} = opts;
// Forcing a provider collapses the chain to exactly that one entry.
const providers = forcedProvider ? [forcedProvider] : [...PROVIDER_CHAIN];
for (const providerName of providers) {
const creds = getProviderCredentials(providerName);
if (!creds) {
if (forcedProvider) return null;
continue;
}
// Health gate: skip provider if endpoint is unreachable
if (!(await isProviderAvailable(creds.apiUrl))) {
console.warn(`[llm:${providerName}] Offline, skipping`);
if (forcedProvider) return null;
continue;
}
try {
const resp = await fetch(creds.apiUrl, {
method: 'POST',
// NOTE(review): CHROME_UA presumably avoids UA-based filtering — confirm against ./constants.
headers: { ...creds.headers, 'User-Agent': CHROME_UA },
body: JSON.stringify({
// extraBody is spread first, so the core fields below win on key collision.
...creds.extraBody,
model: creds.model,
messages,
temperature,
max_tokens: maxTokens,
}),
signal: AbortSignal.timeout(timeoutMs),
});
if (!resp.ok) {
console.warn(`[llm:${providerName}] HTTP ${resp.status}`);
if (forcedProvider) return null;
continue;
}
// Minimal OpenAI-compatible response shape; only the fields we read.
const data = (await resp.json()) as {
choices?: Array<{ message?: { content?: string } }>;
usage?: { total_tokens?: number };
};
let content = data.choices?.[0]?.message?.content?.trim() || '';
if (!content) {
if (forcedProvider) return null;
continue;
}
const tokens = data.usage?.total_tokens ?? 0;
if (shouldStrip) {
content = stripThinkingTags(content);
// A response that was ALL thinking counts as a failure for this provider.
if (!content) {
if (forcedProvider) return null;
continue;
}
}
if (validate && !validate(content)) {
console.warn(`[llm:${providerName}] validate() rejected response, trying next`);
if (forcedProvider) return null;
continue;
}
return { content, model: creds.model, provider: providerName, tokens };
} catch (err) {
// Network failure or AbortSignal timeout: log and fall through to the next provider.
console.warn(`[llm:${providerName}] ${(err as Error).message}`);
if (forcedProvider) return null;
}
}
return null;
}