diff --git a/api/api-route-exceptions.json b/api/api-route-exceptions.json index 460de55f1..b1d07d1cf 100644 --- a/api/api-route-exceptions.json +++ b/api/api-route-exceptions.json @@ -316,20 +316,6 @@ "removal_issue": "TBD" }, - { - "path": "api/enrichment/company.js", - "category": "migration-pending", - "reason": "Migrating to enrichment/v1 service in commit 3 of #3207.", - "owner": "@SebastienMelki", - "removal_issue": "#3207" - }, - { - "path": "api/enrichment/signals.js", - "category": "migration-pending", - "reason": "Migrating to enrichment/v1 service in commit 3 of #3207.", - "owner": "@SebastienMelki", - "removal_issue": "#3207" - }, { "path": "api/contact.js", "category": "migration-pending", diff --git a/api/enrichment/_domain.js b/api/enrichment/_domain.js deleted file mode 100644 index f5e374576..000000000 --- a/api/enrichment/_domain.js +++ /dev/null @@ -1,19 +0,0 @@ -const DOMAIN_SUFFIX_RE = /\.(com|io|co|org|net|ai|dev|app)$/; - -export function toOrgSlugFromDomain(domain) { - return (domain || '') - .trim() - .toLowerCase() - .replace(DOMAIN_SUFFIX_RE, '') - .split('.') - .pop() || ''; -} - -export function inferCompanyNameFromDomain(domain) { - const orgSlug = toOrgSlugFromDomain(domain); - if (!orgSlug) return domain || ''; - - return orgSlug - .replace(/-/g, ' ') - .replace(/\b\w/g, (c) => c.toUpperCase()); -} diff --git a/api/enrichment/company.js b/api/enrichment/company.js deleted file mode 100644 index 5d1bc6236..000000000 --- a/api/enrichment/company.js +++ /dev/null @@ -1,203 +0,0 @@ -/** - * Company Enrichment API — Vercel Edge Function - * Aggregates company data from multiple public sources: - * - GitHub org data - * - Hacker News mentions - * - SEC EDGAR filings (public US companies) - * - Tech stack inference from GitHub repos - * - * GET /api/enrichment/company?domain=example.com - * GET /api/enrichment/company?name=Stripe - */ - -import { getCorsHeaders, isDisallowedOrigin } from '../_cors.js'; -import { checkRateLimit } from '../_rate-limit.js'; -import { inferCompanyNameFromDomain, toOrgSlugFromDomain } from './_domain.js'; - -export const config = { runtime: 'edge' }; - -const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'; -const CACHE_TTL_SECONDS = 3600; -const GITHUB_API_HEADERS = Object.freeze({ Accept: 'application/vnd.github.v3+json', 'User-Agent': UA }); - -async function fetchGitHubOrg(name) { - try { - const res = await fetch(`https://api.github.com/orgs/${encodeURIComponent(name)}`, { - headers: GITHUB_API_HEADERS, - signal: AbortSignal.timeout(5000), - }); - if (!res.ok) return null; - const data = await res.json(); - return { - name: data.name || data.login, - description: data.description, - blog: data.blog, - location: data.location, - publicRepos: data.public_repos, - followers: data.followers, - avatarUrl: data.avatar_url, - createdAt: data.created_at, - }; - } catch { - return null; - } -} - -async function fetchGitHubTechStack(orgName) { - try { - const res = await fetch( - `https://api.github.com/orgs/${encodeURIComponent(orgName)}/repos?sort=stars&per_page=10`, - { - headers: GITHUB_API_HEADERS, - signal: AbortSignal.timeout(5000), - }, - ); - if (!res.ok) return []; - const repos = await res.json(); - const languages = new Map(); - for (const repo of repos) { - if (repo.language) { - languages.set(repo.language, (languages.get(repo.language) || 0) + repo.stargazers_count + 1); - } - } - return Array.from(languages.entries()) - .sort((a, b) => b[1] - a[1]) - .slice(0, 10) - .map(([lang, score]) => ({ name: lang, category: 'Programming Language', confidence: Math.min(1, score / 100) })); - } catch { - return []; - } -} - -async function fetchSECData(companyName) { - try { - const res = await fetch( - `https://efts.sec.gov/LATEST/search-index?q=${encodeURIComponent(companyName)}&dateRange=custom&startdt=${getDateMonthsAgo(6)}&enddt=${getTodayISO()}&forms=10-K,10-Q,8-K&from=0&size=5`, - { - headers: { 'User-Agent': 'WorldMonitor research@worldmonitor.app', 'Accept': 'application/json' }, - signal: AbortSignal.timeout(8000), - }, - ); - if (!res.ok) return null; - const data = await res.json(); - if (!data.hits || !data.hits.hits || data.hits.hits.length === 0) return null; - return { - totalFilings: data.hits.total?.value || 0, - recentFilings: data.hits.hits.slice(0, 5).map((h) => ({ - form: h._source?.form_type || h._source?.file_type, - date: h._source?.file_date || h._source?.period_of_report, - description: h._source?.display_names?.[0] || companyName, - })), - }; - } catch { - return null; - } -} - -async function fetchHackerNewsMentions(companyName) { - try { - const res = await fetch( - `https://hn.algolia.com/api/v1/search?query=${encodeURIComponent(companyName)}&tags=story&hitsPerPage=5`, - { - headers: { 'User-Agent': UA }, - signal: AbortSignal.timeout(5000), - }, - ); - if (!res.ok) return []; - const data = await res.json(); - return (data.hits || []).map((h) => ({ - title: h.title, - url: h.url, - points: h.points, - comments: h.num_comments, - date: h.created_at, - })); - } catch { - return []; - } -} - -function getTodayISO() { - return toISODate(new Date()); -} - -function getDateMonthsAgo(months) { - const d = new Date(); - d.setMonth(d.getMonth() - months); - return toISODate(d); -} - -function toISODate(date) { - return date.toISOString().split('T')[0]; -} - -export default async function handler(req) { - const cors = getCorsHeaders(req, 'GET, OPTIONS'); - - if (req.method === 'OPTIONS') { - return new Response(null, { status: 204, headers: cors }); - } - - if (isDisallowedOrigin(req)) { - return new Response('Forbidden', { status: 403, headers: cors }); - } - - const rateLimitResult = await checkRateLimit(req, 'enrichment', 30, '60s'); - if (rateLimitResult) return rateLimitResult; - - const url = new URL(req.url); - const domain = url.searchParams.get('domain')?.trim().toLowerCase(); - const name = url.searchParams.get('name')?.trim(); - - if (!domain && !name) { - return new Response(JSON.stringify({ error: 'Provide ?domain= or ?name= parameter' }), { - status: 400, - headers: { ...cors, 'Content-Type': 'application/json' }, - }); - } - - const companyName = name || (domain ? inferCompanyNameFromDomain(domain) : 'Unknown'); - const searchName = domain ? toOrgSlugFromDomain(domain) : companyName.toLowerCase().replace(/\s+/g, ''); - - const [githubOrg, techStack, secData, hnMentions] = await Promise.all([ - fetchGitHubOrg(searchName), - fetchGitHubTechStack(searchName), - fetchSECData(companyName), - fetchHackerNewsMentions(companyName), - ]); - - const enrichedData = { - company: { - name: githubOrg?.name || companyName, - domain: domain || githubOrg?.blog?.replace(/^https?:\/\//, '').replace(/\/$/, '') || null, - description: githubOrg?.description || null, - location: githubOrg?.location || null, - website: githubOrg?.blog || (domain ? `https://${domain}` : null), - founded: githubOrg?.createdAt ? new Date(githubOrg.createdAt).getFullYear() : null, - }, - github: githubOrg ? { - publicRepos: githubOrg.publicRepos, - followers: githubOrg.followers, - avatarUrl: githubOrg.avatarUrl, - } : null, - techStack: techStack.length > 0 ? techStack : null, - secFilings: secData, - hackerNewsMentions: hnMentions.length > 0 ? hnMentions : null, - enrichedAt: new Date().toISOString(), - sources: [ - githubOrg ? 'github' : null, - techStack.length > 0 ? 'github_repos' : null, - secData ? 'sec_edgar' : null, - hnMentions.length > 0 ? 'hacker_news' : null, - ].filter(Boolean), - }; - - return new Response(JSON.stringify(enrichedData), { - status: 200, - headers: { - ...cors, - 'Content-Type': 'application/json', - 'Cache-Control': `public, s-maxage=${CACHE_TTL_SECONDS}, stale-while-revalidate=${CACHE_TTL_SECONDS * 2}`, - }, - }); -} diff --git a/api/enrichment/signals.js b/api/enrichment/signals.js deleted file mode 100644 index 74e504220..000000000 --- a/api/enrichment/signals.js +++ /dev/null @@ -1,218 +0,0 @@ -/** - * Signal Discovery API — Vercel Edge Function - * Discovers activity signals for a company from public sources: - * - News mentions (Hacker News) - * - GitHub activity spikes - * - Job posting signals (HN hiring threads) - * - * GET /api/enrichment/signals?company=Stripe&domain=stripe.com - */ - -import { getCorsHeaders, isDisallowedOrigin } from '../_cors.js'; -import { checkRateLimit } from '../_rate-limit.js'; -import { toOrgSlugFromDomain } from './_domain.js'; - -export const config = { runtime: 'edge' }; - -const UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36'; -const UPSTREAM_TIMEOUT_MS = 5000; -const DEFAULT_HEADERS = Object.freeze({ 'User-Agent': UA }); -const GITHUB_HEADERS = Object.freeze({ Accept: 'application/vnd.github.v3+json', ...DEFAULT_HEADERS }); - -const SIGNAL_KEYWORDS = { - hiring_surge: ['hiring', 'we\'re hiring', 'join our team', 'open positions', 'new roles', 'growing team'], - funding_event: ['raised', 'funding', 'series', 'investment', 'valuation', 'backed by'], - expansion_signal: ['expansion', 'new office', 'opening', 'entering market', 'new region', 'international'], - technology_adoption: ['migrating to', 'adopting', 'implementing', 'rolling out', 'tech stack', 'infrastructure'], - executive_movement: ['appointed', 'joins as', 'new ceo', 'new cto', 'new vp', 'leadership change', 'promoted to'], - financial_trigger: ['revenue', 'ipo', 'acquisition', 'merger', 'quarterly results', 'earnings'], -}; - -function classifySignal(text) { - const lower = text.toLowerCase(); - for (const [type, keywords] of Object.entries(SIGNAL_KEYWORDS)) { - for (const kw of keywords) { - if (lower.includes(kw)) return type; - } - } - return 'press_release'; -} - -function scoreSignalStrength(points, comments, recencyDays) { - let score = 0; - if (points > 100) score += 3; - else if (points > 30) score += 2; - else score += 1; - - if (comments > 50) score += 2; - else if (comments > 10) score += 1; - - if (recencyDays <= 3) score += 3; - else if (recencyDays <= 7) score += 2; - else if (recencyDays <= 14) score += 1; - - if (score >= 7) return 'critical'; - if (score >= 5) return 'high'; - if (score >= 3) return 'medium'; - return 'low'; -} - -async function fetchHNSignals(companyName) { - try { - const res = await fetch( - `https://hn.algolia.com/api/v1/search_by_date?query=${encodeURIComponent(companyName)}&tags=story&hitsPerPage=20&numericFilters=created_at_i>${Math.floor(Date.now() / 1000) - 30 * 86400}`, - { - headers: DEFAULT_HEADERS, - signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS), - }, - ); - if (!res.ok) return []; - const data = await res.json(); - const now = Date.now(); - - return (data.hits || []).map((h) => { - const recencyDays = (now - new Date(h.created_at).getTime()) / 86400000; - return { - type: classifySignal(h.title), - title: h.title, - url: h.url || `https://news.ycombinator.com/item?id=${h.objectID}`, - source: 'Hacker News', - sourceTier: 2, - timestamp: h.created_at, - strength: scoreSignalStrength(h.points || 0, h.num_comments || 0, recencyDays), - engagement: { points: h.points, comments: h.num_comments }, - }; - }); - } catch { - return []; - } -} - -async function fetchGitHubSignals(orgName) { - try { - const res = await fetch( - `https://api.github.com/orgs/${encodeURIComponent(orgName)}/repos?sort=created&per_page=10`, - { - headers: GITHUB_HEADERS, - signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS), - }, - ); - if (!res.ok) return []; - const repos = await res.json(); - const now = Date.now(); - const thirtyDaysAgo = now - 30 * 86400000; - - return repos - .filter((r) => new Date(r.created_at).getTime() > thirtyDaysAgo) - .map((r) => ({ - type: 'technology_adoption', - title: `New repository: ${r.full_name} — ${r.description || 'No description'}`, - url: r.html_url, - source: 'GitHub', - sourceTier: 2, - timestamp: r.created_at, - strength: r.stargazers_count > 50 ? 'high' : r.stargazers_count > 10 ? 'medium' : 'low', - engagement: { stars: r.stargazers_count, forks: r.forks_count }, - })); - } catch { - return []; - } -} - -async function fetchJobSignals(companyName) { - try { - const res = await fetch( - `https://hn.algolia.com/api/v1/search?query=${encodeURIComponent(companyName)}&tags=comment,ask_hn&hitsPerPage=10&numericFilters=created_at_i>${Math.floor(Date.now() / 1000) - 60 * 86400}`, - { - headers: DEFAULT_HEADERS, - signal: AbortSignal.timeout(UPSTREAM_TIMEOUT_MS), - }, - ); - if (!res.ok) return []; - const data = await res.json(); - - const hiringComments = (data.hits || []).filter((h) => { - const text = (h.comment_text || '').toLowerCase(); - return text.includes('hiring') || text.includes('job') || text.includes('apply'); - }); - - if (hiringComments.length === 0) return []; - - return [{ - type: 'hiring_surge', - title: `${companyName} hiring activity (${hiringComments.length} mentions in HN hiring threads)`, - url: `https://news.ycombinator.com/item?id=${hiringComments[0].story_id}`, - source: 'HN Hiring Threads', - sourceTier: 3, - timestamp: hiringComments[0].created_at, - strength: hiringComments.length >= 3 ? 'high' : 'medium', - engagement: { mentions: hiringComments.length }, - }]; - } catch { - return []; - } -} - -export default async function handler(req) { - const cors = getCorsHeaders(req, 'GET, OPTIONS'); - - if (req.method === 'OPTIONS') { - return new Response(null, { status: 204, headers: cors }); - } - - if (isDisallowedOrigin(req)) { - return new Response('Forbidden', { status: 403, headers: cors }); - } - - const rateLimitResult = await checkRateLimit(req, 'signals', 20, '60s'); - if (rateLimitResult) return rateLimitResult; - - const url = new URL(req.url); - const company = url.searchParams.get('company')?.trim(); - const domain = url.searchParams.get('domain')?.trim().toLowerCase(); - - if (!company) { - return new Response(JSON.stringify({ error: 'Provide ?company= parameter' }), { - status: 400, - headers: { ...cors, 'Content-Type': 'application/json' }, - }); - } - - const orgName = toOrgSlugFromDomain(domain) || company.toLowerCase().replace(/\s+/g, ''); - - const [hnSignals, githubSignals, jobSignals] = await Promise.all([ - fetchHNSignals(company), - fetchGitHubSignals(orgName), - fetchJobSignals(company), - ]); - - const allSignals = [...hnSignals, ...githubSignals, ...jobSignals] - .sort((a, b) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime()); - - const signalTypeCounts = {}; - for (const s of allSignals) { - signalTypeCounts[s.type] = (signalTypeCounts[s.type] || 0) + 1; - } - - const result = { - company, - domain: domain || null, - signals: allSignals, - summary: { - totalSignals: allSignals.length, - byType: signalTypeCounts, - strongestSignal: allSignals[0] || null, - signalDiversity: Object.keys(signalTypeCounts).length, - }, - discoveredAt: new Date().toISOString(), - }; - - return new Response(JSON.stringify(result), { - status: 200, - headers: { - ...cors, - 'Content-Type': 'application/json', - 'Cache-Control': 'public, s-maxage=1800, stale-while-revalidate=3600', - }, - }); -} diff --git a/docs/api-proxies.mdx b/docs/api-proxies.mdx index ca8d9eaba..309b166f6 100644 --- a/docs/api-proxies.mdx +++ b/docs/api-proxies.mdx @@ -37,14 +37,6 @@ All proxies: Fetches an RSS/Atom feed and returns the parsed JSON. The URL must match one of the patterns in `_rss-allowed-domains.js` — arbitrary URLs are refused to prevent SSRF. -### `GET /api/enrichment/company?domain=` - -Returns company metadata (name, logo, industry, HQ country) for a website domain. Composite of public sources. - -### `GET /api/enrichment/signals?domain=` - -Returns trust and risk signals (TLS grade, DNS age, WHOIS country, threat-list membership) for a domain. - ## Skills registry ### `GET /api/skills/fetch-agentskills` diff --git a/docs/architecture.mdx b/docs/architecture.mdx index b77379e84..4506fae57 100644 --- a/docs/architecture.mdx +++ b/docs/architecture.mdx @@ -271,7 +271,7 @@ World Monitor uses 60+ Vercel Edge Functions as a lightweight API layer, split i - **BIS Integration** — policy rates, real effective exchange rates, and credit-to-GDP ratios from the Bank for International Settlements, cached with 30-minute TTL - **WTO Trade Policy** — trade restrictions, tariff trends, bilateral trade flows, and SPS/TBT barriers from the World Trade Organization - **Supply Chain Intelligence** — maritime chokepoint disruption scores (cross-referencing NGA warnings + AIS data), FRED shipping freight indices with spike detection, and critical mineral supply concentration via Herfindahl-Hirschman Index analysis -- **Company Enrichment** — `/api/enrichment/company` aggregates GitHub organization data, inferred tech stack (derived from repository language distributions weighted by star count), SEC EDGAR public filings (10-K, 10-Q, 8-K), and Hacker News mentions into a single response. `/api/enrichment/signals` surfaces real-time company activity signals — funding events, hiring surges, executive changes, and expansion announcements — sourced from Hacker News and GitHub, each classified by signal type and scored for strength based on engagement, comment volume, and recency +- **Company Enrichment** — `IntelligenceService.GetCompanyEnrichment` aggregates GitHub organization data, inferred tech stack (derived from repository language distributions weighted by star count), SEC EDGAR public filings (10-K, 10-Q, 8-K), and Hacker News mentions into a single response. `IntelligenceService.ListCompanySignals` surfaces real-time company activity signals — funding events, hiring surges, executive changes, and expansion announcements — sourced from Hacker News and GitHub, each classified by signal type and scored for strength based on engagement, comment volume, and recency All edge functions include circuit breaker logic and return cached stale data when upstream APIs are unavailable, ensuring the dashboard never shows blank panels.