Files
worldmonitor/middleware.ts
Elie Habib 0500733541 feat(variants): wire energy.worldmonitor.app subdomain (gaps #9-11) (#3394)
DNS (Cloudflare) and the Vercel domain are already provisioned by the
operator; this lands the matching code-side wiring so the variant
actually resolves and renders correctly.

Changes:

middleware.ts
- Add `'energy.worldmonitor.app': 'energy'` to VARIANT_HOST_MAP. This
  also auto-includes the host in ALLOWED_HOSTS via the spread on
  line 87.
- Add `energy` entry to VARIANT_OG with the Energy-Atlas-specific
  title + description from `src/config/variant-meta.ts:130-152`. OG
  image points at `https://energy.worldmonitor.app/favico/energy/og-image.png`,
  matching the per-variant convention used by tech / finance /
  commodity / happy.

vercel.json
- Add `https://energy.worldmonitor.app` to BOTH `frame-src` and
  `frame-ancestors` in the global Content-Security-Policy header.
  Without this, the variant subdomain would render but be blocked
  from being framed back into worldmonitor.app for any embedded
  flow (Slack/LinkedIn previews, future iframe widgets, etc.).
  This supersedes the CSP-only portion of PR #3359 (which mixed
  CSP with unrelated relay/military changes).

convex/payments/checkout.ts:108-117
- Add `https://energy.worldmonitor.app` to the checkout returnUrl
  allowlist. Without this, a PRO upgrade flow initiated from the
  energy subdomain would fail with "Invalid returnUrl" on Convex.

src-tauri/tauri.conf.json:32
- Add `https://energy.worldmonitor.app` to the Tauri desktop CSP
  frame-src so the desktop app can embed the variant the same way
  it embeds the other 4.

public/favico/energy/* (NEW, 7 files)
- Stub the per-variant favicon directory by copying the root-level
  WorldMonitor brand assets (android-chrome 192/512, apple-touch,
  favicon 16/32/ico, og-image). This keeps the launch unblocked
  on design assets — every referenced URL resolves with valid
  bytes from day one. Replace with energy-themed designs in a
  follow-up PR; the file paths are stable.

Other variant subdomains already on main (tech / finance / commodity /
happy) are unchanged. APP_HOSTS in src/services/runtime.ts already
admits any `*.worldmonitor.app` via `host.endsWith('.worldmonitor.app')`
on line 226, so no edit needed there.

Closes gaps §L #9, #10, #11 in
docs/internal/energy-atlas-registry-expansion.md.
2026-04-25 14:19:28 +04:00

211 lines
8.7 KiB
TypeScript

const BOT_UA =
/bot|crawl|spider|slurp|archiver|wget|curl\/|python-requests|scrapy|httpclient|go-http|java\/|libwww|perl|ruby|php\/|ahrefsbot|semrushbot|mj12bot|dotbot|baiduspider|yandexbot|sogou|bytespider|petalbot|gptbot|claudebot|ccbot/i;
const SOCIAL_PREVIEW_UA =
/twitterbot|facebookexternalhit|linkedinbot|slackbot|telegrambot|whatsapp|discordbot|redditbot/i;
const SOCIAL_PREVIEW_PATHS = new Set(['/api/story', '/api/og-story']);
// Paths that bypass bot/script UA filtering below. Each must carry its own
// auth (API key, shared secret, or intentionally-public semantics) because
// this list disables the middleware's generic bot gate.
// - /api/version, /api/health: intentionally public, monitoring-friendly.
// - /api/seed-contract-probe: requires RELAY_SHARED_SECRET header; called by
// UptimeRobot + ops curl. Was blocked by the curl/bot UA regex before this
// exception landed (Vercel log 2026-04-15: "Middleware 403 Forbidden" on
// /api/seed-contract-probe).
// - /api/internal/brief-why-matters: requires RELAY_SHARED_SECRET Bearer
// (subtle-crypto HMAC timing-safe compare in server/_shared/internal-auth.ts).
// Called from the Railway digest-notifications cron whose fetch() uses the
// Node undici default UA, which is short enough to trip the "no UA or
// suspiciously short" 403 below (Railway log 2026-04-21 post-#3248 merge:
// every cron call returned 403 and silently fell back to legacy Gemini).
const PUBLIC_API_PATHS = new Set([
'/api/version',
'/api/health',
'/api/seed-contract-probe',
'/api/internal/brief-why-matters',
]);
const SOCIAL_IMAGE_UA =
/Slack-ImgProxy|Slackbot|twitterbot|facebookexternalhit|linkedinbot|telegrambot|whatsapp|discordbot|redditbot/i;
// Must match the exact route shape enforced by
// api/brief/carousel/[userId]/[issueDate]/[page].ts:
// /api/brief/carousel/<userId>/YYYY-MM-DD-HHMM/<0|1|2>
// The issueDate segment is a per-run slot (date + HHMM in the user's
// tz) so same-day digests produce distinct carousel URLs.
// pageFromIndex() in brief-carousel-render.ts accepts only 0/1/2, so
// the trailing segment is tightly bounded.
const BRIEF_CAROUSEL_PATH_RE =
/^\/api\/brief\/carousel\/[^/]+\/\d{4}-\d{2}-\d{2}-\d{4}\/[0-2]\/?$/;
const VARIANT_HOST_MAP: Record<string, string> = {
'tech.worldmonitor.app': 'tech',
'finance.worldmonitor.app': 'finance',
'commodity.worldmonitor.app': 'commodity',
'happy.worldmonitor.app': 'happy',
'energy.worldmonitor.app': 'energy',
};
// Source of truth: src/config/variant-meta.ts — keep in sync when variant metadata changes.
const VARIANT_OG: Record<string, { title: string; description: string; image: string; url: string }> = {
tech: {
title: 'Tech Monitor - Real-Time AI & Tech Industry Dashboard',
description: 'Real-time AI and tech industry dashboard tracking tech giants, AI labs, startup ecosystems, funding rounds, and tech events worldwide.',
image: 'https://tech.worldmonitor.app/favico/tech/og-image.png',
url: 'https://tech.worldmonitor.app/',
},
finance: {
title: 'Finance Monitor - Real-Time Markets & Trading Dashboard',
description: 'Real-time finance and trading dashboard tracking global markets, stock exchanges, central banks, commodities, forex, crypto, and economic indicators worldwide.',
image: 'https://finance.worldmonitor.app/favico/finance/og-image.png',
url: 'https://finance.worldmonitor.app/',
},
commodity: {
title: 'Commodity Monitor - Real-Time Commodity Markets & Supply Chain Dashboard',
description: 'Real-time commodity markets dashboard tracking mining sites, processing plants, commodity ports, supply chains, and global commodity trade flows.',
image: 'https://commodity.worldmonitor.app/favico/commodity/og-image.png',
url: 'https://commodity.worldmonitor.app/',
},
happy: {
title: 'Happy Monitor - Good News & Global Progress',
description: 'Curated positive news, progress data, and uplifting stories from around the world.',
image: 'https://happy.worldmonitor.app/favico/happy/og-image.png',
url: 'https://happy.worldmonitor.app/',
},
energy: {
title: 'Energy Atlas - Real-Time Global Energy Intelligence Dashboard',
description: 'Real-time global energy atlas tracking oil and gas pipelines, storage facilities, chokepoints, fuel shortages, tanker flows, and disruption events worldwide.',
image: 'https://energy.worldmonitor.app/favico/energy/og-image.png',
url: 'https://energy.worldmonitor.app/',
},
};
const ALLOWED_HOSTS = new Set([
'worldmonitor.app',
...Object.keys(VARIANT_HOST_MAP),
]);
const VERCEL_PREVIEW_RE = /^[a-z0-9-]+-[a-z0-9]{8,}\.vercel\.app$/;
function normalizeHost(raw: string): string {
return raw.toLowerCase().replace(/:\d+$/, '');
}
function isAllowedHost(host: string): boolean {
return ALLOWED_HOSTS.has(host) || VERCEL_PREVIEW_RE.test(host);
}
export default function middleware(request: Request) {
const url = new URL(request.url);
const ua = request.headers.get('user-agent') ?? '';
const path = url.pathname;
const host = normalizeHost(request.headers.get('host') ?? url.hostname);
// Social bot OG response for variant subdomain root pages
if (path === '/' && SOCIAL_PREVIEW_UA.test(ua)) {
const variant = VARIANT_HOST_MAP[host];
if (variant && isAllowedHost(host)) {
const og = VARIANT_OG[variant as keyof typeof VARIANT_OG];
if (og) {
const html = `<!DOCTYPE html><html><head>
<meta property="og:type" content="website"/>
<meta property="og:title" content="${og.title}"/>
<meta property="og:description" content="${og.description}"/>
<meta property="og:image" content="${og.image}"/>
<meta property="og:url" content="${og.url}"/>
<meta name="twitter:card" content="summary_large_image"/>
<meta name="twitter:title" content="${og.title}"/>
<meta name="twitter:description" content="${og.description}"/>
<meta name="twitter:image" content="${og.image}"/>
<title>${og.title}</title>
</head><body></body></html>`;
return new Response(html, {
status: 200,
headers: {
'Content-Type': 'text/html; charset=utf-8',
'Cache-Control': 'no-store',
'Vary': 'User-Agent, Host',
},
});
}
}
}
// Only apply bot filtering to /api/* and /favico/* paths
if (!path.startsWith('/api/') && !path.startsWith('/favico/')) {
return;
}
// Allow social preview/image bots on OG image assets.
//
// Image-returning API routes that don't end in `.png` also need
// an explicit carve-out — otherwise server-side fetches from
// Slack / Telegram / Discord / LinkedIn / WhatsApp / Facebook /
// Twitter / Reddit all trip the BOT_UA gate below. Telegram
// surfaces it as error 400 "WEBPAGE_CURL_FAILED" on sendMediaGroup;
// the others silently drop the preview image.
//
// Only the brief carousel route shape is allowlisted — a strict
// regex (same shape enforced by the handler) prevents a future
// /api/brief/carousel/admin or similar sibling from accidentally
// inheriting this bypass. HMAC token in the URL is the real auth;
// this allowlist is defence-in-depth for any well-shaped request
// whose UA happens to be in SOCIAL_IMAGE_UA.
if (
path.startsWith('/favico/') ||
path.endsWith('.png') ||
BRIEF_CAROUSEL_PATH_RE.test(path)
) {
if (SOCIAL_IMAGE_UA.test(ua)) {
return;
}
}
// Allow social preview bots on exact OG routes only
if (SOCIAL_PREVIEW_UA.test(ua) && SOCIAL_PREVIEW_PATHS.has(path)) {
return;
}
// Public endpoints bypass all bot filtering
if (PUBLIC_API_PATHS.has(path)) {
return;
}
// Authenticated Pro API clients bypass UA filtering. This is a cheap
// edge heuristic, not auth — real validation (SHA-256 hash vs Convex
// userApiKeys + entitlement) happens in server/gateway.ts. To keep the
// bot-UA shield meaningful, require the exact key shape emitted by
// src/services/api-keys.ts:generateKey: `wm_` + 40 lowercase hex chars.
// A random scraper would have to guess a specific 43-char format, and
// spoofed-but-well-shaped keys still 401 at the gateway.
const WM_KEY_SHAPE = /^wm_[a-f0-9]{40}$/;
const apiKey =
request.headers.get('x-worldmonitor-key') ??
request.headers.get('x-api-key') ??
'';
if (WM_KEY_SHAPE.test(apiKey)) {
return;
}
// Block bots from all API routes
if (BOT_UA.test(ua)) {
return new Response('{"error":"Forbidden"}', {
status: 403,
headers: { 'Content-Type': 'application/json' },
});
}
// No user-agent or suspiciously short — likely a script
if (!ua || ua.length < 10) {
return new Response('{"error":"Forbidden"}', {
status: 403,
headers: { 'Content-Type': 'application/json' },
});
}
}
export const config = {
matcher: ['/', '/api/:path*', '/favico/:path*'],
};