Files
worldmonitor/middleware.ts
Elie Habib 0169245f45 feat(seo): BlogPosting schema, FAQPage JSON-LD, extensible author system (#2284)
* feat(seo): BlogPosting schema, FAQPage JSON-LD, author system, AI crawler welcome

Blog structured data:
- Change @type Article to BlogPosting for all blog posts
- Author: Organization to Person with extensible default (Elie Habib)
- Add per-post author/authorUrl/authorBio/modifiedDate frontmatter fields
- Auto-extract FAQPage JSON-LD from FAQ sections in all 17 posts
- Show Updated date when modifiedDate differs from pubDate
- Add author bio section with GitHub avatar and fallback

Main app:
- Add commodity variant to middleware VARIANT_HOST_MAP and VARIANT_OG
- Add commodity.worldmonitor.app to sitemap.xml
- Shorten index.html meta description to 136 chars (was 161)
- Remove worksFor block from index.html author JSON-LD
- Welcome all bots in robots.txt (removed per-bot blocks, global allows)
- Update llms.txt: five variants listed, all 17 blog post URLs added

* fix(seo): scope FAQ regex to section boundary, use author-aware avatar

- extractFaqLd now slices only to the next ## heading (was: to end of body)
  preventing bold text in post-FAQ sections from being mistakenly extracted
- Avatar src now derived from DEFAULT_AUTHOR_GITHUB constant (koala73)
  only when using the default author; custom authors fall back to favicon
  so multi-author posts show a correct image instead of the wrong profile
2026-03-26 12:48:56 +04:00

141 lines
5.2 KiB
TypeScript

const BOT_UA =
/bot|crawl|spider|slurp|archiver|wget|curl\/|python-requests|scrapy|httpclient|go-http|java\/|libwww|perl|ruby|php\/|ahrefsbot|semrushbot|mj12bot|dotbot|baiduspider|yandexbot|sogou|bytespider|petalbot|gptbot|claudebot|ccbot/i;
const SOCIAL_PREVIEW_UA =
/twitterbot|facebookexternalhit|linkedinbot|slackbot|telegrambot|whatsapp|discordbot|redditbot/i;
const SOCIAL_PREVIEW_PATHS = new Set(['/api/story', '/api/og-story']);
const PUBLIC_API_PATHS = new Set(['/api/version', '/api/health']);
const SOCIAL_IMAGE_UA =
/Slack-ImgProxy|Slackbot|twitterbot|facebookexternalhit|linkedinbot|telegrambot|whatsapp|discordbot|redditbot/i;
const VARIANT_HOST_MAP: Record<string, string> = {
'tech.worldmonitor.app': 'tech',
'finance.worldmonitor.app': 'finance',
'commodity.worldmonitor.app': 'commodity',
'happy.worldmonitor.app': 'happy',
};
// Source of truth: src/config/variant-meta.ts — keep in sync when variant metadata changes.
const VARIANT_OG: Record<string, { title: string; description: string; image: string; url: string }> = {
tech: {
title: 'Tech Monitor - Real-Time AI & Tech Industry Dashboard',
description: 'Real-time AI and tech industry dashboard tracking tech giants, AI labs, startup ecosystems, funding rounds, and tech events worldwide.',
image: 'https://tech.worldmonitor.app/favico/tech/og-image.png',
url: 'https://tech.worldmonitor.app/',
},
finance: {
title: 'Finance Monitor - Real-Time Markets & Trading Dashboard',
description: 'Real-time finance and trading dashboard tracking global markets, stock exchanges, central banks, commodities, forex, crypto, and economic indicators worldwide.',
image: 'https://finance.worldmonitor.app/favico/finance/og-image.png',
url: 'https://finance.worldmonitor.app/',
},
commodity: {
title: 'Commodity Monitor - Real-Time Commodity Markets & Supply Chain Dashboard',
description: 'Real-time commodity markets dashboard tracking mining sites, processing plants, commodity ports, supply chains, and global commodity trade flows.',
image: 'https://commodity.worldmonitor.app/favico/commodity/og-image.png',
url: 'https://commodity.worldmonitor.app/',
},
happy: {
title: 'Happy Monitor - Good News & Global Progress',
description: 'Curated positive news, progress data, and uplifting stories from around the world.',
image: 'https://happy.worldmonitor.app/favico/happy/og-image.png',
url: 'https://happy.worldmonitor.app/',
},
};
const ALLOWED_HOSTS = new Set([
'worldmonitor.app',
...Object.keys(VARIANT_HOST_MAP),
]);
const VERCEL_PREVIEW_RE = /^[a-z0-9-]+-[a-z0-9]{8,}\.vercel\.app$/;
function normalizeHost(raw: string): string {
return raw.toLowerCase().replace(/:\d+$/, '');
}
function isAllowedHost(host: string): boolean {
return ALLOWED_HOSTS.has(host) || VERCEL_PREVIEW_RE.test(host);
}
export default function middleware(request: Request) {
const url = new URL(request.url);
const ua = request.headers.get('user-agent') ?? '';
const path = url.pathname;
const host = normalizeHost(request.headers.get('host') ?? url.hostname);
// Social bot OG response for variant subdomain root pages
if (path === '/' && SOCIAL_PREVIEW_UA.test(ua)) {
const variant = VARIANT_HOST_MAP[host];
if (variant && isAllowedHost(host)) {
const og = VARIANT_OG[variant as keyof typeof VARIANT_OG];
if (og) {
const html = `<!DOCTYPE html><html><head>
<meta property="og:type" content="website"/>
<meta property="og:title" content="${og.title}"/>
<meta property="og:description" content="${og.description}"/>
<meta property="og:image" content="${og.image}"/>
<meta property="og:url" content="${og.url}"/>
<meta name="twitter:card" content="summary_large_image"/>
<meta name="twitter:title" content="${og.title}"/>
<meta name="twitter:description" content="${og.description}"/>
<meta name="twitter:image" content="${og.image}"/>
<title>${og.title}</title>
</head><body></body></html>`;
return new Response(html, {
status: 200,
headers: {
'Content-Type': 'text/html; charset=utf-8',
'Cache-Control': 'no-store',
'Vary': 'User-Agent, Host',
},
});
}
}
}
// Only apply bot filtering to /api/* and /favico/* paths
if (!path.startsWith('/api/') && !path.startsWith('/favico/')) {
return;
}
// Allow social preview/image bots on OG image assets
if (path.startsWith('/favico/') || path.endsWith('.png')) {
if (SOCIAL_IMAGE_UA.test(ua)) {
return;
}
}
// Allow social preview bots on exact OG routes only
if (SOCIAL_PREVIEW_UA.test(ua) && SOCIAL_PREVIEW_PATHS.has(path)) {
return;
}
// Public endpoints bypass all bot filtering
if (PUBLIC_API_PATHS.has(path)) {
return;
}
// Block bots from all API routes
if (BOT_UA.test(ua)) {
return new Response('{"error":"Forbidden"}', {
status: 403,
headers: { 'Content-Type': 'application/json' },
});
}
// No user-agent or suspiciously short — likely a script
if (!ua || ua.length < 10) {
return new Response('{"error":"Forbidden"}', {
status: 403,
headers: { 'Content-Type': 'application/json' },
});
}
}
export const config = {
matcher: ['/', '/api/:path*', '/favico/:path*'],
};