perf: add bot protection middleware and robots.txt to reduce API abuse

Block crawlers/scrapers from /api/* routes via Edge Middleware (403 for
bot user-agents and missing/short UAs). Social preview bots (Twitter,
Facebook, LinkedIn, Slack, Discord) are allowed on /api/story and
/api/og-story for OG previews. robots.txt reinforces the same policy.
This commit is contained in:
Elie Habib
2026-02-20 13:38:43 +04:00
parent 84336a9f94
commit 3ffe76b208
2 changed files with 70 additions and 0 deletions

44
middleware.ts Normal file
View File

@@ -0,0 +1,44 @@
/**
 * Vercel Edge Middleware — blocks bot/crawler traffic from API routes.
 * Runs on /api/* paths only (configured via matcher below).
 * Social preview bots are allowed on /api/story and /api/og-story
 * (the exact route or any sub-path of it, e.g. /api/story/123).
 */

/**
 * Case-insensitive user-agent fragments treated as automated clients.
 * The match is a plain substring test, so the generic `bot` alternative
 * already covers every named `...bot` entry that follows it.
 */
const BOT_UA =
  /bot|crawl|spider|slurp|archiver|wget|curl\/|python-requests|scrapy|httpclient|go-http|java\/|libwww|perl|ruby|php\/|ahrefsbot|semrushbot|mj12bot|dotbot|baiduspider|yandexbot|sogou|bytespider|petalbot|gptbot|claudebot|ccbot/i;

/** Case-insensitive user-agent fragments of link-preview fetchers. */
const SOCIAL_PREVIEW_UA =
  /twitterbot|facebookexternalhit|linkedinbot|slackbot|telegrambot|whatsapp|discordbot|redditbot/i;

/** API routes that social preview bots are allowed to fetch. */
const SOCIAL_PREVIEW_PATHS: readonly string[] = ['/api/story', '/api/og-story'];

/** User-agents shorter than this (including a missing one) are rejected. */
const MIN_UA_LENGTH = 10;

/**
 * Returns true when `path` is one of the preview routes or a sub-path of one.
 * Matching on a segment boundary keeps look-alike routes such as
 * /api/storyboard from inheriting the social-bot exemption.
 */
function isSocialPreviewPath(path: string): boolean {
  return SOCIAL_PREVIEW_PATHS.some((route) => path === route || path.startsWith(`${route}/`));
}

/** Builds the JSON 403 response sent to every rejected client. */
function forbidden(): Response {
  return new Response('{"error":"Forbidden"}', {
    status: 403,
    headers: { 'Content-Type': 'application/json' },
  });
}

/**
 * Decides whether a request may proceed to the API route.
 *
 * @param request - The incoming request; only its URL path and
 *   `User-Agent` header are inspected.
 * @returns `undefined` to let the request continue, or a 403 JSON
 *   `Response` when the user-agent looks automated, is missing, or is
 *   shorter than {@link MIN_UA_LENGTH} characters.
 */
export default function middleware(request: Request): Response | undefined {
  const ua = request.headers.get('user-agent') ?? '';
  const { pathname } = new URL(request.url);

  // Social preview bots are exempt, but only on the OG preview routes.
  // This must run before the generic bot check because most of these
  // user-agents also contain "bot".
  // NOTE(review): the User-Agent header is client-controlled, so this
  // exemption can be spoofed; it only widens access to the preview routes.
  if (SOCIAL_PREVIEW_UA.test(ua) && isSocialPreviewPath(pathname)) {
    return undefined;
  }

  // Known bot signature, or no/implausibly short user-agent (an empty
  // string has length 0, so the missing-header case is covered here too).
  if (BOT_UA.test(ua) || ua.length < MIN_UA_LENGTH) {
    return forbidden();
  }

  return undefined;
}
// Route matcher for this middleware: limits it to paths under /api/.
// NOTE(review): the hosting platform presumably reads this export
// statically at build time — keep it a plain object literal; confirm
// against the platform docs before refactoring.
export const config = {
matcher: '/api/:path*',
};

26
public/robots.txt Normal file
View File

@@ -0,0 +1,26 @@
# WorldMonitor - protect API routes from crawlers
User-agent: *
Allow: /
Disallow: /api/
Disallow: /tests/

# Allow social media bots for OG previews.
# A crawler obeys only the most specific group that matches it and ignores
# the "*" group entirely, so each bot group must repeat the Disallow rules.
# The longer Allow paths take precedence over "Disallow: /api/".
User-agent: Twitterbot
Allow: /api/story
Allow: /api/og-story
Disallow: /api/
Disallow: /tests/

User-agent: facebookexternalhit
Allow: /api/story
Allow: /api/og-story
Disallow: /api/
Disallow: /tests/

User-agent: LinkedInBot
Allow: /api/story
Allow: /api/og-story
Disallow: /api/
Disallow: /tests/

User-agent: Slackbot
Allow: /api/story
Allow: /api/og-story
Disallow: /api/
Disallow: /tests/

User-agent: Discordbot
Allow: /api/story
Allow: /api/og-story
Disallow: /api/
Disallow: /tests/

User-agent: TelegramBot
Allow: /api/story
Allow: /api/og-story
Disallow: /api/
Disallow: /tests/

User-agent: WhatsApp
Allow: /api/story
Allow: /api/og-story
Disallow: /api/
Disallow: /tests/

User-agent: redditbot
Allow: /api/story
Allow: /api/og-story
Disallow: /api/
Disallow: /tests/