Files
worldmonitor/api/rss-proxy.js
Elie Habib e2f6c46e45 fix(relay): block rsshub.app requests with 410 Gone (#526)
Stale clients still send RSS requests to rsshub.app (NHK, MOFCOM, MIIT).
These feeds were migrated to Google News RSS but cached PWA clients keep
hitting the relay, which forwards to rsshub.app and gets 403.

- Add explicit blocklist returning 410 Gone before allowlist check
- Remove rsshub.app from all allowlists (relay, edge proxy, vite)
- Remove dead AP News dev proxy target
2026-02-28 14:52:45 +04:00

441 lines
12 KiB
JavaScript

// Non-sebuf: returns XML/HTML, stays as standalone Vercel function
import { getCorsHeaders, isDisallowedOrigin } from './_cors.js';
export const config = { runtime: 'edge' };
// Fetch with timeout
async function fetchWithTimeout(url, options, timeoutMs = 15000) {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), timeoutMs);
try {
const response = await fetch(url, { ...options, signal: controller.signal });
return response;
} finally {
clearTimeout(timeout);
}
}
function getRelayBaseUrl() {
const relayUrl = process.env.WS_RELAY_URL || '';
if (!relayUrl) return '';
return relayUrl.replace('wss://', 'https://').replace('ws://', 'http://').replace(/\/$/, '');
}
function getRelayHeaders(baseHeaders = {}) {
const headers = { ...baseHeaders };
const relaySecret = process.env.RELAY_SHARED_SECRET || '';
if (relaySecret) {
const relayHeader = (process.env.RELAY_AUTH_HEADER || 'x-relay-key').toLowerCase();
headers[relayHeader] = relaySecret;
headers.Authorization = `Bearer ${relaySecret}`;
}
return headers;
}
async function fetchViaRailway(feedUrl, timeoutMs) {
const relayBaseUrl = getRelayBaseUrl();
if (!relayBaseUrl) return null;
const relayUrl = `${relayBaseUrl}/rss?url=${encodeURIComponent(feedUrl)}`;
return fetchWithTimeout(relayUrl, {
headers: getRelayHeaders({
'Accept': 'application/rss+xml, application/xml, text/xml, */*',
'User-Agent': 'WorldMonitor-RSS-Proxy/1.0',
}),
}, timeoutMs);
}
// Allowed RSS feed domains for security
const ALLOWED_DOMAINS = [
'feeds.bbci.co.uk',
'www.theguardian.com',
'feeds.npr.org',
'news.google.com',
'www.aljazeera.com',
'www.aljazeera.net',
'rss.cnn.com',
'hnrss.org',
'feeds.arstechnica.com',
'www.theverge.com',
'www.cnbc.com',
'feeds.marketwatch.com',
'www.defenseone.com',
'breakingdefense.com',
'www.bellingcat.com',
'techcrunch.com',
'huggingface.co',
'www.technologyreview.com',
'rss.arxiv.org',
'export.arxiv.org',
'www.federalreserve.gov',
'www.sec.gov',
'www.whitehouse.gov',
'www.state.gov',
'www.defense.gov',
'home.treasury.gov',
'www.justice.gov',
'tools.cdc.gov',
'www.fema.gov',
'www.dhs.gov',
'www.thedrive.com',
'krebsonsecurity.com',
'finance.yahoo.com',
'thediplomat.com',
'venturebeat.com',
'foreignpolicy.com',
'www.ft.com',
'openai.com',
'www.reutersagency.com',
'feeds.reuters.com',
'asia.nikkei.com',
'www.cfr.org',
'www.csis.org',
'www.politico.com',
'www.brookings.edu',
'layoffs.fyi',
'www.defensenews.com',
'www.militarytimes.com',
'taskandpurpose.com',
'news.usni.org',
'www.oryxspioenkop.com',
'www.gov.uk',
'www.foreignaffairs.com',
'www.atlanticcouncil.org',
// Tech variant domains
'www.zdnet.com',
'www.techmeme.com',
'www.darkreading.com',
'www.schneier.com',
'www.ransomware.live',
'rss.politico.com',
'www.anandtech.com',
'www.tomshardware.com',
'www.semianalysis.com',
'feed.infoq.com',
'thenewstack.io',
'devops.com',
'dev.to',
'lobste.rs',
'changelog.com',
'seekingalpha.com',
'news.crunchbase.com',
'www.saastr.com',
'feeds.feedburner.com',
// Additional tech variant domains
'www.producthunt.com',
'www.axios.com',
'api.axios.com',
'github.blog',
'githubnext.com',
'mshibanami.github.io',
'www.engadget.com',
'news.mit.edu',
'dev.events',
// VC blogs
'www.ycombinator.com',
'a16z.com',
'review.firstround.com',
'www.sequoiacap.com',
'www.nfx.com',
'www.aaronsw.com',
'bothsidesofthetable.com',
'www.lennysnewsletter.com',
'stratechery.com',
// Regional startup news
'www.eu-startups.com',
'tech.eu',
'sifted.eu',
'www.techinasia.com',
'kr-asia.com',
'techcabal.com',
'disrupt-africa.com',
'lavca.org',
'contxto.com',
'inc42.com',
'yourstory.com',
// Funding & VC
'pitchbook.com',
'www.cbinsights.com',
// Accelerators
'www.techstars.com',
// Middle East & Regional News
'english.alarabiya.net',
'www.arabnews.com',
'www.timesofisrael.com',
'www.haaretz.com',
'www.scmp.com',
'kyivindependent.com',
'www.themoscowtimes.com',
'feeds.24.com',
'feeds.news24.com', // News24 main feed domain
'feeds.capi24.com', // News24 redirect destination
// International News Sources
'www.france24.com',
'www.euronews.com',
'de.euronews.com',
'es.euronews.com',
'fr.euronews.com',
'it.euronews.com',
'pt.euronews.com',
'ru.euronews.com',
'www.lemonde.fr',
'rss.dw.com',
'www.bild.de',
'www.africanews.com',
'fr.africanews.com',
// Nigeria
'www.premiumtimesng.com',
'www.vanguardngr.com',
'www.channelstv.com',
'dailytrust.com',
'www.thisdaylive.com',
// Greek
'www.naftemporiki.gr',
'www.in.gr',
'www.iefimerida.gr',
'www.lasillavacia.com',
'www.channelnewsasia.com',
'japantoday.com',
'www.thehindu.com',
'indianexpress.com',
'www.twz.com',
'gcaptain.com',
// International Organizations
'news.un.org',
'www.iaea.org',
'www.who.int',
'www.cisa.gov',
'www.crisisgroup.org',
// Think Tanks & Research (Added 2026-01-29)
'rusi.org',
'warontherocks.com',
'www.aei.org',
'responsiblestatecraft.org',
'www.fpri.org',
'jamestown.org',
'www.chathamhouse.org',
'ecfr.eu',
'www.gmfus.org',
'www.wilsoncenter.org',
'www.lowyinstitute.org',
'www.mei.edu',
'www.stimson.org',
'www.cnas.org',
'carnegieendowment.org',
'www.rand.org',
'fas.org',
'www.armscontrol.org',
'www.nti.org',
'thebulletin.org',
'www.iss.europa.eu',
// Economic & Food Security
'www.fao.org',
'worldbank.org',
'www.imf.org',
// International news (various languages)
'www.bbc.com',
'www.spiegel.de',
'www.tagesschau.de',
'newsfeed.zeit.de',
'feeds.elpais.com',
'e00-elmundo.uecdn.es',
'www.repubblica.it',
'www.ansa.it',
'xml2.corriereobjects.it',
'feeds.nos.nl',
'www.nrc.nl',
'www.telegraaf.nl',
'www.dn.se',
'www.svd.se',
'www.svt.se',
'www.asahi.com',
'www.clarin.com',
'oglobo.globo.com',
'feeds.folha.uol.com.br',
'www.eltiempo.com',
'www.eluniversal.com.mx',
'www.jeuneafrique.com',
'www.lorientlejour.com',
// Regional locale feeds (tr, pl, ru, th, vi, pt)
'www.hurriyet.com.tr',
'tvn24.pl',
'www.polsatnews.pl',
'www.rp.pl',
'meduza.io',
'novayagazeta.eu',
'www.bangkokpost.com',
'vnexpress.net',
'www.abc.net.au',
'islandtimes.org',
'www.brasilparalelo.com.br',
// Mexico & LatAm Security
'mexiconewsdaily.com',
'animalpolitico.com',
'www.proceso.com.mx',
'www.milenio.com',
'insightcrime.org',
// Additional
'news.ycombinator.com',
// Finance variant
'seekingalpha.com',
'www.coindesk.com',
'cointelegraph.com',
// Security advisories — government travel advisory feeds
'travel.state.gov',
'www.smartraveller.gov.au',
'www.safetravel.govt.nz',
// US Embassy security alerts
'th.usembassy.gov',
'ae.usembassy.gov',
'de.usembassy.gov',
'ua.usembassy.gov',
'mx.usembassy.gov',
'in.usembassy.gov',
'pk.usembassy.gov',
'co.usembassy.gov',
'pl.usembassy.gov',
'bd.usembassy.gov',
'it.usembassy.gov',
'do.usembassy.gov',
'mm.usembassy.gov',
// Health advisories
'wwwnc.cdc.gov',
'www.ecdc.europa.eu',
'www.who.int',
'www.afro.who.int',
// Happy variant — positive news sources
'www.goodnewsnetwork.org',
'www.positive.news',
'reasonstobecheerful.world',
'www.optimistdaily.com',
'www.upworthy.com',
'www.dailygood.org',
'www.goodgoodgood.co',
'www.good.is',
'www.sunnyskyz.com',
'thebetterindia.com',
'singularityhub.com',
'humanprogress.org',
'greatergood.berkeley.edu',
'www.onlygoodnewsdaily.com',
'www.sciencedaily.com',
'feeds.nature.com',
'www.nature.com',
'www.livescience.com',
'www.newscientist.com',
];
export default async function handler(req) {
const corsHeaders = getCorsHeaders(req, 'GET, OPTIONS');
// Handle CORS preflight
if (req.method === 'OPTIONS') {
return new Response(null, { status: 204, headers: corsHeaders });
}
const requestUrl = new URL(req.url);
const feedUrl = requestUrl.searchParams.get('url');
if (!feedUrl) {
return new Response(JSON.stringify({ error: 'Missing url parameter' }), {
status: 400,
headers: { 'Content-Type': 'application/json', ...corsHeaders },
});
}
try {
const parsedUrl = new URL(feedUrl);
// Block deprecated feed domains (stale clients still request these)
const BLOCKED_DOMAINS = ['rsshub.app'];
if (BLOCKED_DOMAINS.includes(parsedUrl.hostname)) {
return new Response(JSON.stringify({ error: 'Feed deprecated' }), {
status: 410,
headers: { 'Content-Type': 'application/json', ...corsHeaders },
});
}
// Security: Check if domain is allowed (normalize www prefix)
const hostname = parsedUrl.hostname;
const bare = hostname.replace(/^www\./, '');
const withWww = hostname.startsWith('www.') ? hostname : `www.${hostname}`;
if (!ALLOWED_DOMAINS.includes(hostname) && !ALLOWED_DOMAINS.includes(bare) && !ALLOWED_DOMAINS.includes(withWww)) {
return new Response(JSON.stringify({ error: 'Domain not allowed' }), {
status: 403,
headers: { 'Content-Type': 'application/json', ...corsHeaders },
});
}
// Google News is slow - use longer timeout
const isGoogleNews = feedUrl.includes('news.google.com');
const timeout = isGoogleNews ? 20000 : 12000;
const fetchDirect = async () => {
const response = await fetchWithTimeout(feedUrl, {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Accept': 'application/rss+xml, application/xml, text/xml, */*',
'Accept-Language': 'en-US,en;q=0.9',
},
redirect: 'manual',
}, timeout);
if (response.status >= 300 && response.status < 400) {
const location = response.headers.get('location');
if (location) {
const redirectUrl = new URL(location, feedUrl);
if (!ALLOWED_DOMAINS.includes(redirectUrl.hostname)) {
throw new Error('Redirect to disallowed domain');
}
return fetchWithTimeout(redirectUrl.href, {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
'Accept': 'application/rss+xml, application/xml, text/xml, */*',
'Accept-Language': 'en-US,en;q=0.9',
},
}, timeout);
}
}
return response;
};
let response;
let usedRelay = false;
try {
response = await fetchDirect();
} catch (directError) {
response = await fetchViaRailway(feedUrl, timeout);
usedRelay = !!response;
if (!response) throw directError;
}
if (!response.ok && !usedRelay) {
const relayResponse = await fetchViaRailway(feedUrl, timeout);
if (relayResponse && relayResponse.ok) {
response = relayResponse;
}
}
const data = await response.text();
return new Response(data, {
status: response.status,
headers: {
'Content-Type': response.headers.get('content-type') || 'application/xml',
'Cache-Control': response.headers.get('cache-control') || 'public, max-age=600, s-maxage=600, stale-while-revalidate=300',
...corsHeaders,
},
});
} catch (error) {
const isTimeout = error.name === 'AbortError';
console.error('RSS proxy error:', feedUrl, error.message);
return new Response(JSON.stringify({
error: isTimeout ? 'Feed timeout' : 'Failed to fetch feed',
details: error.message,
url: feedUrl
}), {
status: isTimeout ? 504 : 502,
headers: { 'Content-Type': 'application/json', ...corsHeaders },
});
}
}