Files
worldmonitor/api/fwdstart.js
Elie Habib fe67111dc9 feat: harness engineering P0 - linting, testing, architecture docs (#1587)
* feat: harness engineering P0 - linting, testing, architecture docs

Add foundational infrastructure for agent-first development:

- AGENTS.md: agent entry point with progressive disclosure to deeper docs
- ARCHITECTURE.md: 12-section system reference with source-file refs and ownership rule
- Biome 2.4.7 linter with project-tuned rules, CI workflow (lint-code.yml)
- Architectural boundary lint enforcing forward-only dependency direction (lint-boundaries.mjs)
- Unit test CI workflow (test.yml), all 1083 tests passing
- Fixed 9 pre-existing test failures (bootstrap sync, deploy-config headers, globe parity, redis mocks, geometry URL, import.meta.env null safety)
- Fixed 12 architectural boundary violations (types moved to proper layers)
- Added 3 missing cache tier entries in gateway.ts
- Synced cache-keys.ts with bootstrap.js
- Renamed docs/architecture.mdx to "Design Philosophy" with cross-references
- Deprecated legacy docs/Docs_To_Review/ARCHITECTURE.md
- Harness engineering roadmap tracking doc

* fix: address PR review feedback on harness-engineering-p0

- countries-geojson.test.mjs: skip gracefully when CDN unreachable
  instead of failing CI on network issues
- country-geometry-overrides.test.mts: relax timing assertion
  (250ms -> 2000ms) for constrained CI environments
- lint-boundaries.mjs: implement the documented api/ boundary check
  (was documented but missing, causing false green)

* fix(lint): scan api/ .ts files in boundary check

The api/ boundary check only scanned .js/.mjs files, missing the 25
sebuf RPC .ts edge functions. Now scans .ts files with correct rules:
- Legacy .js: fully self-contained (no server/ or src/ imports)
- RPC .ts: may import server/ and src/generated/ (bundled at deploy),
  but blocks imports from src/ application code

* fix(lint): detect import() type expressions in boundary lint

- Move AppContext back to app/app-context.ts (aggregate type that
  references components/services/utils belongs at the top, not types/)
- Move HappyContentCategory and TechHQ to types/ (simple enums/interfaces)
- Boundary lint now catches import('@/layer') expressions, not just
  from '@/layer' imports
- correlation-engine imports of AppContext marked boundary-ignore
  (type-only imports of top-level aggregate)
2026-03-14 21:29:21 +04:00

111 lines
3.7 KiB
JavaScript

// Non-sebuf: returns XML/HTML, stays as standalone Vercel function
import { getCorsHeaders, isDisallowedOrigin } from './_cors.js';
// Function-level settings export: selects the 'edge' runtime for this handler.
export const config = {
  runtime: 'edge',
};
// --- FwdStart archive scraper: constants -----------------------------------

/** Public site root; post paths scraped from the archive are appended to it. */
const SITE_URL = 'https://www.fwdstart.me';
/** Archive page listing the published newsletter issues. */
const ARCHIVE_URL = `${SITE_URL}/archive`;
/** Upper bound on the number of <item> entries emitted in the feed. */
const MAX_FEED_ITEMS = 30;
/** Alt texts shorter than this are treated as decorative images, not titles. */
const MIN_TITLE_LENGTH = 5;
/** Abort the upstream fetch after this many milliseconds. */
const FETCH_TIMEOUT_MS = 15000;

/** Zero-based month index for each three-letter month abbreviation. */
const MONTH_INDEX = Object.freeze({
  jan: 0,
  feb: 1,
  mar: 2,
  apr: 3,
  may: 4,
  jun: 5,
  jul: 6,
  aug: 7,
  sep: 8,
  oct: 9,
  nov: 10,
  dec: 11,
});

// Patterns are hoisted so they are compiled once, not once per slide block.
const POST_HREF_RE = /href="(\/p\/[^"]+)"/;
const IMAGE_ALT_RE = /alt="([^"]+)"/;
const POST_DATE_RE = /(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d{1,2}),?\s+(\d{4})/i;
const SUBTITLE_RE = /line-clamp-3[^>]*>.*?<span[^>]*>([^<]{20,})<\/span>/s;

// --- Private helpers --------------------------------------------------------

/**
 * Make text safe to place inside an XML element.
 * Ampersands that already start an XML-valid entity reference (as they do when
 * copied out of an HTML attribute) are left alone to avoid double escaping.
 * @param {string} text
 * @returns {string}
 */
function escapeXml(text) {
  return text
    .replace(/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);)/g, '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;');
}

/**
 * Wrap text in a CDATA section. A literal "]]>" inside the text would close
 * the section early, so it is split across two adjacent sections.
 * @param {string} text
 * @returns {string}
 */
function toCdata(text) {
  return `<![CDATA[${text.replaceAll(']]>', ']]]]><![CDATA[>')}]]>`;
}

/**
 * Find a "Mon DD, YYYY" date in a slide block and return it as midnight UTC.
 * The date is built from its numeric parts instead of being handed to the
 * Date string parser, so the result does not depend on the host time zone.
 * Falls back to the current time when no valid calendar date is found.
 * @param {string} block
 * @returns {Date}
 */
function parsePostDate(block) {
  const match = block.match(POST_DATE_RE);
  if (!match) return new Date();

  const [, monthName, dayText, yearText] = match;
  const monthIndex = MONTH_INDEX[monthName.toLowerCase()];
  const day = Number.parseInt(dayText, 10);
  const year = Number.parseInt(yearText, 10);
  const parsed = new Date(Date.UTC(year, monthIndex, day));

  // Date.UTC silently rolls impossible days (e.g. "Feb 31") into the next
  // month; reject those rather than publishing a date the page never showed.
  const isCalendarDate = parsed.getUTCMonth() === monthIndex && parsed.getUTCDate() === day;
  return isCalendarDate ? parsed : new Date();
}

/**
 * Extract newsletter posts from the archive page HTML.
 * @param {string} html
 * @returns {Array<{title: string, link: string, pubDate: Date, description: string}>}
 */
function parseArchiveItems(html) {
  const items = [];
  const seenUrls = new Set();

  // Each post card sits in a segment that follows an "embla__slide" marker.
  for (const block of html.split('embla__slide')) {
    const path = block.match(POST_HREF_RE)?.[1];
    if (!path) continue;

    const link = `${SITE_URL}${path}`;
    if (seenUrls.has(link)) continue;
    // Marked as seen before the title check: only the first block that links
    // to a given post is ever considered.
    seenUrls.add(link);

    // The post title is taken from the first alt attribute in the block.
    const title = block.match(IMAGE_ALT_RE)?.[1] ?? '';
    if (title.length < MIN_TITLE_LENGTH) continue;

    const description = block.match(SUBTITLE_RE)?.[1].trim() ?? '';
    items.push({ title, link, pubDate: parsePostDate(block), description });
  }

  return items;
}

/**
 * Render one post as an RSS <item> element.
 * @param {{title: string, link: string, pubDate: Date, description: string}} item
 * @returns {string}
 */
function renderRssItem({ title, link, pubDate, description }) {
  const safeLink = escapeXml(link);
  return [
    '    <item>',
    `      <title>${toCdata(title)}</title>`,
    `      <link>${safeLink}</link>`,
    `      <guid>${safeLink}</guid>`,
    `      <pubDate>${pubDate.toUTCString()}</pubDate>`,
    `      <description>${toCdata(description)}</description>`,
    `      <source url="${SITE_URL}">FwdStart Newsletter</source>`,
    '    </item>',
  ].join('\n');
}

/**
 * Build the complete RSS 2.0 document from the first MAX_FEED_ITEMS posts.
 * @param {Array<{title: string, link: string, pubDate: Date, description: string}>} items
 * @returns {string}
 */
function buildRssFeed(items) {
  const renderedItems = items
    .slice(0, MAX_FEED_ITEMS)
    .map((item) => renderRssItem(item))
    .join('\n');

  return [
    '<?xml version="1.0" encoding="UTF-8"?>',
    '<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">',
    '  <channel>',
    '    <title>FwdStart Newsletter</title>',
    `    <link>${SITE_URL}</link>`,
    '    <description>Forward-thinking startup and VC news from MENA and beyond</description>',
    '    <language>en-us</language>',
    `    <lastBuildDate>${new Date().toUTCString()}</lastBuildDate>`,
    '    <atom:link href="https://worldmonitor.app/api/fwdstart" rel="self" type="application/rss+xml"/>',
    renderedItems,
    '  </channel>',
    '</rss>',
  ].join('\n');
}

// --- Entry point ------------------------------------------------------------

/**
 * Scrape the FwdStart newsletter archive page and return it as an RSS feed.
 *
 * Responses:
 * - 403 JSON `{ error }` when `isDisallowedOrigin(req)` is true.
 * - 200 RSS XML (cacheable for 30 minutes) on success.
 * - 502 JSON `{ error, details }` when the upstream fetch fails, times out or
 *   answers with a non-2xx status.
 *
 * @param {Request} req Incoming request; only handed to the CORS helpers.
 * @returns {Promise<Response>}
 */
export default async function handler(req) {
  const cors = getCorsHeaders(req);

  if (isDisallowedOrigin(req)) {
    return new Response(JSON.stringify({ error: 'Origin not allowed' }), {
      status: 403,
      // The body is JSON, so label it as such (matches the 502 error path).
      headers: { 'Content-Type': 'application/json', ...cors },
    });
  }

  try {
    const response = await fetch(ARCHIVE_URL, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
        'Accept': 'text/html,application/xhtml+xml',
      },
      signal: AbortSignal.timeout(FETCH_TIMEOUT_MS),
    });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }

    const rss = buildRssFeed(parseArchiveItems(await response.text()));

    return new Response(rss, {
      headers: {
        'Content-Type': 'application/xml; charset=utf-8',
        ...cors,
        'Cache-Control': 'public, max-age=1800, s-maxage=1800, stale-while-revalidate=300',
      },
    });
  } catch (error) {
    console.error('FwdStart scraper error:', error);
    return new Response(JSON.stringify({
      error: 'Failed to fetch FwdStart archive',
      details: error.message,
    }), {
      status: 502,
      headers: {
        'Content-Type': 'application/json',
        ...cors,
      },
    });
  }
}