mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
fix(brief): unblock whyMatters analyst endpoint (middleware 403) + DIGEST_ONLY_USER filter (#3255)
* fix(brief): unblock whyMatters analyst endpoint + add DIGEST_ONLY_USER filter

Three changes, all operational for PR #3248's brief-why-matters feature.

1. middleware.ts PUBLIC_API_PATHS allowlist

Railway logs post-#3248 merge showed every cron call to /api/internal/brief-why-matters returning 403 — middleware's "short UA" guard (~L183) rejects Node undici's default UA before the endpoint's own Bearer-auth runs. The feature never executed in prod; three-layer fallback silently shipped legacy Gemini output. Same class as /api/seed-contract-probe (2026-04-15). Endpoint still carries its own subtle-crypto HMAC auth, so bypassing the UA gate is safe.

2. Explicit UA on callAnalystWhyMatters fetch

Defense-in-depth. Explicit 'worldmonitor-digest-notifications/1.0' keeps the endpoint reachable if PUBLIC_API_PATHS is ever refactored, and makes cron traffic distinguishable from ops curl in logs.

3. DIGEST_ONLY_USER=user_xxx filter

Operator single-user test flag. Set on Railway to run compose + send for one user on the next tick (then unset) — validates new features end-to-end without fanning out. Empty/unset = normal fan-out. Applied right after rule fetch so both compose and dispatch paths respect it.

Regression tests: 15 new cases in tests/middleware-bot-gate.test.mts pin every PUBLIC_API_PATHS entry against 3 triggers (empty/short/curl UA) plus a negative sibling-path suite so a future prefix-match refactor can't silently unblock /api/internal/.

Tests: 6043 pass. typecheck + typecheck:api clean. biome: pre-existing main() complexity warning bumped 74→78 by the filter block (unchanged in character from pre-PR).

* test(middleware): expand sibling-path negatives to cover all 3 trigger UAs

Greptile flagged: `SIBLING_PATHS` was only tested with `EMPTY_UA`. Under the current middleware chain this is sufficient (sibling paths hit the short-UA OR BOT_UA 403 regardless), but it doesn't pin *which* guard fires. A future refactor that moves `PUBLIC_API_PATHS.has(path)` later in the chain could let a curl or undici UA pass on a sibling path without this suite failing.

Fix: iterate the 3 sibling paths against all 3 trigger UAs (empty, short/undici, curl). Every combination must still 403 regardless of which guard catches it. 6 new test cases.

Tests: 35 pass in the middleware-bot-gate suite (was 29).
This commit is contained in:
@@ -14,7 +14,18 @@ const SOCIAL_PREVIEW_PATHS = new Set(['/api/story', '/api/og-story']);
|
||||
// UptimeRobot + ops curl. Was blocked by the curl/bot UA regex before this
|
||||
// exception landed (Vercel log 2026-04-15: "Middleware 403 Forbidden" on
|
||||
// /api/seed-contract-probe).
|
||||
const PUBLIC_API_PATHS = new Set(['/api/version', '/api/health', '/api/seed-contract-probe']);
|
||||
// - /api/internal/brief-why-matters: requires RELAY_SHARED_SECRET Bearer
|
||||
// (subtle-crypto HMAC timing-safe compare in server/_shared/internal-auth.ts).
|
||||
// Called from the Railway digest-notifications cron whose fetch() uses the
|
||||
// Node undici default UA, which is short enough to trip the "no UA or
|
||||
// suspiciously short" 403 below (Railway log 2026-04-21 post-#3248 merge:
|
||||
// every cron call returned 403 and silently fell back to legacy Gemini).
|
||||
const PUBLIC_API_PATHS = new Set([
|
||||
'/api/version',
|
||||
'/api/health',
|
||||
'/api/seed-contract-probe',
|
||||
'/api/internal/brief-why-matters',
|
||||
]);
|
||||
|
||||
const SOCIAL_IMAGE_UA =
|
||||
/Slack-ImgProxy|Slackbot|twitterbot|facebookexternalhit|linkedinbot|telegrambot|whatsapp|discordbot|redditbot/i;
|
||||
|
||||
@@ -156,6 +156,13 @@ async function callAnalystWhyMatters(story) {
|
||||
headers: {
|
||||
Authorization: `Bearer ${RELAY_SECRET}`,
|
||||
'Content-Type': 'application/json',
|
||||
// Explicit UA — Node undici's default is short/empty enough to
|
||||
// trip middleware.ts's "No user-agent or suspiciously short"
|
||||
// 403 path. Defense-in-depth alongside the PUBLIC_API_PATHS
|
||||
// allowlist. Distinct from ops curl / UptimeRobot so log grep
|
||||
// disambiguates cron traffic from operator traffic.
|
||||
'User-Agent': 'worldmonitor-digest-notifications/1.0',
|
||||
Accept: 'application/json',
|
||||
},
|
||||
body: JSON.stringify({ story }),
|
||||
signal: AbortSignal.timeout(15_000),
|
||||
@@ -1299,6 +1306,25 @@ async function main() {
|
||||
return;
|
||||
}
|
||||
|
||||
// Operator single-user test filter. Set DIGEST_ONLY_USER=user_xxx on
|
||||
// the Railway service to run the compose + send paths for exactly
|
||||
// one user on the next cron tick, then unset. Intended for
|
||||
// validating new features (brief enrichment, rendering, email
|
||||
// template changes) end-to-end without fanning out to every PRO user.
|
||||
// Empty string / unset = normal fan-out (production default).
|
||||
const onlyUser = (process.env.DIGEST_ONLY_USER ?? '').trim();
|
||||
if (onlyUser) {
|
||||
const before = rules.length;
|
||||
rules = rules.filter((r) => r && r.userId === onlyUser);
|
||||
console.log(
|
||||
`[digest] DIGEST_ONLY_USER=${onlyUser} — filtered ${before} rules → ${rules.length}`,
|
||||
);
|
||||
if (rules.length === 0) {
|
||||
console.log(`[digest] No rules matched userId=${onlyUser} — nothing to do`);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Compose per-user brief envelopes once per run (extracted so main's
|
||||
// complexity score stays in the biome budget). Failures MUST NOT
|
||||
// block digest sends — we carry counters forward and apply the
|
||||
|
||||
@@ -129,3 +129,69 @@ describe('middleware bot gate / carousel allowlist', () => {
|
||||
assert.equal(res.status, 403);
|
||||
});
|
||||
});
|
||||
|
||||
// ── PUBLIC_API_PATHS allowlist (secret-authed internal endpoints) ────────────
|
||||
// The middleware's "no UA or suspiciously short" 403 guard (middleware.ts:
|
||||
// ~L183) blocks Node/undici default-UA callers. Internal endpoints that carry
|
||||
// their own Bearer-auth must be in PUBLIC_API_PATHS to bypass the gate.
|
||||
//
|
||||
// History:
|
||||
// - /api/seed-contract-probe hit this 2026-04-15 (UptimeRobot + ops curl).
|
||||
// - /api/internal/brief-why-matters hit this 2026-04-21 immediately after
|
||||
// PR #3248 merge — every Railway cron call returned 403 and silently
|
||||
// fell back to legacy Gemini. No functional breakage (3-layer fallback
|
||||
// absorbed it) but the new feature never ran in prod.
|
||||
//
|
||||
// These tests pin the allowlist so a future middleware refactor (e.g. the
|
||||
// BOT_UA regex being narrowed, or PUBLIC_API_PATHS being reorganized) can't
|
||||
// silently drop an entry.
|
||||
|
||||
describe('middleware PUBLIC_API_PATHS — secret-authed internal endpoints bypass UA gate', () => {
|
||||
// UAs that would normally 403 on any other API route.
|
||||
const EMPTY_UA = '';
|
||||
const UNDICI_UA = 'undici'; // Too short (<10 chars) — triggers short-UA 403.
|
||||
const CURL_UA = GENERIC_CURL_UA; // Matches curl/ in BOT_UA regex.
|
||||
|
||||
const TRIGGERS = [
|
||||
{ label: 'empty UA (middleware short-UA gate)', ua: EMPTY_UA },
|
||||
{ label: 'short UA (Node undici default-ish)', ua: UNDICI_UA },
|
||||
{ label: 'curl UA (BOT_UA regex hit)', ua: CURL_UA },
|
||||
];
|
||||
|
||||
const ALLOWED_PATHS = [
|
||||
'/api/version',
|
||||
'/api/health',
|
||||
'/api/seed-contract-probe',
|
||||
'/api/internal/brief-why-matters',
|
||||
];
|
||||
|
||||
for (const path of ALLOWED_PATHS) {
|
||||
for (const { label, ua } of TRIGGERS) {
|
||||
it(`${path} bypasses the UA gate (${label})`, () => {
|
||||
const res = call(path, ua);
|
||||
assert.equal(res, undefined, `${path} must pass through the middleware (no 403); its own auth gate handles access`);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Negative case: a sibling path that is NOT in the allowlist must still 403
|
||||
// under EACH of the 3 triggers. This catches a future refactor that moves
|
||||
// the PUBLIC_API_PATHS check later in the chain (e.g. behind a broadened
|
||||
// prefix-match) and might let one of the trigger UAs slip through on a
|
||||
// sibling path without this suite failing. Pin all three guard paths.
|
||||
const SIBLING_PATHS = [
|
||||
'/api/internal/brief-why-matters-v2', // near-miss suffix
|
||||
'/api/internal/', // directory only
|
||||
'/api/internal/other', // different leaf
|
||||
];
|
||||
|
||||
for (const path of SIBLING_PATHS) {
|
||||
for (const { label, ua } of TRIGGERS) {
|
||||
it(`${path} does NOT bypass the UA gate — ${label}`, () => {
|
||||
const res = call(path, ua);
|
||||
assert.ok(res instanceof Response, `${path} must still hit the 403 guard under ${label}`);
|
||||
assert.equal(res.status, 403);
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user