// WorldMonitor Brief compose library. // // Pure helpers for producing the per-user brief envelope that the // hosted magazine route (api/brief/*) + dashboard panel + future // channels all consume. Shared between: // - scripts/seed-digest-notifications.mjs (the consolidated cron; // composes a brief for every user it's about to dispatch a // digest to, so the magazine URL can be injected into the // notification output). // - future tests + ad-hoc tools. // // Deliberately has NO top-level side effects: no env guards, no // process.exit, no main(). Import anywhere. // // History: this file used to include a stand-alone Railway cron // (`seed-brief-composer.mjs`). That path was retired in the // consolidation PR — the digest cron now owns the compose+send // pipeline so there is exactly one cron writing brief:{userId}: // {issueDate} keys. import { assembleStubbedBriefEnvelope, filterTopStories, issueDateInTz, } from '../../shared/brief-filter.js'; // ── Rule dedupe (one brief per user, not per variant) ─────────────────────── const SENSITIVITY_RANK = { all: 0, high: 1, critical: 2 }; // Exported so the cron orchestration's two-pass winner walk // (sortedDue / sortedAll) can sort each pass identically to how // `groupEligibleRulesByUser` already orders candidates here. Kept as // a same-shape function so callers can reuse it without re-deriving // the priority key. export function compareRules(a, b) { const aFull = a.variant === 'full' ? 0 : 1; const bFull = b.variant === 'full' ? 0 : 1; if (aFull !== bFull) return aFull - bFull; // Default missing sensitivity to 'high' (NOT 'all') so the rank // matches what compose/buildDigest/cache/log actually treat the // rule as. Otherwise a legacy undefined-sensitivity rule would be // ranked as the most-permissive 'all' and tried first, but compose // would then apply a 'high' filter — shipping a narrow brief while // an explicit 'all' rule for the same user is never tried. // See PR #3387 review (P2). 
const aRank = SENSITIVITY_RANK[a.sensitivity ?? 'high'] ?? 0; const bRank = SENSITIVITY_RANK[b.sensitivity ?? 'high'] ?? 0; if (aRank !== bRank) return aRank - bRank; return (a.updatedAt ?? 0) - (b.updatedAt ?? 0); } /** * Group eligible (not-opted-out) rules by userId with each user's * candidates sorted in preference order. Callers walk the candidate * list and take the first that produces non-empty stories — falls * back across variants cleanly. */ export function groupEligibleRulesByUser(rules) { const byUser = new Map(); for (const rule of rules) { if (!rule || typeof rule.userId !== 'string') continue; if (rule.aiDigestEnabled === false) continue; const list = byUser.get(rule.userId); if (list) list.push(rule); else byUser.set(rule.userId, [rule]); } for (const list of byUser.values()) list.sort(compareRules); return byUser; } /** * @deprecated Kept for existing test imports. Prefer * groupEligibleRulesByUser + per-user fallback at call sites. */ export function dedupeRulesByUser(rules) { const out = []; for (const candidates of groupEligibleRulesByUser(rules).values()) { if (candidates.length > 0) out.push(candidates[0]); } return out; } // ── Failure gate ───────────────────────────────────────────────────────────── /** * Decide whether the consolidated cron should exit non-zero because * the brief-write failure rate is structurally bad (not just a * transient blip). Denominator is ATTEMPTED writes, not eligible * users: skipped-empty users never reach the write path and must not * dilute the ratio. 
/**
 * @param {{ success: number; failed: number; thresholdRatio?: number }} counters
 * @returns {boolean} true when the cron run should exit non-zero.
 */
export function shouldExitNonZero({ success, failed, thresholdRatio = 0.05 }) {
  if (failed <= 0) return false;
  const attempted = success + failed;
  if (attempted <= 0) return false;
  // Tiny batches trip on a single failure (the Math.max floor of 1);
  // larger batches must cross the ratio of attempted writes.
  const threshold = Math.max(1, Math.floor(attempted * thresholdRatio));
  return failed >= threshold;
}

// ── Insights fetch ──────────────────────────────────────────────────────────

/** Unwrap news:insights:v1 envelope and project the fields the brief needs. */
export function extractInsights(raw) {
  // Tolerate both the wrapped envelope ({ data: {...} }) and the bare payload.
  const data = raw?.data ?? raw;
  const topStories = Array.isArray(data?.topStories) ? data.topStories : [];
  // When the producer did not materialise the aggregate counters, fall
  // back to the visible story count / zero.
  const clusterCount = Number.isFinite(data?.clusterCount) ? data.clusterCount : topStories.length;
  const multiSourceCount = Number.isFinite(data?.multiSourceCount) ? data.multiSourceCount : 0;
  return {
    topStories,
    numbers: { clusters: clusterCount, multiSource: multiSourceCount },
  };
}

// ── Date + display helpers ──────────────────────────────────────────────────

const MONTH_NAMES = [
  'January', 'February', 'March', 'April', 'May', 'June',
  'July', 'August', 'September', 'October', 'November', 'December',
];

/** "2026-04-24" → "24 April 2026". Assumes a well-formed YYYY-MM-DD input. */
export function dateLongFromIso(iso) {
  const [y, m, d] = iso.split('-').map(Number);
  return `${d} ${MONTH_NAMES[m - 1]} ${y}`;
}

/** "2026-04-24" → "24.04" — the DD.MM issue code shown on the magazine. */
export function issueCodeFromIso(iso) {
  const [, m, d] = iso.split('-');
  return `${d}.${m}`;
}

/**
 * Local wall-clock hour (0-23) for `nowMs` in an IANA timezone.
 * Falls back to 9 (a morning-brief default) on an invalid zone or
 * unparseable formatter output.
 */
export function localHourInTz(nowMs, timezone) {
  try {
    const fmt = new Intl.DateTimeFormat('en-US', {
      timeZone: timezone,
      hour: 'numeric',
      // hourCycle 'h23' instead of `hour12: false`: the latter resolves to
      // h23 OR h24 depending on locale data, and an h24 resolution formats
      // midnight as "24" — an out-of-range hour for every consumer of this
      // value. 'h23' pins midnight to 0.
      hourCycle: 'h23',
    });
    const hour = fmt.formatToParts(new Date(nowMs)).find((p) => p.type === 'hour')?.value;
    const n = Number(hour);
    return Number.isFinite(n) ? n : 9;
  } catch {
    // Intl.DateTimeFormat throws RangeError on unknown timeZone ids.
    return 9;
  }
}

export function userDisplayNameFromId(userId) {
  // Clerk IDs look like "user_2abc…". Phase 3b will hydrate real
  // names via a Convex query; for now a generic placeholder so the
  // magazine's greeting reads naturally.
  void userId;
  return 'Reader';
}

// ── Compose a full brief for a single rule ──────────────────────────────────

// Cap on stories shown per user per brief.
//
// Default 12 — kept at the historical value because the offline sweep
// harness (scripts/sweep-topic-thresholds.mjs) showed bumping the cap
// to 16 against 2026-04-24 production replay data DROPPED visible
// quality at the active 0.45 threshold (visible_quality 0.916 → 0.716;
// positions 13-16 are mostly singletons or members of "should-separate"
// clusters at this threshold, so they dilute without helping adjacency).
//
// Env-tunable via DIGEST_MAX_STORIES_PER_USER so future sweep evidence
// (different threshold, different label set, different pool composition)
// can be acted on with a Railway env flip without a redeploy. Any
// invalid / non-positive value falls back to the 12 default.
//
// "Are we getting better" signal: re-run scripts/sweep-topic-thresholds.mjs
// with --cap N before flipping the env, and the daily
// scripts/brief-quality-report.mjs after.
function readMaxStoriesPerUser() {
  const raw = process.env.DIGEST_MAX_STORIES_PER_USER;
  if (raw == null || raw === '') return 12;
  const n = Number.parseInt(raw, 10);
  return Number.isFinite(n) && n > 0 ? n : 12;
}

// Exported so brief-llm.mjs (buildDigestPrompt + hashDigestInput) can
// slice to the same cap. Hard-coding `slice(0, 12)` there would mean
// the LLM prose only references the first 12 stories even when the
// brief envelope carries more — a quiet mismatch between what the
// reader sees as story cards vs the AI summary above them. Reviewer
// P1 on PR #3389.
export const MAX_STORIES_PER_USER = readMaxStoriesPerUser();

/**
 * Filter + assemble a BriefEnvelope for one alert rule from a
 * prebuilt upstream top-stories list (news:insights:v1 shape).
 *
 * @deprecated The live path is composeBriefFromDigestStories(), which
 * reads from the same digest:accumulator pool as the email. This
 * entry point is kept only for tests that stub a news:insights payload
 * directly — real runs would ship a brief with a different story
 * list than the email and should use the digest-stories path.
 *
 * @param {object} rule — enabled alertRule row
 * @param {{ topStories: unknown[]; numbers: { clusters: number; multiSource: number } }} insights
 * @param {{ nowMs: number }} [opts]
 * @returns {object | null} envelope, or null when no story survives the filter
 */
export function composeBriefForRule(rule, insights, { nowMs = Date.now() } = {}) {
  // Default to 'high' (NOT 'all') for parity with composeBriefFromDigestStories,
  // buildDigest, the digestFor cache key, and the per-attempt log line.
  // See PR #3387 review (P2).
  const sensitivity = rule.sensitivity ?? 'high';
  const tz = rule.digestTimezone ?? 'UTC';
  const stories = filterTopStories({
    stories: insights.topStories,
    sensitivity,
    maxStories: MAX_STORIES_PER_USER,
  });
  if (stories.length === 0) return null;
  const issueDate = issueDateInTz(nowMs, tz);
  return assembleStubbedBriefEnvelope({
    user: { name: userDisplayNameFromId(rule.userId), tz },
    stories,
    issueDate,
    dateLong: dateLongFromIso(issueDate),
    issue: issueCodeFromIso(issueDate),
    insightsNumbers: insights.numbers,
    // Same nowMs as the rest of the envelope so the function stays
    // deterministic for a given input — tests + retries see identical
    // output.
    issuedAt: nowMs,
    localHour: localHourInTz(nowMs, tz),
  });
}

// ── Compose from digest-accumulator stories (the live path) ─────────────────

// RSS titles routinely end with " - " / " | " /
// " — " (Google News normalised form + most major wires).
// Leaving the suffix in place means the brief headline reads like
// "... as Iran reimposes restrictions - AP News" instead of "... as
// Iran reimposes restrictions", and the source attribution underneath
// ends up duplicated. We strip the suffix ONLY when it matches the
// primarySource we're about to attribute anyway — so we never strip
// a real subtitle that happens to look like "foo - bar".
// Matches a trailing " - tail" / " – tail" / " — tail" / " | tail"
// segment; the capture group holds the tail text after the separator.
const TRAILING_SOURCE_RE = /\s+[-\u2013\u2014|]\s+([^\s].*)$/;

/**
 * Strip a trailing publisher attribution from an RSS headline, but
 * ONLY when the tail is exactly the publisher we attribute anyway
 * (case-insensitive). A tail that merely contains the publisher
 * (e.g. "- AP News analysis") is editorial content and stays.
 *
 * @param {string} title
 * @param {string} publisher
 * @returns {string}
 */
export function stripHeadlineSuffix(title, publisher) {
  if (typeof title !== 'string' || title.length === 0) return '';
  const headline = title.trim();
  if (typeof publisher !== 'string' || publisher.length === 0) return headline;
  const match = TRAILING_SOURCE_RE.exec(headline);
  if (match === null) return headline;
  const tail = match[1].trim();
  if (tail.toLowerCase() !== publisher.toLowerCase()) return headline;
  return headline.slice(0, match.index).trimEnd();
}

/**
 * Adapter: map one digest-accumulator story (hydrated from
 * story:track:v1:{hash} — title / link / severity / lang / score /
 * mentionCount / description? — plus story:sources:v1:{hash}) to the
 * upstream top-stories shape filterTopStories consumes. category and
 * countryCode are not carried by story:track:v1; they stay undefined
 * and default cleanly downstream ('General' / 'Global').
 *
 * `link` rides through as `primaryLink` (envelope v2) so
 * filterTopStories can emit a BriefStory.sourceUrl; linkless stories
 * are still forwarded and dropped at the filter's validation boundary.
 *
 * Description plumbing (post RSS-description fix, 2026-04-24): a
 * cleaned RSS description becomes the brief's baseline description;
 * when absent, the cleaned headline stands in so Phase 3b's LLM
 * enrichment always has something to ground on.
 *
 * The stable digest `hash` is carried through so the canonical
 * synthesis prompt can emit `rankedStoryHashes` (reference by hash,
 * not position/title) and filterTopStories can re-order the pool by
 * ranking BEFORE the MAX_STORIES_PER_USER cap. Falls back to
 * titleHash when the producer did not materialise `hash`.
 *
 * @param {object} s — digest-shaped story from buildDigest()
 */
function digestStoryToUpstreamTopStory(s) {
  const sourceList = Array.isArray(s?.sources) ? s.sources : [];
  const primarySource = sourceList.length > 0 ? sourceList[0] : 'Multiple wires';
  const headline = stripHeadlineSuffix(typeof s?.title === 'string' ? s.title : '', primarySource);
  const rssDescription = typeof s?.description === 'string' ? s.description.trim() : '';

  let storyHash;
  if (typeof s?.hash === 'string' && s.hash.length > 0) {
    storyHash = s.hash;
  } else if (typeof s?.titleHash === 'string') {
    storyHash = s.titleHash;
  }

  return {
    primaryTitle: headline,
    description: rssDescription !== '' ? rssDescription : headline,
    primarySource,
    primaryLink: typeof s?.link === 'string' ? s.link : undefined,
    threatLevel: s?.severity,
    category: typeof s?.category === 'string' ? s.category : undefined,
    countryCode: typeof s?.countryCode === 'string' ? s.countryCode : undefined,
    hash: storyHash,
  };
}

/**
 * Compose a BriefEnvelope from a per-rule digest-accumulator pool
 * (the same stories the email digest uses), plus global insights
 * numbers for the stats page.
 *
 * Returns null when no story survives the sensitivity filter — the
 * caller falls back to another variant or skips the user.
 *
 * Pure / synchronous. The cron orchestration layer pre-resolves the
 * canonical synthesis (`exec` from `generateDigestProse`) and the
 * non-personalised `publicLead` (`generateDigestProsePublic`) and
 * passes them in via `opts.synthesis` — this module performs no LLM
 * I/O.
 *
 * @param {object} rule — enabled alertRule row
 * @param {unknown[]} digestStories — output of buildDigest(rule, windowStart)
 * @param {{ clusters: number; multiSource: number }} insightsNumbers
 * @param {{
 *   nowMs?: number,
 *   onDrop?: import('../../shared/brief-filter.js').DropMetricsFn,
 *   synthesis?: {
 *     lead?: string,
 *     threads?: Array<{ tag: string, teaser: string }>,
 *     signals?: string[],
 *     rankedStoryHashes?: string[],
 *     publicLead?: string,
 *     publicSignals?: string[],
 *     publicThreads?: Array<{ tag: string, teaser: string }>,
 *   },
 * }} [opts]
 *   `onDrop` is forwarded to filterTopStories so the seeder can
 *   aggregate per-user filter-drop counts without this module knowing
 *   how they are reported. `synthesis` (when provided) substitutes
 *   envelope.digest.lead / threads / signals / publicLead with the
 *   canonical synthesis from the orchestration layer, and re-orders
 *   the candidate pool by `synthesis.rankedStoryHashes` before
 *   applying the cap.
 * @returns {object | null}
 */
export function composeBriefFromDigestStories(
  rule,
  digestStories,
  insightsNumbers,
  { nowMs = Date.now(), onDrop, synthesis } = {},
) {
  if (!Array.isArray(digestStories) || digestStories.length === 0) return null;

  // Undefined sensitivity defaults to 'high' (NOT 'all'), aligning with
  // buildDigest at scripts/seed-digest-notifications.mjs:392 and the
  // digestFor cache key. The live cron path pre-filters the pool to
  // {critical, high}, so this default is a no-op for production calls —
  // but a non-prefiltered caller with undefined sensitivity would
  // otherwise silently widen to {medium, low} stories while the operator
  // log labels the attempt as 'high', misleading telemetry.
  // See PR #3387 review (P2) and Defect 2 / Solution 1 in
  // docs/plans/2026-04-24-004-fix-brief-topic-adjacency-defects-plan.md.
  const sensitivity = rule.sensitivity ?? 'high';
  const tz = rule.digestTimezone ?? 'UTC';

  const stories = filterTopStories({
    stories: digestStories.map(digestStoryToUpstreamTopStory),
    sensitivity,
    maxStories: MAX_STORIES_PER_USER,
    onDrop,
    rankedStoryHashes: synthesis?.rankedStoryHashes,
  });
  if (stories.length === 0) return null;

  const issueDate = issueDateInTz(nowMs, tz);
  const envelope = assembleStubbedBriefEnvelope({
    user: { name: userDisplayNameFromId(rule.userId), tz },
    stories,
    issueDate,
    dateLong: dateLongFromIso(issueDate),
    issue: issueCodeFromIso(issueDate),
    insightsNumbers,
    issuedAt: nowMs,
    localHour: localHourInTz(nowMs, tz),
  });

  // Shallow-merge the canonical synthesis into the envelope's digest so
  // assembleStubbedBriefEnvelope stays the single source for
  // greeting/numbers/threads-default. Only the LLM-driven fields the
  // orchestrator actually supplied are overridden; anything missing
  // keeps its stub value — graceful degradation when synthesis fails.
  const digest = envelope?.data?.digest;
  if (synthesis && digest) {
    const { lead, threads, signals, publicLead, publicSignals, publicThreads } = synthesis;
    if (typeof lead === 'string' && lead.length > 0) {
      digest.lead = lead;
    }
    if (Array.isArray(threads) && threads.length > 0) {
      digest.threads = threads;
    }
    if (Array.isArray(signals)) {
      digest.signals = signals;
    }
    if (typeof publicLead === 'string' && publicLead.length > 0) {
      digest.publicLead = publicLead;
    }
    // Public signals/threads are non-personalised siblings produced by
    // generateDigestProsePublic. Captured separately from the
    // personalised fields above so the share-URL renderer never has to
    // choose between leaking and omitting a whole page.
    if (Array.isArray(publicSignals) && publicSignals.length > 0) {
      digest.publicSignals = publicSignals;
    }
    if (Array.isArray(publicThreads) && publicThreads.length > 0) {
      digest.publicThreads = publicThreads;
    }
  }
  return envelope;
}