From 1a2295157e91a6bb653d1eb2689755398ffcf76c Mon Sep 17 00:00:00 2001 From: Elie Habib Date: Mon, 20 Apr 2026 10:19:03 +0400 Subject: [PATCH] feat(digest): DIGEST_SCORE_MIN absolute score floor for the brief (#3224) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(digest): DIGEST_SCORE_MIN absolute score floor for the brief Problem ------- The 2026-04-20 08:00 brief contained 12 stories, 7 of which were duplicates of 4 events, alongside low-importance filler (niche commodity, domestic crime). notification-relay's IMPORTANCE_SCORE_MIN gate (#3223, set to 63) only applies to the realtime fanout path. The digest cron reads the same story:track:*.currentScore but has NO absolute score floor — it just ranks and slices(0, 30), so on slow news days low-importance items bubble up to fill slots. Change ------ scripts/seed-digest-notifications.mjs: - New getDigestScoreMin() reads DIGEST_SCORE_MIN env at call time (Railway flips apply on the next cron tick, no redeploy). - Default 0 = no-op, so this PR is behaviour-neutral until the env var is set on Railway. - Filter runs AFTER deduplicateStories() so it drops clusters by the REPRESENTATIVE's score (which is the highest-scoring member of its cluster per materializeCluster's sort). - One-line operator log when the floor fires: [digest] score floor dropped N of M clusters (DIGEST_SCORE_MIN=X) tests/digest-score-floor.test.mjs (6 regressions): - getDigestScoreMin reads from process.env (not a module const) - default is 0 (no-op) - rejects non-integer / negative values (degrades to 0) - filter runs AFTER dedup, BEFORE slice(0, DIGEST_MAX_ITEMS) - short-circuits when floor is 0 (no wasted filter pass) - log line emits "dropped N of M clusters" Operator activation ------------------- Set on Railway seed-digest-notifications service: DIGEST_SCORE_MIN=63 Start at 63 to match the realtime gate, then nudge up/down based on the log lines over ~24h. Unset = off (pre-PR behaviour). Why not bundle a cosine-threshold bump -------------------------------------- The cosine-threshold tuning (0.60 -> 0.55 per the threshold probe) is an env-only flip already supported by the dedup orchestrator. Bundling an env-default change into this PR would slow rollback. Operator sets DIGEST_DEDUP_COSINE_THRESHOLD=0.55 on Railway as a separate action; this PR stays scoped to the score floor. Verification ------------ - npm run test:data 5825/5825 pass - tests/digest-score-floor 6/6 pass - tests/edge-functions 171/171 pass - typecheck + typecheck:api clean - biome check on changed files clean (pre-existing main() complexity warning on this file is unchanged) - lint:md 0 errors - version:check OK Post-Deploy Monitoring & Validation ----------------------------------- - **What to monitor** after setting DIGEST_SCORE_MIN on Railway: - `[digest] score floor dropped` lines — expect ~5-25% of clusters dropped on bulk-send ticks (stories=700+) - `[digest] Cron run complete: N digest(s) sent` stays > 0 - **Expected healthy behaviour** - 0-5 clusters dropped on normal ~80-story ticks - 50-200 dropped on bulk 700+ story ticks - brief still reports 10-30 stories for PRO users - **Failure signals / rollback** - 0 digests sent for 24h after flipping the var - user-visible brief now has < 10 stories - Rollback: unset DIGEST_SCORE_MIN on Railway dashboard (instant, no deploy), next cron tick reverts to unfiltered behaviour - **Validation window**: 24h - **Owner**: koala73 Related ------- - #3218 LLM prompt upgrade (source of importanceScore quality) - #3221 geopolitical scope for critical - #3223 notification-relay realtime gate (mirror knob) - #3200 embedding-based dedup (the other half of brief quality) * fix(digest): return null (not []) when score floor drains every cluster Greptile P2 finding on PR #3224. When DIGEST_SCORE_MIN is set high enough to filter every cluster, buildDigest previously returned [] (empty array). The caller's `if (!stories)` guard only catches falsy values, so [] slipped past the "No stories in window" skip-log and the run reached formatDigest([], nowMs) which returns null, then silently continued at the !storyListPlain check. Flow was still correct (no digest sent) but operators lost the observability signal to distinguish "floor too high" from "no news today" from "dedup ate everything". Fix: - buildDigest now returns null when the post-floor list is empty, matching the pre-dedup-empty path. Caller's existing !stories guard fires the canonical skip-log. - Emits a distinct `[digest] score floor dropped ALL N clusters (DIGEST_SCORE_MIN=X) — skipping user` line BEFORE the return, so operators can spot an over-aggressive floor in the logs. - Test added covering both the null-return contract and the distinct "dropped ALL" log line. 7/7 dedup-score-floor tests pass. --- scripts/seed-digest-notifications.mjs | 44 ++++++++++- tests/digest-score-floor.test.mjs | 104 ++++++++++++++++++++++++++ 2 files changed, 147 insertions(+), 1 deletion(-) create mode 100644 tests/digest-score-floor.test.mjs diff --git a/scripts/seed-digest-notifications.mjs b/scripts/seed-digest-notifications.mjs index f78b9072f..21909a52f 100644 --- a/scripts/seed-digest-notifications.mjs +++ b/scripts/seed-digest-notifications.mjs @@ -80,6 +80,19 @@ const AI_SUMMARY_CACHE_TTL = 3600; // 1h const AI_DIGEST_ENABLED = process.env.AI_DIGEST_ENABLED !== '0'; const ENTITLEMENT_CACHE_TTL = 900; // 15 min +// Absolute importance-score floor applied to the digest AFTER dedup. +// Mirrors the realtime notification-relay gate (IMPORTANCE_SCORE_MIN) +// but lives on the brief/digest side so operators can tune them +// independently — e.g. let realtime page at score>=63 while the brief +// digest drops anything <50. Default 0 = no filtering; ship disabled +// so this PR is a no-op until Railway flips the env. Setting the var +// to any positive integer drops every cluster whose representative +// currentScore is below it. +function getDigestScoreMin() { + const raw = Number.parseInt(process.env.DIGEST_SCORE_MIN ?? '0', 10); + return Number.isInteger(raw) && raw >= 0 ? raw : 0; +} + // ── Brief composer (consolidation of the retired seed-brief-composer) ────── const BRIEF_URL_SIGNING_SECRET = process.env.BRIEF_URL_SIGNING_SECRET ?? ''; @@ -276,7 +289,36 @@ async function buildDigest(rule, windowStartMs) { if (stories.length === 0) return null; stories.sort((a, b) => b.currentScore - a.currentScore); - const deduped = await deduplicateStories(stories); + const dedupedAll = await deduplicateStories(stories); + // Apply the absolute-score floor AFTER dedup so the floor runs on + // the representative's score (mentionCount-sum doesn't change the + // score field; the rep is the highest-scoring member of its + // cluster). At DIGEST_SCORE_MIN=0 this is a no-op. + const scoreFloor = getDigestScoreMin(); + const deduped = scoreFloor > 0 + ? dedupedAll.filter((s) => Number(s.currentScore ?? 0) >= scoreFloor) + : dedupedAll; + if (scoreFloor > 0 && dedupedAll.length !== deduped.length) { + console.log( + `[digest] score floor dropped ${dedupedAll.length - deduped.length} ` + + `of ${dedupedAll.length} clusters (DIGEST_SCORE_MIN=${scoreFloor})`, + ); + } + // If the floor drained every cluster, return null with a distinct + // log line so operators can tell "floor too high" apart from "no + // stories in window" (the caller treats both as a skip but the + // root causes are different — without this line the main-loop + // "No stories in window" message never fires because [] is truthy + // and silences the diagnostic at the caller's guard). + if (deduped.length === 0) { + if (scoreFloor > 0 && dedupedAll.length > 0) { + console.log( + `[digest] score floor dropped ALL ${dedupedAll.length} clusters ` + + `(DIGEST_SCORE_MIN=${scoreFloor}) — skipping user`, + ); + } + return null; + } const top = deduped.slice(0, DIGEST_MAX_ITEMS); const allSourceCmds = []; diff --git a/tests/digest-score-floor.test.mjs b/tests/digest-score-floor.test.mjs new file mode 100644 index 000000000..1b36073b6 --- /dev/null +++ b/tests/digest-score-floor.test.mjs @@ -0,0 +1,104 @@ +/** + * Regression tests for the DIGEST_SCORE_MIN floor applied after the + * dedup step in scripts/seed-digest-notifications.mjs. + * + * Matches the repo's existing pattern for digest-mode regression + * tests (read the source, assert structural invariants) — the cron + * has a top-level env-exit block that makes importing it at test + * time fragile, so we guard on shape instead. + * + * Run: node --test tests/digest-score-floor.test.mjs + */ + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __dirname = dirname(fileURLToPath(import.meta.url)); +const src = readFileSync( + resolve(__dirname, '../scripts/seed-digest-notifications.mjs'), + 'utf-8', +); + +describe('DIGEST_SCORE_MIN env floor', () => { + it('reads DIGEST_SCORE_MIN from process.env at call time', () => { + // Function-based read — not a module-level constant — so Railway + // env flips take effect on the next cron tick without a redeploy. + assert.match(src, /function\s+getDigestScoreMin\s*\(\)\s*\{/); + assert.match(src, /process\.env\.DIGEST_SCORE_MIN/); + }); + + it('default is 0 (no-op) so this PR is a behaviour-neutral ship', () => { + assert.match(src, /process\.env\.DIGEST_SCORE_MIN\s*\?\?\s*['"]0['"]/); + }); + + it('rejects non-integer / negative values', () => { + // The parser returns 0 on NaN / negative so a misconfigured env + // value degrades to "no floor" rather than blowing up the cron. + assert.match(src, /Number\.isInteger\(raw\)\s*&&\s*raw\s*>=\s*0\s*\?\s*raw\s*:\s*0/); + }); + + it('filter runs AFTER deduplicateStories (score is the rep cluster score)', () => { + // The representative's currentScore is the max within its cluster + // (materializeCluster sorts by currentScore DESC and takes items[0]), + // so filtering after dedup only drops clusters whose HIGHEST-scoring + // member is below the floor. + const dedupIdx = src.indexOf('await deduplicateStories(stories)'); + const filterIdx = src.indexOf('dedupedAll.filter'); + const sliceIdx = src.indexOf('DIGEST_MAX_ITEMS'); + assert.ok(dedupIdx > 0, 'deduplicateStories call must exist'); + assert.ok(filterIdx > 0, 'score-floor filter must exist'); + assert.ok(dedupIdx < filterIdx, 'filter must run after dedup'); + assert.ok( + filterIdx < src.indexOf('.slice(0, DIGEST_MAX_ITEMS)'), + 'filter must run before the top-30 slice', + ); + void sliceIdx; + }); + + it('short-circuits when floor is 0 (no wasted filter pass)', () => { + assert.match( + src, + /scoreFloor\s*>\s*0\s*\n?\s*\?\s*dedupedAll\.filter/, + ); + }); + + it('logs a "dropped N of M clusters" line when the floor fires', () => { + // Operators need to know how aggressive the floor is. Silent + // filtering on a per-tick basis would make it impossible to + // notice that the floor is dropping too much. The log spans + // two template fragments (concatenated with +) so we assert on + // the fragments independently rather than a cross-line regex. + assert.ok( + src.includes('score floor dropped'), + 'log fragment "score floor dropped" must be present', + ); + assert.ok( + src.includes('clusters (DIGEST_SCORE_MIN=${scoreFloor})'), + 'log fragment with the scoreFloor value must be present', + ); + }); + + it('returns null when floor drains every cluster (caller skips cleanly)', () => { + // Greptile P2 regression: if buildDigest returned [] rather than + // null when the floor emptied the list, the caller's `if (!stories)` + // guard (which checks falsiness, so [] slips through) would stop + // logging the canonical "No stories in window" line, and the + // only skip-signal would be a swallowed formatDigest=>null at the + // `!storyListPlain` check. Contract is: empty-after-floor returns + // null so the caller takes the same path as pre-dedup-empty. + assert.match(src, /if\s*\(\s*deduped\.length\s*===\s*0\s*\)\s*\{/); + // A distinct log line fires BEFORE the return so operators can + // tell "floor too high" apart from "no news today". + assert.ok( + src.includes('score floor dropped ALL'), + 'distinct "dropped ALL" log line must fire when the floor drains everything', + ); + assert.ok( + src.includes('skipping user'), + 'log line must mention the user is being skipped', + ); + }); +});