From 1958b34f554d67727f8fea710bdd0e4374a140c6 Mon Sep 17 00:00:00 2001 From: Elie Habib Date: Thu, 23 Apr 2026 11:25:05 +0400 Subject: [PATCH] fix(digest-dedup): CLUSTERING typo fallback fails closed to complete-link (#3331) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DIGEST_DEDUP_CLUSTERING previously fell to 'single' on unrecognised values, which silently defeated the documented kill switch. A typo like `DIGEST_DEDUP_CLUSTERING=complet` during an over-merge incident would stick with the aggressive single-link merger instead of rolling back to the conservative complete-link algorithm. Mirror the DIGEST_DEDUP_MODE typo pattern (PR #3247): - Unrecognised value → fall to 'complete' (SAFE / conservative). - Surface the raw value via new `invalidClusteringRaw` config field. - Emit a warn line on the dedup orchestrator's entry path so operators see the typo alongside the kill-switch-took-effect message. Valid values 'single' (default), 'complete', unset, empty, and any case variation all behave unchanged. Only true typos change behaviour — and the new behaviour is the kill-switch-safe one. Tests: updated the existing case that codified the old behaviour plus added coverage for (a) multiple typo variants falling to complete with invalidClusteringRaw set, (b) case-insensitive valid values not triggering the typo path, and (c) the orchestrator emitting the warn line even on the jaccard-kill-switch codepath (since CLUSTERING intent applies to both modes). 81/81 dedup tests pass. --- scripts/lib/brief-dedup.mjs | 40 +++++++++++++++++------ tests/brief-dedup-embedding.test.mjs | 48 ++++++++++++++++++++++++++-- 2 files changed, 76 insertions(+), 12 deletions(-) diff --git a/scripts/lib/brief-dedup.mjs b/scripts/lib/brief-dedup.mjs index bbc765554..0d736d14e 100644 --- a/scripts/lib/brief-dedup.mjs +++ b/scripts/lib/brief-dedup.mjs @@ -60,6 +60,7 @@ import { defaultRedisPipeline } from './_upstash-pipeline.mjs'; * topicGroupingEnabled: boolean, * topicThreshold: number, * invalidModeRaw: string | null, + * invalidClusteringRaw: string | null, * }} */ export function readOrchestratorConfig(env = process.env) { @@ -82,16 +83,30 @@ export function readOrchestratorConfig(env = process.env) { invalidModeRaw = modeRaw; } - // DIGEST_DEDUP_CLUSTERING = 'single' (default) | 'complete'. - // Single-link chains wire variants that share a strong - // intermediate headline (calibrated F1 0.73 vs complete-link 0.53 - // on real brief output). Flip to 'complete' for instant kill - // switch if single-link ever over-merges in production. + // DIGEST_DEDUP_CLUSTERING = 'single' (default when unset) | 'complete'. + // Single-link chains wire variants that share a strong intermediate + // headline (calibrated F1 0.73 vs complete-link 0.53 on real brief + // output). 'complete' is the documented kill switch for when single- + // link over-merges in production. + // + // Typo handling mirrors the MODE branch above: an unrecognised value + // falls to 'complete' (the SAFE / conservative algorithm), not back + // to 'single'. Rationale: if an operator is typing + // `DIGEST_DEDUP_CLUSTERING=complet` during an over-merge incident, + // silently sticking with the aggressive merger defeats the kill + // switch. The invalidClusteringRaw warn surfaces the typo so it's + // fixed, but the fail-closed default protects the cron meanwhile. const clusteringRaw = (env.DIGEST_DEDUP_CLUSTERING ?? '').toLowerCase(); - const clustering = - clusteringRaw === 'complete' ? 'complete' - : clusteringRaw === 'single' || clusteringRaw === '' ? 'single' - : 'single'; + let clustering; + let invalidClusteringRaw = null; + if (clusteringRaw === '' || clusteringRaw === 'single') { + clustering = 'single'; + } else if (clusteringRaw === 'complete') { + clustering = 'complete'; + } else { + clustering = 'complete'; + invalidClusteringRaw = clusteringRaw; + } const cosineRaw = Number.parseFloat(env.DIGEST_DEDUP_COSINE_THRESHOLD ?? ''); const cosineThreshold = @@ -123,6 +138,7 @@ export function readOrchestratorConfig(env = process.env) { topicGroupingEnabled, topicThreshold, invalidModeRaw, + invalidClusteringRaw, }; } @@ -160,6 +176,12 @@ export async function deduplicateStories(stories, deps = {}) { 'falling back to jaccard (safe rollback path). Valid values: embed | jaccard.', ); } + if (cfg.invalidClusteringRaw !== null) { + warn( + `[digest] dedup unrecognised DIGEST_DEDUP_CLUSTERING=${cfg.invalidClusteringRaw} — ` + + 'falling back to complete-link (safe / conservative). Valid values: single | complete.', + ); + } if (!Array.isArray(stories) || stories.length === 0) { return { reps: [], embeddingByHash: new Map(), logSummary: '' }; diff --git a/tests/brief-dedup-embedding.test.mjs b/tests/brief-dedup-embedding.test.mjs index fc65bb6f3..fb8a6d7be 100644 --- a/tests/brief-dedup-embedding.test.mjs +++ b/tests/brief-dedup-embedding.test.mjs @@ -685,10 +685,52 @@ describe('readOrchestratorConfig — DIGEST_DEDUP_CLUSTERING', () => { const cfg = readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'complete' }); assert.equal(cfg.clustering, 'complete'); }); - it('unrecognised values fall back to single (default, safe)', async () => { + it('unrecognised values fall back to COMPLETE (fail-closed kill switch), surfaces invalidClusteringRaw', async () => { + // Mirrors the MODE typo contract: a typo like CLUSTERING=complet + // during an over-merge incident must NOT silently stick with the + // aggressive 'single' merger — that defeats the kill switch. Fall + // to the SAFE conservative algorithm ('complete') and surface the + // raw value so the typo is visible in logs. const { readOrchestratorConfig } = await import('../scripts/lib/brief-dedup.mjs'); - const cfg = readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'average' }); - assert.equal(cfg.clustering, 'single'); + for (const raw of ['average', 'complet', 'SINGLE ', 'xyz']) { + const cfg = readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: raw }); + assert.equal(cfg.clustering, 'complete', `raw=${JSON.stringify(raw)} should fall to complete`); + assert.equal(cfg.invalidClusteringRaw, raw.toLowerCase(), `raw=${JSON.stringify(raw)} should surface as invalidClusteringRaw`); + } + }); + it('case-insensitive on valid values (single/SINGLE/Complete all work)', async () => { + const { readOrchestratorConfig } = await import('../scripts/lib/brief-dedup.mjs'); + assert.equal(readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'SINGLE' }).clustering, 'single'); + assert.equal(readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'Complete' }).clustering, 'complete'); + assert.equal(readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'complete' }).invalidClusteringRaw, null); + }); + it('explicit "single" and unset produce invalidClusteringRaw=null', async () => { + const { readOrchestratorConfig } = await import('../scripts/lib/brief-dedup.mjs'); + for (const env of [{}, { DIGEST_DEDUP_CLUSTERING: 'single' }, { DIGEST_DEDUP_CLUSTERING: '' }]) { + const cfg = readOrchestratorConfig(env); + assert.equal(cfg.clustering, 'single'); + assert.equal(cfg.invalidClusteringRaw, null); + } + }); + it('explicit "complete" produces invalidClusteringRaw=null', async () => { + const { readOrchestratorConfig } = await import('../scripts/lib/brief-dedup.mjs'); + const cfg = readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'complete' }); + assert.equal(cfg.clustering, 'complete'); + assert.equal(cfg.invalidClusteringRaw, null); + }); + it('deduplicateStories emits warn line on CLUSTERING typo', async () => { + const { deduplicateStories } = await import('../scripts/lib/brief-dedup.mjs'); + const warns = []; + await deduplicateStories([], { + env: { DIGEST_DEDUP_MODE: 'jaccard', DIGEST_DEDUP_CLUSTERING: 'complet' }, + warn: (line) => warns.push(line), + }); + // Even the jaccard-kill-switch path must surface the CLUSTERING typo + // since the operator intent (conservative path) is valid in both modes. + assert.ok( + warns.some((w) => /DIGEST_DEDUP_CLUSTERING=complet/.test(w) && /complete-link/.test(w)), + `expected typo warn; got: ${JSON.stringify(warns)}`, + ); }); it('structured logSummary includes clustering=', async () => { const { deduplicateStories } = await import('../scripts/lib/brief-dedup.mjs');