diff --git a/scripts/lib/brief-dedup.mjs b/scripts/lib/brief-dedup.mjs index bbc765554..0d736d14e 100644 --- a/scripts/lib/brief-dedup.mjs +++ b/scripts/lib/brief-dedup.mjs @@ -60,6 +60,7 @@ import { defaultRedisPipeline } from './_upstash-pipeline.mjs'; * topicGroupingEnabled: boolean, * topicThreshold: number, * invalidModeRaw: string | null, + * invalidClusteringRaw: string | null, * }} */ export function readOrchestratorConfig(env = process.env) { @@ -82,16 +83,30 @@ export function readOrchestratorConfig(env = process.env) { invalidModeRaw = modeRaw; } - // DIGEST_DEDUP_CLUSTERING = 'single' (default) | 'complete'. - // Single-link chains wire variants that share a strong - // intermediate headline (calibrated F1 0.73 vs complete-link 0.53 - // on real brief output). Flip to 'complete' for instant kill - // switch if single-link ever over-merges in production. + // DIGEST_DEDUP_CLUSTERING = 'single' (default when unset) | 'complete'. + // Single-link chains wire variants that share a strong intermediate + // headline (calibrated F1 0.73 vs complete-link 0.53 on real brief + // output). 'complete' is the documented kill switch for when single- + // link over-merges in production. + // + // Typo handling mirrors the MODE branch above: an unrecognised value + // falls to 'complete' (the SAFE / conservative algorithm), not back + // to 'single'. Rationale: if an operator is typing + // `DIGEST_DEDUP_CLUSTERING=complet` during an over-merge incident, + // silently sticking with the aggressive merger defeats the kill + // switch. The invalidClusteringRaw warn surfaces the typo so it's + // fixed, but the fail-closed default protects the cron meanwhile. const clusteringRaw = (env.DIGEST_DEDUP_CLUSTERING ?? '').toLowerCase(); - const clustering = - clusteringRaw === 'complete' ? 'complete' - : clusteringRaw === 'single' || clusteringRaw === '' ? 'single' - : 'single'; + let clustering; + let invalidClusteringRaw = null; + if (clusteringRaw === '' || clusteringRaw === 'single') { + clustering = 'single'; + } else if (clusteringRaw === 'complete') { + clustering = 'complete'; + } else { + clustering = 'complete'; + invalidClusteringRaw = clusteringRaw; + } const cosineRaw = Number.parseFloat(env.DIGEST_DEDUP_COSINE_THRESHOLD ?? ''); const cosineThreshold = @@ -123,6 +138,7 @@ export function readOrchestratorConfig(env = process.env) { topicGroupingEnabled, topicThreshold, invalidModeRaw, + invalidClusteringRaw, }; } @@ -160,6 +176,12 @@ export async function deduplicateStories(stories, deps = {}) { 'falling back to jaccard (safe rollback path). Valid values: embed | jaccard.', ); } + if (cfg.invalidClusteringRaw !== null) { + warn( + `[digest] dedup unrecognised DIGEST_DEDUP_CLUSTERING=${cfg.invalidClusteringRaw} — ` + + 'falling back to complete-link (safe / conservative). Valid values: single | complete.', + ); + } if (!Array.isArray(stories) || stories.length === 0) { return { reps: [], embeddingByHash: new Map(), logSummary: '' }; diff --git a/tests/brief-dedup-embedding.test.mjs b/tests/brief-dedup-embedding.test.mjs index fc65bb6f3..fb8a6d7be 100644 --- a/tests/brief-dedup-embedding.test.mjs +++ b/tests/brief-dedup-embedding.test.mjs @@ -685,10 +685,52 @@ describe('readOrchestratorConfig — DIGEST_DEDUP_CLUSTERING', () => { const cfg = readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'complete' }); assert.equal(cfg.clustering, 'complete'); }); - it('unrecognised values fall back to single (default, safe)', async () => { + it('unrecognised values fall back to COMPLETE (fail-closed kill switch), surfaces invalidClusteringRaw', async () => { + // Mirrors the MODE typo contract: a typo like CLUSTERING=complet + // during an over-merge incident must NOT silently stick with the + // aggressive 'single' merger — that defeats the kill switch. Fall + // to the SAFE conservative algorithm ('complete') and surface the + // raw value so the typo is visible in logs. const { readOrchestratorConfig } = await import('../scripts/lib/brief-dedup.mjs'); - const cfg = readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'average' }); - assert.equal(cfg.clustering, 'single'); + for (const raw of ['average', 'complet', 'SINGLE ', 'xyz']) { + const cfg = readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: raw }); + assert.equal(cfg.clustering, 'complete', `raw=${JSON.stringify(raw)} should fall to complete`); + assert.equal(cfg.invalidClusteringRaw, raw.toLowerCase(), `raw=${JSON.stringify(raw)} should surface as invalidClusteringRaw`); + } + }); + it('case-insensitive on valid values (single/SINGLE/Complete all work)', async () => { + const { readOrchestratorConfig } = await import('../scripts/lib/brief-dedup.mjs'); + assert.equal(readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'SINGLE' }).clustering, 'single'); + assert.equal(readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'Complete' }).clustering, 'complete'); + assert.equal(readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'complete' }).invalidClusteringRaw, null); + }); + it('explicit "single" and unset produce invalidClusteringRaw=null', async () => { + const { readOrchestratorConfig } = await import('../scripts/lib/brief-dedup.mjs'); + for (const env of [{}, { DIGEST_DEDUP_CLUSTERING: 'single' }, { DIGEST_DEDUP_CLUSTERING: '' }]) { + const cfg = readOrchestratorConfig(env); + assert.equal(cfg.clustering, 'single'); + assert.equal(cfg.invalidClusteringRaw, null); + } + }); + it('explicit "complete" produces invalidClusteringRaw=null', async () => { + const { readOrchestratorConfig } = await import('../scripts/lib/brief-dedup.mjs'); + const cfg = readOrchestratorConfig({ DIGEST_DEDUP_CLUSTERING: 'complete' }); + assert.equal(cfg.clustering, 'complete'); + assert.equal(cfg.invalidClusteringRaw, null); + }); + it('deduplicateStories emits warn line on CLUSTERING typo', async () => { + const { deduplicateStories } = await import('../scripts/lib/brief-dedup.mjs'); + const warns = []; + await deduplicateStories([], { + env: { DIGEST_DEDUP_MODE: 'jaccard', DIGEST_DEDUP_CLUSTERING: 'complet' }, + warn: (line) => warns.push(line), + }); + // Even the jaccard-kill-switch path must surface the CLUSTERING typo + // since the operator intent (conservative path) is valid in both modes. + assert.ok( + warns.some((w) => /DIGEST_DEDUP_CLUSTERING=complet/.test(w) && /complete-link/.test(w)), + `expected typo warn; got: ${JSON.stringify(warns)}`, + ); }); it('structured logSummary includes clustering=', async () => { const { deduplicateStories } = await import('../scripts/lib/brief-dedup.mjs');