// ---- file: scripts/lib/_upstash-pipeline.mjs (new) ----

/**
 * Shared Upstash pipeline helper for scripts/lib/* modules.
 *
 * The canonical helper for api/* code is api/_upstash-json.js:redisPipeline.
 * scripts/lib/* modules historically avoid importing from api/, so this
 * file exposes the same behaviour (single POST to /pipeline, 10s timeout,
 * returns null on failure) without the cross-dir import.
 *
 * Keep the shape identical to api/_upstash-json.js:redisPipeline so
 * callers can be swapped if we ever relax the boundary.
 */

/**
 * Execute a batch of Upstash Redis commands with a single REST call.
 *
 * NOTE(review): generic annotations below were reconstructed after the
 * source text lost its angle-bracket spans — confirm against the PR.
 *
 * @param {Array<Array<string>>} commands Upstash pipeline commands
 * @param {object} [opts]
 * @param {number} [opts.timeoutMs=10_000]
 * @returns {Promise<Array<unknown> | null>} null on any failure
 */
export async function defaultRedisPipeline(commands, { timeoutMs = 10_000 } = {}) {
  const url = process.env.UPSTASH_REDIS_REST_URL;
  const token = process.env.UPSTASH_REDIS_REST_TOKEN;
  // Missing config or an empty batch is treated as "nothing to do",
  // not an error — callers uniformly handle the null.
  if (!url || !token || commands.length === 0) return null;
  try {
    const resp = await fetch(`${url}/pipeline`, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${token}`,
        'Content-Type': 'application/json',
        'User-Agent': 'worldmonitor-digest/1.0',
      },
      body: JSON.stringify(commands),
      signal: AbortSignal.timeout(timeoutMs),
    });
    if (!resp.ok) return null;
    return await resp.json();
  } catch {
    // Network error, abort, or JSON parse failure — all collapse to
    // null by contract; the caller decides whether that is fatal.
    return null;
  }
}

// ---- file: scripts/lib/brief-dedup-consts.mjs (new) ----

/**
 * Tunables for brief-dedup (Jaccard legacy + embedding replacement).
 *
 * Env-driven helpers are exported as functions so the orchestrator
 * reads them at call time, not at module load — Railway env-var flips
 * must take effect without a redeploy.
 *
 * See docs/plans/2026-04-19-001-feat-embedding-based-story-dedup-plan.md.
 */

// ── Jaccard (legacy path, kept as permanent fallback) ───────────────────
// Preserves origin/main behaviour byte-for-byte under MODE=jaccard.
// Threshold 0.55 matches the production implementation prior to this PR.
export const JACCARD_MERGE_THRESHOLD = 0.55;

// ── Embedding / complete-link clustering ────────────────────────────────
export const EMBED_MODEL = 'openai/text-embedding-3-small';
export const EMBED_DIMS = 512;

// Cache key prefix — version segment MUST bump on model or dimension
// change. Silent threshold drift on model upgrade is the documented
// #1 production regression; don't rely on TTL expiry to drain stale
// vectors.
export const CACHE_VERSION = 'v1:text-3-small-512';
export const CACHE_KEY_PREFIX = `brief:emb:${CACHE_VERSION}`;
export const CACHE_TTL_SECONDS = 14 * 24 * 60 * 60; // 14 days

// OpenRouter embeddings endpoint (OpenAI-compatible passthrough).
export const OPENROUTER_EMBEDDINGS_URL = 'https://openrouter.ai/api/v1/embeddings';

// Env-driven runtime knobs live in brief-dedup.mjs:readOrchestratorConfig
// — a single source of truth, read at call entry so Railway env flips
// take effect on the next tick. An earlier version exported getter
// helpers here too; they had zero callers and were deleted.

// An earlier iteration exposed an `__constants` bag so tests could
// assert against tunables in one deepEqual. Once the regex-extraction
// harness was removed, named imports became cleaner — the bag got
// deleted. If you need to assert a constant, import it directly.

/* ---- file: scripts/lib/brief-dedup-embed.mjs (new) ----
 * Pure clustering + entity-veto logic for the embedding dedup path.
 * (module header continues in the next chunk)
 */
/* (brief-dedup-embed.mjs module header, continued)
 *
 * This module is intentionally pure and side-effect free:
 *   - No Redis.
 *   - No fetch.
 *   - No env lookups (orchestrator reads env and passes thresholds in).
 *
 * The orchestrator in brief-dedup.mjs wires these helpers to the
 * embedding client and the legacy Jaccard fallback.
 */

import { cosineSimilarity } from './brief-embedding.mjs';
import {
  COMMON_CAPITALIZED,
  LOCATION_GAZETTEER,
} from './entity-gazetteer.mjs';

// ── Entity extraction / veto ───────────────────────────────────────────

// Matches "proper-noun-like" tokens: leading capital, then at least one
// more letter / hyphen / apostrophe / dot (so "I" and "A" never match).
const CAPITALIZED_TOKEN_RE = /^[A-Z][a-zA-Z\-'.]{1,}$/;

// Longest multi-word entry in the gazetteer (e.g. "ho chi minh city"
// is 4 tokens). Precomputed once; the sliding window in
// extractEntities never tries phrases longer than this, so the cost
// stays O(N * MAX_PHRASE_LEN) rather than O(N²).
const MAX_LOCATION_PHRASE_LEN = (() => {
  let max = 1;
  for (const entry of LOCATION_GAZETTEER) {
    const len = entry.split(/\s+/).length;
    if (len > max) max = len;
  }
  return max;
})();

// Strip trailing then leading punctuation from a whitespace token.
function cleanToken(t) {
  return t.replace(/[.,;:!?"')\]]+$/g, '').replace(/^["'([]+/g, '');
}

/**
 * Pull proper-noun-like entities from a headline and classify them
 * against the gazetteer.
 *
 * Locations are matched as **whole phrases** — single tokens like
 * "Tokyo" AND multi-token phrases like "Red Sea", "Strait of Hormuz",
 * "New York", "Abu Dhabi" all work. An earlier version tokenized on
 * whitespace and only checked single tokens, which silently made
 * ~30% of the gazetteer unreachable (bodies of water, regions,
 * compound city names). That turned off the veto for a whole class
 * of real headlines — hence the sliding-window greedy match below.
 *
 * Rules:
 *   1. Tokenize on whitespace, strip surrounding punctuation.
 *   2. Greedy match: at each position, try the longest multi-word
 *      location phrase first, down to 2 tokens. A phrase matches
 *      only when its first AND last tokens are capitalized (so
 *      "the middle east" in lowercase prose doesn't match, but
 *      "Middle East" in a headline does). Lowercase connectors
 *      like "of" / "and" may appear between them.
 *   3. If no multi-word match: fall back to single-token lookup.
 *      Capitalized + not in COMMON_CAPITALIZED → Location if in
 *      gazetteer, Actor otherwise.
 *
 * Sentence-start tokens are intentionally kept — news headlines
 * front-load the anchor entity ("Iran...", "Trump...").
 *
 * @param {string} title
 * @returns {{ locations: string[], actors: string[] }}
 */
export function extractEntities(title) {
  if (typeof title !== 'string' || title.length === 0) {
    return { locations: [], actors: [] };
  }
  const tokens = title.split(/\s+/).map(cleanToken).filter(Boolean);

  const locations = new Set();
  const actors = new Set();
  let i = 0;
  while (i < tokens.length) {
    // Greedy longest-phrase scan for multi-word locations.
    let matchedLen = 0;
    const maxTry = Math.min(MAX_LOCATION_PHRASE_LEN, tokens.length - i);
    for (let L = maxTry; L >= 2; L--) {
      const first = tokens[i];
      const last = tokens[i + L - 1];
      if (!CAPITALIZED_TOKEN_RE.test(first) || !CAPITALIZED_TOKEN_RE.test(last)) {
        continue;
      }
      const phrase = tokens.slice(i, i + L).join(' ').toLowerCase();
      if (LOCATION_GAZETTEER.has(phrase)) {
        locations.add(phrase);
        matchedLen = L;
        break;
      }
    }
    if (matchedLen > 0) {
      i += matchedLen;
      continue;
    }
    // Single-token classification.
    const tok = tokens[i];
    if (CAPITALIZED_TOKEN_RE.test(tok)) {
      const lower = tok.toLowerCase();
      if (!COMMON_CAPITALIZED.has(lower)) {
        if (LOCATION_GAZETTEER.has(lower)) {
          locations.add(lower);
        } else {
          actors.add(lower);
        }
      }
    }
    i += 1;
  }
  return {
    locations: [...locations],
    actors: [...actors],
  };
}

/**
 * Pairwise merge-veto.
 *
 * Fires when two titles share at least one location AND each side
 * has at least one actor the other doesn't — "same venue, different
 * protagonists" (canonical case: "Biden meets Xi in Tokyo" vs
 * "Biden meets Putin in Tokyo").
 *
 * Empty proper-noun sets on either side → defer to cosine (return false).
 *
 * @param {string} titleA
 * @param {string} titleB
 * @returns {boolean}
 */
export function shouldVeto(titleA, titleB) {
  const a = extractEntities(titleA);
  const b = extractEntities(titleB);

  if (a.actors.length === 0 && b.actors.length === 0) return false;

  const bLocSet = new Set(b.locations);
  const sharedLocation = a.locations.some((loc) => bLocSet.has(loc));
  if (!sharedLocation) return false;

  const aActorSet = new Set(a.actors);
  const bActorSet = new Set(b.actors);
  const aHasUnique = a.actors.some((act) => !bActorSet.has(act));
  const bHasUnique = b.actors.some((act) => !aActorSet.has(act));
  return aHasUnique && bHasUnique;
}

// ── Complete-link clustering ───────────────────────────────────────────

/**
 * Greedy first-fit complete-link clustering.
 *
 * Admission rule: a candidate joins an existing cluster ONLY IF, for
 * every member already in that cluster:
 *   1. cosine(candidate.embedding, member.embedding) >= cosineThreshold
 *   2. vetoFn(candidate, member) === false (if vetoFn provided)
 *
 * Single-link would admit C into {A,B} as long as C~B clears the bar,
 * even if cosine(A,C) is low — the transitive chaining that re-
 * created the bridge-pollution failure mode on the Jaccard side. We
 * do NOT want that.
 *
 * Input items MUST be pre-sorted by the caller (the orchestrator in
 * brief-dedup.mjs sorts by [currentScore DESC, sha256(title) ASC]).
 * Changing input order changes cluster composition; the orchestrator
 * owns the determinism contract.
 *
 * @param {Array<{title:string, embedding:number[]}>} items
 * @param {object} opts
 * @param {number} opts.cosineThreshold
 * @param {((a: {title:string}, b: {title:string}) => boolean) | null} [opts.vetoFn]
 * @returns {{ clusters: number[][], vetoFires: number }}
 */
export function completeLinkCluster(items, { cosineThreshold, vetoFn = null }) {
  if (!Array.isArray(items)) {
    return { clusters: [], vetoFires: 0 };
  }

  const clusters = [];
  let vetoFires = 0;

  for (let i = 0; i < items.length; i++) {
    const candidate = items[i];
    if (!candidate || !Array.isArray(candidate.embedding)) {
      // Defensive: if an item somehow lacks an embedding, it goes in
      // its own cluster rather than poisoning the whole batch.
      clusters.push([i]);
      continue;
    }

    let joined = false;
    for (const cluster of clusters) {
      let admissible = true;
      for (const j of cluster) {
        const member = items[j];
        const cos = cosineSimilarity(candidate.embedding, member.embedding);
        if (cos < cosineThreshold) {
          admissible = false;
          break;
        }
        if (vetoFn?.(candidate, member)) {
          admissible = false;
          vetoFires += 1;
          break;
        }
      }
      if (admissible) {
        cluster.push(i);
        joined = true;
        break;
      }
    }
    if (!joined) clusters.push([i]);
  }

  return { clusters, vetoFires };
}

/* ---- file: scripts/lib/brief-dedup-jaccard.mjs (new) ----
 * Bag-of-words Jaccard dedup — extracted verbatim from the earlier
 * inline implementation in scripts/seed-digest-notifications.mjs so
 * the embedding orchestrator can fall back to the exact historical
 * behaviour on any failure (provider outage, wall-clock overrun,
 * REMOTE_EMBED_ENABLED=0, MODE=jaccard).
 *
 * DO NOT tune the threshold here.
 * (module header continues in the next chunk)
 */
/* (brief-dedup-jaccard.mjs module header, continued)
 * If embedding accuracy is still
 * short of the flip criterion at the end of the shadow window, fix
 * calibration or the cosine threshold — not this fallback. This
 * function's contract is "whatever production did before the
 * embedding path landed".
 */

// ── Stop-word set ──────────────────────────────────────────────────────
// Pruned list of highly-common tokens that dominate Jaccard numerators
// without carrying topical signal. Extracted unchanged from the
// pre-embedding seed-digest-notifications.mjs.
const STOP_WORDS = new Set([
  'the', 'a', 'an', 'in', 'on', 'at', 'to', 'for', 'of', 'is', 'are', 'was', 'were',
  'has', 'have', 'had', 'be', 'been', 'by', 'from', 'with', 'as', 'it', 'its',
  'says', 'say', 'said', 'according', 'reports', 'report', 'officials', 'official',
  'us', 'new', 'will', 'can', 'could', 'would', 'may', 'also', 'who', 'that', 'this',
  'after', 'about', 'over', 'more', 'up', 'out', 'into', 'than', 'some', 'other',
]);

/**
 * Strip wire-service attribution suffixes like " - Reuters" /
 * " | AP News" / " - reuters.com" so headlines from the same event
 * are comparable across outlets.
 */
export function stripSourceSuffix(title) {
  return title
    .replace(/\s*[-–—]\s*[\w\s.]+\.(?:com|org|net|co\.uk)\s*$/i, '')
    .replace(/\s*[-–—]\s*(?:Reuters|AP News|BBC|CNN|Al Jazeera|France 24|DW News|PBS NewsHour|CBS News|NBC|ABC|Associated Press|The Guardian|NOS Nieuws|Tagesschau|CNBC|The National)\s*$/i, '');
}

/**
 * Tokenise a headline into a lower-cased Set of content words, with
 * stop-words and 1–2 char tokens dropped. The Set shape is what the
 * Jaccard function expects.
 */
export function extractTitleWords(title) {
  return new Set(
    stripSourceSuffix(title)
      .toLowerCase()
      .replace(/[^\p{L}\p{N}\s]/gu, '')
      .split(/\s+/)
      .filter((w) => w.length > 2 && !STOP_WORDS.has(w)),
  );
}

/**
 * Classic Jaccard coefficient on two Sets. |A∩B| / |A∪B|. Returns 0
 * when either Set is empty (no arithmetic surprise on Set(0)).
 */
export function jaccardSimilarity(setA, setB) {
  if (setA.size === 0 || setB.size === 0) return 0;
  let intersection = 0;
  for (const w of setA) if (setB.has(w)) intersection++;
  return intersection / (setA.size + setB.size - intersection);
}

/**
 * Representative-selection + mentionCount-sum + mergedHashes contract
 * that composeBriefFromDigestStories / sources-population rely on.
 *
 * Shared helper so the orchestrator's embed path and the Jaccard
 * fallback apply identical semantics — drift here silently breaks
 * downstream. Accepts an array of story refs (already a single
 * cluster) and returns one story object.
 *
 * @param {Array<{hash:string, currentScore:number, mentionCount:number}>} items
 */
export function materializeCluster(items) {
  const sorted = [...items].sort(
    (a, b) => b.currentScore - a.currentScore || b.mentionCount - a.mentionCount,
  );
  const best = { ...sorted[0] };
  if (sorted.length > 1) {
    best.mentionCount = sorted.reduce((sum, s) => sum + s.mentionCount, 0);
  }
  best.mergedHashes = sorted.map((s) => s.hash);
  return best;
}

/**
 * Greedy single-link clustering by Jaccard > 0.55. Preserves the
 * representative-selection + mentionCount-sum + mergedHashes contract
 * that composeBriefFromDigestStories / sources-population rely on.
 *
 * Threshold is a hard-coded literal (not env-tunable) on purpose —
 * this is the permanent fallback. If the number needs to change,
 * the right answer is to flip the caller to MODE=embed with a
 * properly-calibrated cosine threshold, not to fiddle with Jaccard.
 *
 * @param {Array<{title:string, currentScore:number, mentionCount:number, hash:string}>} stories
 */
export function deduplicateStoriesJaccard(stories) {
  const clusters = [];
  for (const story of stories) {
    const words = extractTitleWords(story.title);
    let merged = false;
    for (const cluster of clusters) {
      // NOTE: candidate is compared against the cluster's FIRST
      // member's word set only — that is the historical single-link
      // behaviour this fallback must preserve.
      if (jaccardSimilarity(words, cluster.words) > 0.55) {
        cluster.items.push(story);
        merged = true;
        break;
      }
    }
    if (!merged) clusters.push({ words, items: [story] });
  }
  return clusters.map(({ items }) => materializeCluster(items));
}

/* ---- file: scripts/lib/brief-dedup.mjs (new) ----
 * Dedup orchestrator — the single entry point the digest cron calls
 * to cluster its story list.
 *
 * Public: deduplicateStories(stories, deps?) returns the same shape
 * the earlier inline Jaccard produced:
 *   [{ ...representativeStoryFields, mentionCount, mergedHashes }, ...]
 *
 * Env knobs (read at call entry — Railway env flips take effect on
 * the next cron tick without a redeploy):
 *   DIGEST_DEDUP_MODE = 'embed' (default) | 'jaccard'
 *     (jaccard = instant kill switch)
 *   DIGEST_DEDUP_ENTITY_VETO_ENABLED = '0' to bypass the actor/
 *     location veto; default on
 *   DIGEST_DEDUP_COSINE_THRESHOLD = float in (0, 1], default 0.60
 *   DIGEST_DEDUP_WALL_CLOCK_MS = int ms, default 45000
 *
 * Anything non-{embed,jaccard} in MODE = jaccard with a loud warn so
 * a typo can't stay hidden.
 *
 * All-or-nothing fallback: if the embed path throws for any reason
 * (provider outage, timeout, missing API key, malformed response),
 * the orchestrator falls back to Jaccard for the entire batch and
 * emits a warn with `reason=<name>`. The cron NEVER fails
 * because embeddings flaked.
 * (module header ends in the next chunk)
 */
+ */ + +import { createHash } from 'node:crypto'; + +import { + deduplicateStoriesJaccard, + materializeCluster, + stripSourceSuffix, +} from './brief-dedup-jaccard.mjs'; +import { + completeLinkCluster, + shouldVeto, +} from './brief-dedup-embed.mjs'; +import { + embedBatch, + normalizeForEmbedding, +} from './brief-embedding.mjs'; +import { defaultRedisPipeline } from './_upstash-pipeline.mjs'; + +// ── Config resolution (env read at call entry) ───────────────────────── + +/** + * @param {Record} [env] + * @returns {{ + * mode: 'jaccard' | 'embed', + * entityVetoEnabled: boolean, + * cosineThreshold: number, + * wallClockMs: number, + * invalidModeRaw: string | null, + * }} + */ +export function readOrchestratorConfig(env = process.env) { + const modeRaw = (env.DIGEST_DEDUP_MODE ?? '').toLowerCase(); + let mode; + let invalidModeRaw = null; + if (modeRaw === '' || modeRaw === 'embed') { + mode = 'embed'; + } else if (modeRaw === 'jaccard') { + mode = 'jaccard'; + } else { + // Unrecognised value — default to embed (the normal prod path) + // but surface so a DIGEST_DEDUP_MODE=embbed typo is obvious. + mode = 'embed'; + invalidModeRaw = modeRaw; + } + + const cosineRaw = Number.parseFloat(env.DIGEST_DEDUP_COSINE_THRESHOLD ?? ''); + const cosineThreshold = + Number.isFinite(cosineRaw) && cosineRaw > 0 && cosineRaw <= 1 ? cosineRaw : 0.60; + + const wallClockRaw = Number.parseInt(env.DIGEST_DEDUP_WALL_CLOCK_MS ?? '', 10); + const wallClockMs = + Number.isInteger(wallClockRaw) && wallClockRaw > 0 ? 
wallClockRaw : 45_000; + + return { + mode, + entityVetoEnabled: env.DIGEST_DEDUP_ENTITY_VETO_ENABLED !== '0', + cosineThreshold, + wallClockMs, + invalidModeRaw, + }; +} + +// ── Helpers ──────────────────────────────────────────────────────────── + +function titleHashHex(normalizedTitle) { + return createHash('sha256').update(normalizedTitle).digest('hex'); +} + +// ── Public entry point ───────────────────────────────────────────────── + +/** + * @param {Array<{hash:string, title:string, currentScore:number, mentionCount:number}>} stories + * @param {object} [deps] + * @param {Record} [deps.env] + * @param {typeof embedBatch} [deps.embedBatch] + * @param {typeof deduplicateStoriesJaccard} [deps.jaccard] + * @param {typeof defaultRedisPipeline} [deps.redisPipeline] + * @param {() => number} [deps.now] + * @param {(line: string) => void} [deps.log] + * @param {(line: string) => void} [deps.warn] + */ +export async function deduplicateStories(stories, deps = {}) { + const cfg = readOrchestratorConfig(deps.env ?? process.env); + const jaccard = deps.jaccard ?? deduplicateStoriesJaccard; + const log = deps.log ?? ((line) => console.log(line)); + const warn = deps.warn ?? ((line) => console.warn(line)); + + if (cfg.invalidModeRaw !== null) { + warn( + `[digest] dedup unrecognised DIGEST_DEDUP_MODE=${cfg.invalidModeRaw} — ` + + 'defaulting to embed. Valid values: embed | jaccard.', + ); + } + + if (!Array.isArray(stories) || stories.length === 0) return []; + + // Kill switch: Railway operator sets MODE=jaccard to instantly + // revert to the legacy deduper without a redeploy. + if (cfg.mode === 'jaccard') { + return jaccard(stories); + } + + const embedImpl = deps.embedBatch ?? embedBatch; + const pipelineImpl = deps.redisPipeline ?? defaultRedisPipeline; + const nowImpl = deps.now ?? 
(() => Date.now()); + const started = nowImpl(); + + try { + // Normalize + deterministic pre-sort so greedy first-fit is + // permutation-invariant (property-tested in the embed test file). + const prepared = stories.map((story, originalIndex) => { + const normalizedTitle = normalizeForEmbedding(story.title); + // `title` here is used as the veto input — must be case- + // preserving (extractEntities looks at capitalised tokens) + // but MUST NOT carry wire-source suffixes (" - Reuters" etc.) + // that would otherwise leak into the actor set and fire the + // veto on two copies of the same event from different outlets. + const vetoTitle = stripSourceSuffix(story.title); + return { + story, + originalIndex, + hash: story.hash, + title: vetoTitle, + normalizedTitle, + titleHashHex: titleHashHex(normalizedTitle), + currentScore: Number(story.currentScore ?? 0), + mentionCount: Number(story.mentionCount ?? 1), + }; + }); + prepared.sort( + (a, b) => + b.currentScore - a.currentScore || + (a.titleHashHex < b.titleHashHex ? -1 : a.titleHashHex > b.titleHashHex ? 1 : 0), + ); + + const embeddings = await embedImpl( + prepared.map((p) => p.normalizedTitle), + { + redisPipeline: pipelineImpl, + wallClockMs: cfg.wallClockMs, + now: nowImpl, + }, + ); + if (!Array.isArray(embeddings) || embeddings.length !== prepared.length) { + throw new Error('embedBatch returned unexpected result'); + } + const items = prepared.map((p, i) => ({ ...p, embedding: embeddings[i] })); + + const vetoFn = cfg.entityVetoEnabled + ? 
(a, b) => shouldVeto(a.title, b.title) + : null; + const clusterResult = completeLinkCluster(items, { + cosineThreshold: cfg.cosineThreshold, + vetoFn, + }); + + const embedClusters = clusterResult.clusters; + const embedOutput = embedClusters.map((cluster) => + materializeCluster(cluster.map((i) => items[i].story)), + ); + + log( + `[digest] dedup mode=embed stories=${items.length} clusters=${embedClusters.length} ` + + `veto_fires=${clusterResult.vetoFires} ms=${nowImpl() - started} ` + + `threshold=${cfg.cosineThreshold} fallback=false`, + ); + return embedOutput; + } catch (err) { + const reason = + err instanceof Error && typeof err.name === 'string' && err.name !== 'Error' + ? err.name + : 'other'; + const msg = err instanceof Error ? err.message : String(err); + warn( + `[digest] dedup embed path failed, falling back to Jaccard reason=${reason} msg=${msg}`, + ); + return jaccard(stories); + } +} diff --git a/scripts/lib/brief-embedding.mjs b/scripts/lib/brief-embedding.mjs new file mode 100644 index 000000000..09d89a897 --- /dev/null +++ b/scripts/lib/brief-embedding.mjs @@ -0,0 +1,300 @@ +/** + * Embedding client for brief-dedup. + * + * Exports: + * - normalizeForEmbedding(title): the SINGLE function that produces + * both the embedded string and the cache-key input. No aliasing + * possible (plan's "normalization contract"). + * - embedBatch(normalizedTitles, deps): batched, cached, all-or- + * nothing. Throws EmbeddingTimeoutError on wall-clock overrun and + * EmbeddingProviderError on any upstream failure. Never returns a + * partial result. + * + * Contract details: + * - Cache: brief:emb:v1:text-3-small-512:, + * 14-day TTL, JSON array of 512 numbers. + * - Deterministic: same input → same output vectors (cache hits) + * or same OpenRouter call (cache misses). + * - `deps` is for tests — prod callers pass nothing and get the + * real fetch / Upstash / AbortSignal wired in. 
+ */ + +import { createHash } from 'node:crypto'; + +import { + CACHE_KEY_PREFIX, + CACHE_TTL_SECONDS, + EMBED_DIMS, + EMBED_MODEL, + OPENROUTER_EMBEDDINGS_URL, +} from './brief-dedup-consts.mjs'; +import { stripSourceSuffix } from './brief-dedup-jaccard.mjs'; +import { defaultRedisPipeline } from './_upstash-pipeline.mjs'; + +export class EmbeddingProviderError extends Error { + constructor(message, { status, cause } = {}) { + super(message); + this.name = 'EmbeddingProviderError'; + if (status !== undefined) this.status = status; + if (cause !== undefined) this.cause = cause; + } +} + +export class EmbeddingTimeoutError extends Error { + constructor(message = 'Embedding wall-clock budget exceeded') { + super(message); + this.name = 'EmbeddingTimeoutError'; + } +} + +/** + * The ONE normalisation function. Cache-key input = embed-request + * input. Any caller that embeds outside this function will drift. + * + * 1. Strip wire-service suffixes (" - Reuters", " | AP News", etc.) + * via the shared stripSourceSuffix so the outlet allow-list is + * single-sourced with the Jaccard fallback. Adding a new outlet + * updates both paths at once. + * 2. Trim. + * 3. Collapse internal whitespace. + * 4. Lowercase. + */ +export function normalizeForEmbedding(title) { + if (typeof title !== 'string') return ''; + return stripSourceSuffix(title).trim().replace(/\s+/g, ' ').toLowerCase(); +} + +export function cacheKeyFor(normalizedTitle) { + const hash = createHash('sha256').update(normalizedTitle).digest('hex'); + return `${CACHE_KEY_PREFIX}:${hash}`; +} + +// Default (production) deps wiring lives in ./_upstash-pipeline.mjs so +// the orchestrator and the embedding client share one implementation. + +/** + * Look up a set of cache keys via the redis pipeline and return a + * Map of key → vector for the hits. Misses, corrupt cells, pipeline + * failures are all treated as "not in cache" — the caller falls + * through to the API. 
+ * + * Kept as a helper so embedBatch's cognitive complexity stays + * reviewable; there's no other caller. + */ +async function cacheGetBatched(uniqueKeys, pipelineImpl) { + const hits = new Map(); + if (uniqueKeys.length === 0) return hits; + const getResults = await pipelineImpl(uniqueKeys.map((k) => ['GET', k])); + if (!Array.isArray(getResults)) return hits; + for (let i = 0; i < uniqueKeys.length; i++) { + const cell = getResults[i]; + const raw = cell && typeof cell === 'object' && 'result' in cell ? cell.result : null; + if (typeof raw !== 'string') continue; + try { + const parsed = JSON.parse(raw); + if (Array.isArray(parsed) && parsed.length === EMBED_DIMS) { + hits.set(uniqueKeys[i], parsed); + } + } catch { + // Corrupt cache cell: treat as miss. Don't error — next + // successful API call will overwrite. + } + } + return hits; +} + +/** + * Single batched OpenRouter /embeddings call for `missingTitles`. + * Returns a number[N] where N = missingTitles.length. Throws + * EmbeddingTimeoutError on abort/timeout, EmbeddingProviderError on + * any other upstream failure. NEVER returns a partial result. + */ +async function callEmbeddingsApi({ fetchImpl, apiKey, missingTitles, timeoutMs }) { + // Negative / zero remaining-budget means the deadline is already past. + // Bail to the orchestrator's all-or-nothing fallback rather than open a + // doomed HTTP connection that blows the wall-clock cap by the floor. 
+ if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) { + throw new EmbeddingTimeoutError(); + } + let resp; + try { + resp = await fetchImpl(OPENROUTER_EMBEDDINGS_URL, { + method: 'POST', + headers: { + Authorization: `Bearer ${apiKey}`, + 'Content-Type': 'application/json', + 'HTTP-Referer': 'https://worldmonitor.app', + 'X-Title': 'World Monitor', + 'User-Agent': 'worldmonitor-digest/1.0', + }, + body: JSON.stringify({ + model: EMBED_MODEL, + input: missingTitles, + dimensions: EMBED_DIMS, + }), + signal: AbortSignal.timeout(timeoutMs), + }); + } catch (err) { + if (err && (err.name === 'TimeoutError' || err.name === 'AbortError')) { + throw new EmbeddingTimeoutError(); + } + throw new EmbeddingProviderError( + `embedBatch: fetch failed — ${err instanceof Error ? err.message : String(err)}`, + { cause: err }, + ); + } + if (!resp.ok) { + throw new EmbeddingProviderError( + `embedBatch: OpenRouter returned HTTP ${resp.status}`, + { status: resp.status }, + ); + } + let body; + try { + body = await resp.json(); + } catch (err) { + throw new EmbeddingProviderError( + `embedBatch: response JSON parse failed — ${err instanceof Error ? err.message : String(err)}`, + { cause: err }, + ); + } + const data = Array.isArray(body?.data) ? body.data : null; + if (!data || data.length !== missingTitles.length) { + throw new EmbeddingProviderError( + `embedBatch: expected ${missingTitles.length} embeddings, got ${data?.length ?? 'none'}`, + ); + } + // Honour entry.index if the provider re-orders; fall back to i. + const out = new Array(missingTitles.length); + for (let i = 0; i < data.length; i++) { + const entry = data[i]; + const idx = typeof entry?.index === 'number' ? entry.index : i; + const vector = entry?.embedding; + if (!Array.isArray(vector) || vector.length !== EMBED_DIMS) { + throw new EmbeddingProviderError( + `embedBatch: embedding[${idx}] has unexpected length ${vector?.length ?? 
'n/a'}`, + ); + } + out[idx] = vector; + } + return out; +} + +/** + * Embed a batch of already-normalised titles with cache look-through. + * + * @param {string[]} normalizedTitles output of normalizeForEmbedding for each title + * @param {object} [deps] + * @param {typeof fetch} [deps.fetch] + * @param {(commands: Array) => Promise | null>} [deps.redisPipeline] + * @param {() => number} [deps.now] + * @param {number} [deps.wallClockMs] + * @param {string} [deps._apiKey] OPENROUTER_API_KEY override (tests only; + * prefixed to discourage accidental spread from user-controlled objects) + * @returns {Promise} one 512-dim vector per input, in order + * + * Throws EmbeddingTimeoutError on wall-clock overrun. + * Throws EmbeddingProviderError on any upstream / parse failure. + * NEVER returns a partial batch — the orchestrator relies on this to + * collapse the entire run to Jaccard on any failure. + */ +export async function embedBatch(normalizedTitles, deps = {}) { + if (!Array.isArray(normalizedTitles)) { + throw new EmbeddingProviderError('embedBatch: normalizedTitles must be an array'); + } + if (normalizedTitles.length === 0) return []; + + // Wrap rather than assign: bare `fetch` captures the current global + // binding at lookup time, so later monkey-patches (instrumentation, + // Edge-runtime shims) don't see the wrapper. See AGENTS.md's + // "fetch.bind(globalThis) is BANNED" rule — same class of bug. + const fetchImpl = deps.fetch ?? ((...args) => globalThis.fetch(...args)); + const pipelineImpl = deps.redisPipeline ?? defaultRedisPipeline; + const nowImpl = deps.now ?? (() => Date.now()); + const wallClockMs = deps.wallClockMs ?? 45_000; + const apiKey = deps._apiKey ?? process.env.OPENROUTER_API_KEY ?? ''; + + if (!apiKey) { + // Provider failure so the orchestrator falls back to Jaccard rather + // than silently embedding with no auth. 
+ throw new EmbeddingProviderError('OPENROUTER_API_KEY not configured'); + } + + const deadline = nowImpl() + wallClockMs; + + // Deduped cache-key table. Same normalised title → same cache cell. + const keyByIndex = normalizedTitles.map((t) => cacheKeyFor(t)); + const uniqueKeys = [...new Set(keyByIndex)]; + + const vectorByKey = await cacheGetBatched(uniqueKeys, pipelineImpl); + if (nowImpl() > deadline) throw new EmbeddingTimeoutError(); + + // Build the miss list, preserving the first normalised title we + // saw for each unique key. + const missingKeys = uniqueKeys.filter((k) => !vectorByKey.has(k)); + if (missingKeys.length > 0) { + const missingTitleByKey = new Map(); + for (let i = 0; i < normalizedTitles.length; i++) { + if (!vectorByKey.has(keyByIndex[i]) && !missingTitleByKey.has(keyByIndex[i])) { + missingTitleByKey.set(keyByIndex[i], normalizedTitles[i]); + } + } + const missingTitles = missingKeys.map((k) => missingTitleByKey.get(k) ?? ''); + const freshVectors = await callEmbeddingsApi({ + fetchImpl, + apiKey, + missingTitles, + timeoutMs: deadline - nowImpl(), + }); + const cacheWrites = []; + for (let i = 0; i < freshVectors.length; i++) { + const key = missingKeys[i]; + vectorByKey.set(key, freshVectors[i]); + cacheWrites.push(['SET', key, JSON.stringify(freshVectors[i]), 'EX', String(CACHE_TTL_SECONDS)]); + } + // Cache writes are best-effort — a failure costs us a re-embed + // on the next run, never a correctness bug. + try { + await pipelineImpl(cacheWrites); + } catch { + // swallow + } + } + + // Map back to input order; duplicated titles share a vector. + const out = new Array(normalizedTitles.length); + for (let i = 0; i < normalizedTitles.length; i++) { + const v = vectorByKey.get(keyByIndex[i]); + if (!v) { + throw new EmbeddingProviderError( + `embedBatch: missing vector for index ${i} after API call`, + ); + } + out[i] = v; + } + return out; +} + +/** + * Cosine similarity for two equal-length vectors. 
/**
 * Cosine similarity between two equal-length numeric vectors.
 *
 * Yields a value in [-1, 1]; 1 means identical direction. Degenerate
 * inputs — non-arrays, length mismatch, empty vectors, or a
 * zero-magnitude vector — all map to 0 rather than throwing, so
 * callers can read the result as "no similarity signal" without
 * special-casing.
 *
 * Exported so the clusterer and tests share one implementation.
 */
export function cosineSimilarity(a, b) {
  const comparable =
    Array.isArray(a) && Array.isArray(b) && a.length > 0 && a.length === b.length;
  if (!comparable) return 0;

  let inner = 0;
  let sumSqA = 0;
  let sumSqB = 0;
  for (let k = 0; k < a.length; k++) {
    inner += a[k] * b[k];
    sumSqA += a[k] * a[k];
    sumSqB += b[k] * b[k];
  }
  if (sumSqA === 0 || sumSqB === 0) return 0;
  return inner / (Math.sqrt(sumSqA) * Math.sqrt(sumSqB));
}
const COUNTRIES = [
  // ISO-3166 short names (lowercase). Not exhaustive — top ~80 by wire
  // frequency; add on demand. Full ISO list is cheap to drop in later
  // if we see gaps in the calibration data.
  'afghanistan', 'albania', 'algeria', 'argentina', 'armenia', 'australia',
  'austria', 'azerbaijan', 'bahrain', 'bangladesh', 'belarus', 'belgium',
  'bolivia', 'bosnia', 'brazil', 'bulgaria', 'cambodia', 'cameroon', 'canada',
  'chile', 'china', 'colombia', 'congo', 'croatia', 'cuba', 'cyprus', 'czechia',
  'denmark', 'ecuador', 'egypt', 'eritrea', 'estonia', 'ethiopia', 'finland',
  'france', 'georgia', 'germany', 'ghana', 'greece', 'guatemala', 'haiti',
  'honduras', 'hungary', 'iceland', 'india', 'indonesia', 'iran', 'iraq',
  'ireland', 'israel', 'italy', 'japan', 'jordan', 'kazakhstan', 'kenya',
  'kuwait', 'kyrgyzstan', 'laos', 'latvia', 'lebanon', 'libya', 'lithuania',
  'luxembourg', 'malaysia', 'maldives', 'mali', 'malta', 'mexico', 'moldova',
  'mongolia', 'montenegro', 'morocco', 'mozambique', 'myanmar', 'nepal',
  'netherlands', 'nicaragua', 'niger', 'nigeria', 'norway', 'oman', 'pakistan',
  'panama', 'paraguay', 'peru', 'philippines', 'poland', 'portugal', 'qatar',
  'romania', 'russia', 'rwanda', 'serbia', 'singapore', 'slovakia', 'slovenia',
  'somalia', 'spain', 'sudan', 'sweden', 'switzerland', 'syria', 'taiwan',
  'tajikistan', 'tanzania', 'thailand', 'tunisia', 'turkey', 'turkmenistan',
  'uganda', 'ukraine', 'uruguay', 'uzbekistan', 'venezuela', 'vietnam',
  'yemen', 'zambia', 'zimbabwe',
  // Common short forms / alternate spellings used in wire headlines.
  'us', 'usa', 'uk', 'uae', 'drc', 'prc', 'rok', 'dprk',
  // Gap-fill additions: countries whose capitals / major cities were
  // already in CITIES below but whose own names were missing (e.g.
  // Riyadh without Saudi Arabia, Seoul without South Korea). Appended
  // rather than interleaved so the existing Set insertion order is
  // untouched.
  'angola', 'burkina faso', 'madagascar', 'new zealand', 'north korea',
  'saudi arabia', 'senegal', 'south africa', 'south korea', 'south sudan',
  'sri lanka',
];

const CITIES = [
  // Top ~120 cities by wire-headline frequency. Ordered roughly by
  // region for diff-readability, not alphabetical.
  // North America
  'washington', 'new york', 'los angeles', 'chicago', 'houston', 'miami',
  'atlanta', 'boston', 'seattle', 'philadelphia', 'detroit', 'dallas',
  'ottawa', 'toronto', 'montreal', 'vancouver', 'mexico city', 'havana',
  // Europe
  'london', 'paris', 'berlin', 'brussels', 'amsterdam', 'rome', 'madrid',
  'lisbon', 'dublin', 'vienna', 'prague', 'warsaw', 'budapest', 'athens',
  'stockholm', 'copenhagen', 'oslo', 'helsinki', 'zurich', 'geneva', 'bern',
  'milan', 'naples', 'barcelona', 'munich', 'frankfurt', 'hamburg',
  'edinburgh', 'glasgow', 'manchester', 'belfast',
  // Eastern Europe / Russia
  'moscow', 'st petersburg', 'kyiv', 'kiev', 'odesa', 'odessa', 'kharkiv',
  'lviv', 'mariupol', 'donetsk', 'luhansk', 'minsk', 'chisinau', 'tbilisi',
  'yerevan', 'baku', 'bucharest', 'sofia', 'belgrade', 'zagreb', 'sarajevo',
  'pristina', 'skopje', 'tirana', 'vilnius', 'riga', 'tallinn',
  // Middle East / North Africa
  'tehran', 'baghdad', 'damascus', 'beirut', 'amman', 'riyadh', 'doha',
  'dubai', 'abu dhabi', 'kuwait city', 'muscat', 'manama', 'sanaa', 'aden',
  'jerusalem', 'tel aviv', 'gaza', 'ramallah', 'cairo', 'alexandria',
  'tripoli', 'benghazi', 'tunis', 'algiers', 'casablanca', 'rabat',
  'ankara', 'istanbul', 'izmir',
  // Africa (sub-Saharan)
  'lagos', 'abuja', 'accra', 'nairobi', 'addis ababa', 'khartoum', 'juba',
  'kampala', 'kigali', 'dakar', 'bamako', 'ouagadougou', 'niamey',
  'johannesburg', 'cape town', 'pretoria', 'harare', 'lusaka', 'maputo',
  'kinshasa', 'brazzaville', 'luanda', 'antananarivo', 'mogadishu',
  // Asia
  'beijing', 'shanghai', 'hong kong', 'taipei', 'tokyo', 'osaka', 'kyoto',
  'seoul', 'pyongyang', 'delhi', 'new delhi', 'mumbai', 'kolkata', 'chennai',
  'bengaluru', 'hyderabad', 'islamabad', 'karachi', 'lahore', 'kabul',
  'dhaka', 'colombo', 'kathmandu', 'rangoon', 'yangon', 'naypyidaw',
  'bangkok', 'phnom penh', 'hanoi', 'ho chi minh city', 'saigon',
  // NOTE(review): 'singapore' appears both here AND in COUNTRIES. As a
  // city-state that may be intentional, but it means the token lands in
  // LOCATION_GAZETTEER and therefore classifies as a Location, while
  // the stated rule is "countries are actors". Confirm which side the
  // veto should see it on.
  'vientiane', 'kuala lumpur', 'jakarta', 'manila', 'singapore',
  // Oceania
  'canberra', 'sydney', 'melbourne', 'auckland', 'wellington',
  // Latin America (south)
  'brasilia', 'rio de janeiro', 'sao paulo', 'buenos aires', 'santiago',
  'lima', 'bogota', 'caracas', 'quito', 'la paz', 'asuncion', 'montevideo',
];

const REGIONS = [
  // Geopolitical / geographic regions that travel as wire headline entities.
  'middle east', 'north africa', 'sub-saharan africa', 'horn of africa',
  'west bank', 'gaza strip', 'sinai',
  'balkans', 'caucasus', 'central asia', 'south asia', 'southeast asia',
  'east asia', 'latin america', 'caribbean', 'scandinavia', 'baltics',
  'eu', 'nato', 'asean', 'gulf',
  // Bodies of water / straits that name-drive news events.
  'hormuz', 'strait of hormuz', 'bab el-mandeb', 'red sea', 'black sea',
  'south china sea', 'east china sea', 'baltic sea', 'mediterranean',
  'persian gulf', 'arabian gulf', 'gulf of aden', 'gulf of oman',
  'taiwan strait', 'english channel',
  // Commonly-named disputed / conflict zones.
  'donbas', 'donbass', 'crimea', 'kashmir', 'tibet', 'xinjiang',
  'nagorno-karabakh', 'transnistria',
];

// Country names are INTENTIONALLY NOT in LOCATION_GAZETTEER — in news
// headlines the country is usually the political actor ("Iran closes
// Hormuz") not the venue, so the veto classifies country tokens as
// actors. The COUNTRIES array is still exported below (as COUNTRY_NAMES)
// so a caller that needs the list (e.g. a future NER pass) can consume it.
export const LOCATION_GAZETTEER = new Set([
  ...CITIES,
  ...REGIONS,
]);

export const COUNTRY_NAMES = new Set(COUNTRIES);

// ── Common capitalized English words that are NOT entities ───────────
// Sentence-initial capitalisation and a few idiom-openers that the
// veto would otherwise pick up as proper nouns.

export const COMMON_CAPITALIZED = new Set([
  // Articles / determiners / pronouns
  'the', 'a', 'an', 'this', 'that', 'these', 'those', 'some', 'any',
  'all', 'every', 'each', 'no', 'none', 'other', 'another',
  'i', 'you', 'he', 'she', 'it', 'we', 'they', 'my', 'your', 'his',
  'her', 'its', 'our', 'their',
  // Prepositions / conjunctions / common sentence starters
  'in', 'on', 'at', 'by', 'to', 'for', 'of', 'with', 'from', 'up',
  'down', 'out', 'into', 'onto', 'over', 'under', 'after', 'before',
  'during', 'since', 'until', 'as', 'than', 'then', 'so', 'but', 'and',
  'or', 'nor', 'yet', 'if', 'because', 'while', 'when', 'where',
  'why', 'how', 'who', 'what', 'which', 'whose',
  // Common news-headline sentence starters
  'breaking', 'report', 'reports', 'new', 'latest', 'today', 'yesterday',
  'now', 'live', 'updates', 'update', 'analysis', 'opinion', 'exclusive',
  'video', 'watch', 'listen', 'read',
  // Auxiliary / modal verbs that may lead a headline
  'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has',
  'had', 'do', 'does', 'did', 'can', 'could', 'will', 'would', 'shall',
  'should', 'may', 'might', 'must',
]);
────────────────────────────────────────────────────── - -const STOP_WORDS = new Set([ - 'the','a','an','in','on','at','to','for','of','is','are','was','were', - 'has','have','had','be','been','by','from','with','as','it','its', - 'says','say','said','according','reports','report','officials','official', - 'us','new','will','can','could','would','may','also','who','that','this', - 'after','about','over','more','up','out','into','than','some','other', -]); - -function stripSourceSuffix(title) { - return title - .replace(/\s*[-–—]\s*[\w\s.]+\.(?:com|org|net|co\.uk)\s*$/i, '') - .replace(/\s*[-–—]\s*(?:Reuters|AP News|BBC|CNN|Al Jazeera|France 24|DW News|PBS NewsHour|CBS News|NBC|ABC|Associated Press|The Guardian|NOS Nieuws|Tagesschau|CNBC|The National)\s*$/i, ''); -} - -function extractTitleWords(title) { - return new Set( - stripSourceSuffix(title) - .toLowerCase() - .replace(/[^\p{L}\p{N}\s]/gu, '') - .split(/\s+/) - .filter(w => w.length > 2 && !STOP_WORDS.has(w)), - ); -} - -function jaccardSimilarity(setA, setB) { - if (setA.size === 0 || setB.size === 0) return 0; - let intersection = 0; - for (const w of setA) if (setB.has(w)) intersection++; - return intersection / (setA.size + setB.size - intersection); -} - -function deduplicateStories(stories) { - const clusters = []; - for (const story of stories) { - const words = extractTitleWords(story.title); - let merged = false; - for (const cluster of clusters) { - if (jaccardSimilarity(words, cluster.words) > 0.55) { - cluster.items.push(story); - merged = true; - break; - } - } - if (!merged) clusters.push({ words, items: [story] }); - } - return clusters.map(({ items }) => { - items.sort((a, b) => b.currentScore - a.currentScore || b.mentionCount - a.mentionCount); - const best = { ...items[0] }; - if (items.length > 1) { - best.mentionCount = items.reduce((sum, s) => sum + s.mentionCount, 0); - } - best.mergedHashes = items.map(s => s.hash); - return best; - }); -} - // ── Digest content 
──────────────────────────────────────────────────────────── +// Dedup lives in scripts/lib/brief-dedup.mjs (orchestrator) with the +// legacy Jaccard in scripts/lib/brief-dedup-jaccard.mjs. The orchestrator +// reads DIGEST_DEDUP_MODE at call time — default 'jaccard' keeps +// behaviour identical to pre-embedding production. stripSourceSuffix +// is imported from the Jaccard module so the text/HTML formatters +// below keep their current per-story title cleanup. + async function buildDigest(rule, windowStartMs) { const variant = rule.variant ?? 'full'; const lang = rule.lang ?? 'en'; @@ -324,7 +275,7 @@ async function buildDigest(rule, windowStartMs) { if (stories.length === 0) return null; stories.sort((a, b) => b.currentScore - a.currentScore); - const deduped = deduplicateStories(stories); + const deduped = await deduplicateStories(stories); const top = deduped.slice(0, DIGEST_MAX_ITEMS); const allSourceCmds = []; diff --git a/tests/brief-dedup-embedding.test.mjs b/tests/brief-dedup-embedding.test.mjs new file mode 100644 index 000000000..ad0a24da6 --- /dev/null +++ b/tests/brief-dedup-embedding.test.mjs @@ -0,0 +1,556 @@ +/** + * Embedding-dedup integration tests against a deterministic stub + * embedder — no network. Covers the 9 scenarios enumerated in + * docs/plans/2026-04-19-001-feat-embedding-based-story-dedup-plan.md: + * + * 1. Happy path + * 2. Cold-cache timeout → Jaccard fallback + * 3. Provider outage → Jaccard fallback + * 4. Shadow mode + * 5. Entity veto fires + * 6. Complete-link non-chaining + * 7. Cluster-level fixture + * 8. Remote-embed-disabled bypass + * 9. Permutation-invariance property test + * + * The live-embedder golden-pair validator lives in a separate nightly + * CI job (.github/workflows/dedup-golden-pairs.yml) — it's NOT run + * from the brief cron and NOT in this file. 
+ * + * Run: node --test tests/brief-dedup-embedding.test.mjs + */ + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; + +import { deduplicateStories } from '../scripts/lib/brief-dedup.mjs'; +import { deduplicateStoriesJaccard } from '../scripts/lib/brief-dedup-jaccard.mjs'; +import { + EmbeddingProviderError, + EmbeddingTimeoutError, + cosineSimilarity, + normalizeForEmbedding, +} from '../scripts/lib/brief-embedding.mjs'; +import { + completeLinkCluster, + extractEntities, + shouldVeto, +} from '../scripts/lib/brief-dedup-embed.mjs'; + +// ── Fixture helpers ─────────────────────────────────────────────────────────── + +function story(title, score = 10, mentions = 1, hash = undefined) { + return { + title, + currentScore: score, + mentionCount: mentions, + sources: [], + severity: 'critical', + hash: hash ?? `h-${title.slice(0, 16).replace(/\W+/g, '-')}`, + }; +} + +// Orchestrator env that turns on the embed path without shadow-archive +// dependencies. +const EMBED_MODE = { DIGEST_DEDUP_MODE: 'embed', DIGEST_DEDUP_COSINE_THRESHOLD: '0.5' }; + +/** + * Build a stub embedBatch that looks up each normalised title in a + * provided map. Captures call count for assertion-based tests. Any + * title missing from the map is embedded as the zero vector — which + * will fail cosine similarity > 0, so the test will notice. + */ +function stubEmbedder(vectorByNormalizedTitle) { + const calls = []; + async function embedBatch(normalizedTitles) { + calls.push(normalizedTitles.slice()); + return normalizedTitles.map((t) => { + const v = vectorByNormalizedTitle.get(t); + if (!v) throw new Error(`stubEmbedder: no vector for "${t}"`); + return v; + }); + } + return { embedBatch, calls }; +} + +function noopPipeline() { + return null; +} + +/** + * Captures log lines emitted by the orchestrator so tests can assert + * on observability output without swallowing real console output. 
+ */ +function lineCollector() { + const lines = []; + return { + lines, + log: (line) => lines.push({ level: 'log', line }), + warn: (line) => lines.push({ level: 'warn', line }), + }; +} + +// ── Scenario 1 — Happy path ─────────────────────────────────────────────────── + +describe('Scenario 1 — happy path: embed clusters near-duplicates', () => { + it('merges two near-duplicate stories into one cluster when embed mode is on', async () => { + const titles = [ + 'iran closes strait of hormuz', + 'iran shuts strait of hormuz', + 'myanmar coup leader elected president', + ]; + // Near-parallel vectors for 0/1 (cos ≈ 0.95), orthogonal for 2. + const vecByTitle = new Map([ + [titles[0], [1, 0, 0]], + [titles[1], [0.95, Math.sqrt(1 - 0.95 * 0.95), 0]], + [titles[2], [0, 0, 1]], + ]); + const embedder = stubEmbedder(vecByTitle); + const collector = lineCollector(); + + const stories = [ + story('Iran closes Strait of Hormuz', 90, 1, 'h0'), + story('Iran shuts Strait of Hormuz', 85, 1, 'h1'), + story('Myanmar coup leader elected president', 80, 1, 'h2'), + ]; + const out = await deduplicateStories(stories, { + env: EMBED_MODE, + embedBatch: embedder.embedBatch, + redisPipeline: noopPipeline, + ...collector, + }); + + assert.equal(embedder.calls.length, 1, 'exactly one batched embedBatch call'); + assert.equal(out.length, 2, 'two clusters (merged pair + singleton)'); + + const merged = out.find((c) => c.mergedHashes.length === 2); + assert.ok(merged, 'one cluster contains the two Hormuz variants'); + assert.deepEqual(new Set(merged.mergedHashes), new Set(['h0', 'h1'])); + assert.equal(merged.mentionCount, 2); + + const singleton = out.find((c) => c.mergedHashes.length === 1); + assert.ok(singleton); + assert.equal(singleton.mergedHashes[0], 'h2'); + + // Structured log line emitted. 
+ assert.ok(collector.lines.some((l) => l.line.includes('mode=embed'))); + assert.ok(collector.lines.some((l) => l.line.includes('fallback=false'))); + }); +}); + +// ── Scenario 2 — timeout ────────────────────────────────────────────────────── + +describe('Scenario 2 — cold-cache timeout collapses to Jaccard', () => { + it('EmbeddingTimeoutError falls back to Jaccard for the whole batch', async () => { + const throwingEmbedder = async () => { + throw new EmbeddingTimeoutError(); + }; + const stories = [ + story('Iran closes Strait of Hormuz', 90, 1, 'h0'), + story('Iran shuts Strait of Hormuz', 85, 1, 'h1'), + ]; + const collector = lineCollector(); + + const out = await deduplicateStories(stories, { + env: EMBED_MODE, + embedBatch: throwingEmbedder, + redisPipeline: noopPipeline, + ...collector, + }); + + // Jaccard output is the ground truth under fallback — deep-equal + // cluster shape, not just length, so a regression that preserves + // count but changes membership or representative can't slip. + const expected = deduplicateStoriesJaccard(stories); + assert.equal(out.length, expected.length); + for (let i = 0; i < out.length; i++) { + assert.equal(out[i].hash, expected[i].hash); + assert.deepEqual(out[i].mergedHashes, expected[i].mergedHashes); + assert.equal(out[i].mentionCount, expected[i].mentionCount); + } + // Fallback warn line must carry a filterable reason= field. 
+ const fallbackWarn = collector.lines.find( + (l) => l.level === 'warn' && l.line.includes('falling back to Jaccard'), + ); + assert.ok(fallbackWarn, 'warn line on fallback'); + assert.match(fallbackWarn.line, /reason=EmbeddingTimeoutError\b/); + }); +}); + +// ── Scenario 3 — provider outage ────────────────────────────────────────────── + +describe('Scenario 3 — provider outage collapses to Jaccard', () => { + it('EmbeddingProviderError (HTTP 503) falls back', async () => { + const throwingEmbedder = async () => { + throw new EmbeddingProviderError('OpenRouter returned HTTP 503', { status: 503 }); + }; + const stories = [story('a', 10, 1, 'a1'), story('b', 10, 1, 'b1')]; + const collector = lineCollector(); + + const out = await deduplicateStories(stories, { + env: EMBED_MODE, + embedBatch: throwingEmbedder, + redisPipeline: noopPipeline, + ...collector, + }); + + const expected = deduplicateStoriesJaccard(stories); + assert.equal(out.length, expected.length); + for (let i = 0; i < out.length; i++) { + assert.equal(out[i].hash, expected[i].hash); + assert.deepEqual(out[i].mergedHashes, expected[i].mergedHashes); + assert.equal(out[i].mentionCount, expected[i].mentionCount); + } + const fallbackWarn = collector.lines.find((l) => l.level === 'warn'); + assert.ok(fallbackWarn, 'warn line on fallback'); + assert.match(fallbackWarn.line, /reason=EmbeddingProviderError\b/); + }); +}); + +// ── Scenario 4 / 8 — shadow mode and remote-embed kill switch were +// removed when the rollout was simplified to "ship embed directly". +// MODE=jaccard is the only rollback path; covered in +// tests/brief-dedup-jaccard.test.mjs. 
+ +// ── Scenario 5 — entity veto ────────────────────────────────────────────────── + +describe('Scenario 5 — entity veto blocks same-location, different-actor merges', () => { + it('shouldVeto fires on canonical Biden/Xi vs Biden/Putin case', () => { + assert.equal( + shouldVeto('Biden meets Xi in Tokyo', 'Biden meets Putin in Tokyo'), + true, + ); + }); + + it('defers to cosine on Iran/Tehran + Hormuz (documented heuristic limitation)', () => { + // Capital-country coreference is not resolved in v1. The plan's + // original spec claimed the veto would fire here via "unique + // actors {Iran} vs {Tehran}", but the classification rule is: + // - Iran → actor (country, not in gazetteer) + // - Tehran → location (capital city IS in the gazetteer) + // - Hormuz → location + // With the two anchors on different sides of the actor/location + // boundary, there's no symmetric "unique actor on each side" + // signal and the veto can't conclude. Behaviour falls through + // to cosine — which on real text may merge (false positive) + // or split (false negative) depending on wording. Accepted for + // v1 as the documented limitation; a name-normaliser is the + // future fix. + assert.equal( + shouldVeto('Iran closes Hormuz', 'Tehran shuts Hormuz'), + false, + ); + }); + + it('shouldVeto does NOT fire when actors fully match', () => { + assert.equal(shouldVeto('Trump meets Xi', 'Trump Xi summit'), false); + }); + + it('shouldVeto defers to cosine when proper-noun sets are empty on both sides', () => { + assert.equal(shouldVeto('the meeting concludes', 'the meeting ends'), false); + }); + + it('veto blocks cluster admission end-to-end', async () => { + // High cosine (0.99) but disagreeing actors → veto fires and + // the stories stay in separate clusters. 
+ const stories = [ + story('Biden meets Xi in Tokyo', 90, 1, 'xi'), + story('Biden meets Putin in Tokyo', 85, 1, 'putin'), + ]; + const vecByTitle = new Map([ + [normalizeForEmbedding(stories[0].title), [1, 0, 0]], + [normalizeForEmbedding(stories[1].title), [0.99, Math.sqrt(1 - 0.99 * 0.99), 0]], + ]); + const embedder = stubEmbedder(vecByTitle); + + const out = await deduplicateStories(stories, { + env: EMBED_MODE, + embedBatch: embedder.embedBatch, + redisPipeline: noopPipeline, + }); + + assert.equal(out.length, 2, 'veto keeps the two titles in separate clusters'); + }); + + it('DIGEST_DEDUP_ENTITY_VETO_ENABLED=0 disables the veto at runtime', async () => { + const stories = [ + story('Biden meets Xi in Tokyo', 90, 1, 'xi'), + story('Biden meets Putin in Tokyo', 85, 1, 'putin'), + ]; + const vecByTitle = new Map([ + [normalizeForEmbedding(stories[0].title), [1, 0, 0]], + [normalizeForEmbedding(stories[1].title), [0.99, Math.sqrt(1 - 0.99 * 0.99), 0]], + ]); + const embedder = stubEmbedder(vecByTitle); + + const out = await deduplicateStories(stories, { + env: { ...EMBED_MODE, DIGEST_DEDUP_ENTITY_VETO_ENABLED: '0' }, + embedBatch: embedder.embedBatch, + redisPipeline: noopPipeline, + }); + + assert.equal(out.length, 1, 'without the veto, cosine alone merges the two titles'); + }); +}); + +// ── Scenario 6 — complete-link non-chaining ─────────────────────────────────── + +describe('Scenario 6 — complete-link blocks transitive chaining', () => { + it('A~B=0.65, B~C=0.65, A~C=0.30 → {A,B} and {C}, NOT {A,B,C}', () => { + // Constructed so pairwise cosines are exact (see plan for derivation). + const a = [1, 0, 0, 0]; + const b = [0.65, Math.sqrt(1 - 0.65 * 0.65), 0, 0]; + // c must satisfy: a·c = 0.30, b·c = 0.65, |c| = 1. 
+ // Solving: cx=0.30; cy=(0.65 - 0.65*0.30)/sqrt(1-0.4225) = 0.4550/0.7599 = 0.599; + // cz = sqrt(1 - 0.09 - 0.359) = sqrt(0.551) = 0.7423 + const cx = 0.3; + const cy = (0.65 - 0.65 * 0.3) / Math.sqrt(1 - 0.65 * 0.65); + const cz = Math.sqrt(1 - cx * cx - cy * cy); + const c = [cx, cy, cz, 0]; + + // Sanity-check the construction so a regression in the derivation + // can't mask a real bug. + assert.ok(Math.abs(cosineSimilarity(a, b) - 0.65) < 1e-6); + assert.ok(Math.abs(cosineSimilarity(b, c) - 0.65) < 1e-6); + assert.ok(Math.abs(cosineSimilarity(a, c) - 0.3) < 1e-6); + + const items = [ + { title: 'A', embedding: a }, + { title: 'B', embedding: b }, + { title: 'C', embedding: c }, + ]; + const { clusters } = completeLinkCluster(items, { cosineThreshold: 0.5 }); + + // {A,B} should be one cluster, {C} separate — not {A,B,C}. + assert.equal(clusters.length, 2); + const abCluster = clusters.find((cl) => cl.length === 2); + const cCluster = clusters.find((cl) => cl.length === 1); + assert.ok(abCluster && cCluster, 'two clusters: the A+B pair and the C singleton'); + assert.ok(abCluster.includes(0) && abCluster.includes(1)); + assert.ok(cCluster.includes(2)); + }); +}); + +// ── Scenario 7 — cluster-level fixture ──────────────────────────────────────── + +describe('Scenario 7 — cluster-level fixture', () => { + it('10-story fixture clusters into the expected shape', async () => { + // Four real wire-headline clusters plus two singletons = 6 clusters. + // Vectors are hand-crafted so only intended-cluster pairs clear 0.5. + const e1 = [1, 0, 0, 0, 0, 0]; + const e2 = [0, 1, 0, 0, 0, 0]; + const e3 = [0, 0, 1, 0, 0, 0]; + const e4 = [0, 0, 0, 1, 0, 0]; + const e5 = [0, 0, 0, 0, 1, 0]; + const e6 = [0, 0, 0, 0, 0, 1]; + + function near(axis, epsilon = 0.03) { + // Same-direction vector at cosine > 0.99 to `axis` basis. 
+ const out = axis.slice(); + return out.map((v) => v * (1 - epsilon)); + } + + const fixtures = [ + { title: 'Iran closes Strait of Hormuz', hash: 'a1', v: e1, expectCluster: 'A' }, + { title: 'Iran shuts Strait of Hormuz', hash: 'a2', v: near(e1), expectCluster: 'A' }, + { title: 'US fighter jet downed over Iran', hash: 'b1', v: e2, expectCluster: 'B' }, + { title: 'American aircraft shot down in Iran', hash: 'b2', v: near(e2), expectCluster: 'B' }, + { title: 'Myanmar coup leader sworn in', hash: 'c1', v: e3, expectCluster: 'C' }, + { title: 'Myanmar junta chief takes office', hash: 'c2', v: near(e3), expectCluster: 'C' }, + { title: 'Brent crude tops $140', hash: 'd1', v: e4, expectCluster: 'D' }, + { title: 'Oil price surges past $140', hash: 'd2', v: near(e4), expectCluster: 'D' }, + { title: 'Singleton 1', hash: 's1', v: e5, expectCluster: 'E' }, + { title: 'Singleton 2', hash: 's2', v: e6, expectCluster: 'F' }, + ]; + const stories = fixtures.map((f) => + story(f.title, 100 - fixtures.indexOf(f), 1, f.hash), + ); + const vecByTitle = new Map( + fixtures.map((f) => [normalizeForEmbedding(f.title), f.v]), + ); + const embedder = stubEmbedder(vecByTitle); + + const out = await deduplicateStories(stories, { + env: EMBED_MODE, + embedBatch: embedder.embedBatch, + redisPipeline: noopPipeline, + }); + + // 6 clusters total: 4 pairs + 2 singletons. + assert.equal(out.length, 6); + + // Each expected pair's hashes should land in the same cluster. + const pairs = [['a1', 'a2'], ['b1', 'b2'], ['c1', 'c2'], ['d1', 'd2']]; + for (const [x, y] of pairs) { + const cluster = out.find((c) => c.mergedHashes.includes(x)); + assert.ok(cluster?.mergedHashes.includes(y), `${x} and ${y} should cluster together`); + } + // Singletons stay alone. 
+ const s1 = out.find((c) => c.mergedHashes.includes('s1')); + const s2 = out.find((c) => c.mergedHashes.includes('s2')); + assert.equal(s1.mergedHashes.length, 1); + assert.equal(s2.mergedHashes.length, 1); + }); +}); + +// ── Scenario 9 — permutation-invariance property test ──────────────────────── + +describe('Scenario 9 — permutation-invariance', () => { + it('10 random input orders of the same 15-story set produce identical clusters', async () => { + // Construct 15 stories in 5 clusters of 3. Each cluster shares a + // near-unit basis vector; clusters are pairwise orthogonal. + const N_CLUSTERS = 5; + const PER_CLUSTER = 3; + const fixtures = []; + for (let c = 0; c < N_CLUSTERS; c++) { + const basis = Array.from({ length: N_CLUSTERS }, (_, i) => (i === c ? 1 : 0)); + for (let k = 0; k < PER_CLUSTER; k++) { + const jitter = basis.map((v, i) => (i === c ? v - k * 0.002 : v)); + fixtures.push({ + title: `Cluster ${c} item ${k}`, + hash: `c${c}-k${k}`, + v: jitter, + score: 100 - (c * PER_CLUSTER + k), + }); + } + } + const stories = fixtures.map((f) => story(f.title, f.score, 1, f.hash)); + const vecByTitle = new Map( + fixtures.map((f) => [normalizeForEmbedding(f.title), f.v]), + ); + + function sigFor(out) { + // Canonical representation: each cluster as a sorted hash list, + // overall list sorted. + return out.map((c) => [...c.mergedHashes].sort()).map((l) => l.join(',')).sort().join('|'); + } + + // Baseline run on the canonical input order. + const baseline = await deduplicateStories(stories, { + env: EMBED_MODE, + embedBatch: stubEmbedder(vecByTitle).embedBatch, + redisPipeline: noopPipeline, + }); + const baselineSig = sigFor(baseline); + + // Ten random permutations — each must produce the IDENTICAL cluster set. 
+ let seed = 42; + function rand() { + seed = (seed * 1103515245 + 12345) & 0x7fffffff; + return seed / 0x7fffffff; + } + for (let run = 0; run < 10; run++) { + const shuffled = [...stories]; + for (let i = shuffled.length - 1; i > 0; i--) { + const j = Math.floor(rand() * (i + 1)); + [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]]; + } + const out = await deduplicateStories(shuffled, { + env: EMBED_MODE, + embedBatch: stubEmbedder(vecByTitle).embedBatch, + redisPipeline: noopPipeline, + }); + assert.equal( + sigFor(out), + baselineSig, + `permutation ${run} produced a different cluster set`, + ); + } + }); +}); + +// ── Entity extraction unit tests ────────────────────────────────────────────── + +describe('extractEntities', () => { + it('classifies country name as actor, strait as location', () => { + // Per plan intent: countries are geopolitical actors ("Iran does X"), + // physical geography is the venue. + const { locations, actors } = extractEntities('Iran closes Strait of Hormuz'); + assert.ok(actors.includes('iran')); + // Multi-word match finds "strait of hormuz", NOT the single-token + // fallback "hormuz" — the full phrase is in the gazetteer. 
+ assert.ok( + locations.includes('strait of hormuz') || locations.includes('hormuz'), + 'hormuz location must be detected (as phrase or single token)', + ); + assert.ok(!locations.includes('iran')); + }); + + it('classifies city as location, person as actor', () => { + const { locations, actors } = extractEntities('Biden meets Xi in Tokyo'); + assert.ok(locations.includes('tokyo')); + assert.ok(actors.includes('biden')); + assert.ok(actors.includes('xi')); + }); + + it('skips common capitalized sentence-starters', () => { + const { locations, actors } = extractEntities('The meeting begins'); + assert.equal(locations.length, 0); + assert.equal(actors.length, 0); + }); + + it('keeps sentence-start proper nouns', () => { + const { actors } = extractEntities('Trump to visit Japan'); + assert.ok(actors.includes('trump')); + // Japan is a country → actor, not location + assert.ok(actors.includes('japan')); + }); + + // Regression: multi-word gazetteer entries are matched as whole + // phrases. An earlier implementation split on whitespace and only + // checked single tokens, so "Red Sea", "South China Sea", "New York", + // etc. silently fell through to the actor bucket and disabled the + // veto for a whole class of real headlines. 
+ it('matches multi-word location: Red Sea', () => { + const { locations, actors } = extractEntities('Houthis strike ship in Red Sea'); + assert.ok(locations.includes('red sea')); + assert.ok(!actors.includes('red')); + assert.ok(!actors.includes('sea')); + assert.ok(actors.includes('houthis')); + }); + + it('matches multi-word location: South China Sea', () => { + const { locations } = extractEntities('Tensions flare in South China Sea'); + assert.ok(locations.includes('south china sea')); + }); + + it('matches multi-word location with lowercase connector: Strait of Hormuz', () => { + const { locations } = extractEntities('Iran closes Strait of Hormuz'); + assert.ok(locations.includes('strait of hormuz')); + }); + + it('matches multi-word city: Abu Dhabi', () => { + const { locations } = extractEntities('Summit held in Abu Dhabi'); + assert.ok(locations.includes('abu dhabi')); + }); + + it('matches multi-word city: New York', () => { + const { locations } = extractEntities('UN meeting in New York'); + assert.ok(locations.includes('new york')); + }); + + // Veto end-to-end: reproducer from the P1 finding. Two Red-Sea + // headlines share a location and disagree on the actor — veto + // MUST fire (otherwise the main anti-overmerge guard is off for + // bodies-of-water / region headlines). 
+ it('shouldVeto: Houthis vs US on Red Sea — location phrase match fires the veto', () => { + assert.equal( + shouldVeto('Houthis strike ship in Red Sea', 'US escorts convoy in Red Sea'), + true, + ); + }); +}); + +// ── Cosine helper ───────────────────────────────────────────────────────────── + +describe('cosineSimilarity', () => { + it('returns 1 for identical vectors', () => { + assert.equal(cosineSimilarity([1, 2, 3], [1, 2, 3]), 1); + }); + it('returns 0 for orthogonal vectors', () => { + assert.equal(cosineSimilarity([1, 0], [0, 1]), 0); + }); + it('handles a zero vector without throwing', () => { + assert.equal(cosineSimilarity([0, 0], [1, 1]), 0); + }); +}); diff --git a/tests/brief-dedup-jaccard.test.mjs b/tests/brief-dedup-jaccard.test.mjs new file mode 100644 index 000000000..1f5644d36 --- /dev/null +++ b/tests/brief-dedup-jaccard.test.mjs @@ -0,0 +1,243 @@ +/** + * Regression tests for the permanent Jaccard fallback path. + * + * The earlier harness parsed scripts/seed-digest-notifications.mjs + * with regexes to extract the dedup helpers into a Function() sandbox. + * Now that the logic lives in its own module we import directly — no + * regex fragility, no drift when the seed script is refactored. 
+ * + * Run: node --test tests/brief-dedup-jaccard.test.mjs + */ + +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; + +import { + deduplicateStoriesJaccard, + extractTitleWords, + jaccardSimilarity, + stripSourceSuffix, +} from '../scripts/lib/brief-dedup-jaccard.mjs'; +import { + CACHE_KEY_PREFIX, + JACCARD_MERGE_THRESHOLD, +} from '../scripts/lib/brief-dedup-consts.mjs'; + +// ── Constants ───────────────────────────────────────────────────────────────── + +describe('brief-dedup-consts', () => { + it('exposes the Jaccard merge threshold as a pure constant', () => { + assert.equal(JACCARD_MERGE_THRESHOLD, 0.55); + }); + + it('embedding cache key prefix is namespaced + versioned', () => { + // Bump when the embed model or dimension changes — silent threshold + // drift on model upgrade is the #1 documented regression mode. + assert.equal(CACHE_KEY_PREFIX, 'brief:emb:v1:text-3-small-512'); + }); +}); + +// ── stripSourceSuffix ───────────────────────────────────────────────────────── + +describe('stripSourceSuffix', () => { + it('strips "- reuters.com"', () => { + assert.equal( + stripSourceSuffix('US fighter jet shot down over Iran - reuters.com'), + 'US fighter jet shot down over Iran', + ); + }); + + it('strips "- Reuters"', () => { + assert.equal( + stripSourceSuffix('Downed planes spell new peril for Trump - Reuters'), + 'Downed planes spell new peril for Trump', + ); + }); + + it('strips "- AP News"', () => { + assert.equal( + stripSourceSuffix('US military jets hit in Iran war - AP News'), + 'US military jets hit in Iran war', + ); + }); + + it('strips "- apnews.com"', () => { + assert.equal( + stripSourceSuffix('US military jets hit in Iran war - apnews.com'), + 'US military jets hit in Iran war', + ); + }); + + it('preserves titles without source suffix', () => { + assert.equal( + stripSourceSuffix('Myanmar coup leader elected president'), + 'Myanmar coup leader elected president', + ); + }); +}); + +// ── Pure helpers 
────────────────────────────────────────────────────────────── + +describe('extractTitleWords', () => { + it('drops stop-words and 1-2 char tokens', () => { + const words = extractTitleWords('The US is at war with Iran'); + // 'the', 'us', 'is', 'at', 'with' all drop. Remaining content + // words: 'war', 'iran'. + assert.ok(words.has('war')); + assert.ok(words.has('iran')); + assert.ok(!words.has('the')); + assert.ok(!words.has('us')); + assert.ok(!words.has('is')); + assert.ok(!words.has('at')); + }); + + it('applies stripSourceSuffix before tokenising', () => { + const words = extractTitleWords('Iran closes Hormuz - Reuters'); + assert.ok(words.has('iran')); + assert.ok(words.has('closes')); + assert.ok(words.has('hormuz')); + assert.ok(!words.has('reuters')); + }); +}); + +describe('jaccardSimilarity', () => { + it('returns 1 for identical Sets', () => { + const a = new Set(['iran', 'hormuz', 'strait']); + assert.equal(jaccardSimilarity(a, new Set(a)), 1); + }); + + it('returns 0 when either Set is empty', () => { + assert.equal(jaccardSimilarity(new Set(), new Set(['x'])), 0); + assert.equal(jaccardSimilarity(new Set(['x']), new Set()), 0); + }); + + it('is symmetric', () => { + const a = new Set(['a', 'b', 'c']); + const b = new Set(['b', 'c', 'd']); + assert.equal(jaccardSimilarity(a, b), jaccardSimilarity(b, a)); + }); +}); + +// ── deduplicateStoriesJaccard ───────────────────────────────────────────────── + +function story(title, score = 10, mentions = 1, hash = undefined) { + return { + title, + currentScore: score, + mentionCount: mentions, + sources: [], + severity: 'critical', + hash: hash ?? 
title.slice(0, 8), + }; +} + +describe('deduplicateStoriesJaccard', () => { + it('merges near-duplicate Reuters headlines about downed jet', () => { + const stories = [ + story('US fighter jet shot down over Iran, search underway for crew, US official says - reuters.com', 90), + story('US fighter jet shot down over Iran, search underway for crew, US officials say - reuters.com', 85), + story('US fighter jet shot down over Iran, search under way for crew member, US officials say - reuters.com', 80), + story('US fighter jet shot down over Iran, search under way for crew member, US officials say - Reuters', 75), + story('US fighter jet shot down over Iran, search underway for crew member, US officials say - Reuters', 70), + ]; + const result = deduplicateStoriesJaccard(stories); + assert.equal(result.length, 1, `Expected 1 cluster, got ${result.length}: ${result.map((r) => r.title).join(' | ')}`); + assert.equal(result[0].currentScore, 90); + assert.equal(result[0].mentionCount, 5); + }); + + it('keeps genuinely different stories separate', () => { + const stories = [ + story('US fighter jet shot down over Iran', 90), + story('Myanmar coup leader Min Aung Hlaing elected president', 80), + story('Brent oil spot price soars to $141', 70), + ]; + const result = deduplicateStoriesJaccard(stories); + assert.equal(result.length, 3); + }); + + it('merges same story reported by different outlets with different suffixes', () => { + const stories = [ + story('Downed planes spell new peril for Trump as Tehran hunts missing US pilot - Reuters', 90), + story('Downed planes spell new peril for Trump as Tehran hunts missing US pilot - reuters.com', 85), + ]; + const result = deduplicateStoriesJaccard(stories); + assert.equal(result.length, 1); + assert.equal(result[0].currentScore, 90); + }); + + it('merges stories with minor wording differences', () => { + const stories = [ + story('US rescues airman whose F-15 was downed in Iran, US officials say - Reuters', 90), + story('Iran 
says several enemy aircraft destroyed during US pilot rescue mission - Reuters', 80), + story('Trump, Israel pressure Iran ahead of deadline as search continues for missing US airman - Reuters', 70), + ]; + const result = deduplicateStoriesJaccard(stories); + // These are different enough events/angles that they should stay separate. + assert.ok(result.length >= 2, `Expected at least 2 clusters, got ${result.length}`); + }); + + it('carries mergedHashes from all clustered stories for source lookup', () => { + const stories = [ + story('US fighter jet shot down - reuters.com', 90, 1, 'hash_a'), + story('US fighter jet shot down - Reuters', 80, 1, 'hash_b'), + story('US fighter jet shot down - AP News', 70, 1, 'hash_c'), + ]; + const result = deduplicateStoriesJaccard(stories); + assert.equal(result.length, 1); + assert.deepEqual(result[0].mergedHashes, ['hash_a', 'hash_b', 'hash_c']); + }); + + it('preserves single stories without modification', () => { + const stories = [story('Only one story here', 50, 3)]; + const result = deduplicateStoriesJaccard(stories); + assert.equal(result.length, 1); + assert.equal(result[0].mentionCount, 3); + assert.deepEqual(result[0].mergedHashes, [stories[0].hash]); + }); +}); + +// ── Orchestrator kill-switch path ──────────────────────────────────────────── + +describe('brief-dedup orchestrator — jaccard kill switch', () => { + it('DIGEST_DEDUP_MODE=jaccard routes straight through the fallback', async () => { + const { deduplicateStories } = await import('../scripts/lib/brief-dedup.mjs'); + let embedCalls = 0; + const stubEmbed = async () => { + embedCalls++; + throw new Error('embedBatch must NOT be called under MODE=jaccard'); + }; + const stories = [ + story('Iran closes Strait of Hormuz', 90, 1, 'h1'), + story('Iran shuts Strait of Hormuz - Reuters', 85, 1, 'h2'), + story('Myanmar coup leader elected president', 80, 1, 'h3'), + ]; + const out = await deduplicateStories(stories, { + env: { DIGEST_DEDUP_MODE: 'jaccard' }, + 
embedBatch: stubEmbed, + }); + assert.equal(embedCalls, 0); + const expected = deduplicateStoriesJaccard(stories); + assert.equal(out.length, expected.length); + for (let i = 0; i < out.length; i++) { + assert.equal(out[i].hash, expected[i].hash); + assert.deepEqual(out[i].mergedHashes, expected[i].mergedHashes); + assert.equal(out[i].mentionCount, expected[i].mentionCount); + } + }); + + it('returns [] for empty input without invoking Jaccard', async () => { + const { deduplicateStories } = await import('../scripts/lib/brief-dedup.mjs'); + let jaccardCalls = 0; + const stubJaccard = (s) => { + jaccardCalls++; + return deduplicateStoriesJaccard(s); + }; + const out = await deduplicateStories([], { + env: { DIGEST_DEDUP_MODE: 'jaccard' }, + jaccard: stubJaccard, + }); + assert.deepEqual(out, []); + assert.equal(jaccardCalls, 0); + }); +}); diff --git a/tests/digest-dedup.test.mjs b/tests/digest-dedup.test.mjs deleted file mode 100644 index 179f37e5a..000000000 --- a/tests/digest-dedup.test.mjs +++ /dev/null @@ -1,154 +0,0 @@ -/** - * Test: digest fuzzy deduplication merges near-duplicate stories. - * - * Run: node --test tests/digest-dedup.test.mjs - */ - -import { describe, it } from 'node:test'; -import assert from 'node:assert/strict'; -import { readFileSync } from 'node:fs'; -import { resolve, dirname } from 'node:path'; -import { fileURLToPath } from 'node:url'; - -const __dirname = dirname(fileURLToPath(import.meta.url)); -const src = readFileSync( - resolve(__dirname, '..', 'scripts', 'seed-digest-notifications.mjs'), - 'utf-8', -); - -// ── Extract the dedup functions via dynamic evaluation ──────────────────────── -// We extract the pure functions (no side-effects, no imports) to test them. 
- -const STOP_WORDS_BLOCK = src.match(/const STOP_WORDS = new Set\(\[[\s\S]*?\]\);/)?.[0]; -const stripSourceSuffix = src.match(/function stripSourceSuffix\(title\) \{[\s\S]*?\n\}/)?.[0]; -const extractTitleWords = src.match(/function extractTitleWords\(title\) \{[\s\S]*?\n\}/)?.[0]; -const jaccardSimilarity = src.match(/function jaccardSimilarity\(setA, setB\) \{[\s\S]*?\n\}/)?.[0]; -const deduplicateStories = src.match(/function deduplicateStories\(stories\) \{[\s\S]*?\n\}/)?.[0]; - -assert.ok(STOP_WORDS_BLOCK, 'STOP_WORDS not found in source'); -assert.ok(stripSourceSuffix, 'stripSourceSuffix not found in source'); -assert.ok(extractTitleWords, 'extractTitleWords not found in source'); -assert.ok(jaccardSimilarity, 'jaccardSimilarity not found in source'); -assert.ok(deduplicateStories, 'deduplicateStories not found in source'); - -const mod = {}; -new Function('mod', ` - ${STOP_WORDS_BLOCK} - ${stripSourceSuffix} - ${extractTitleWords} - ${jaccardSimilarity} - ${deduplicateStories} - mod.stripSourceSuffix = stripSourceSuffix; - mod.extractTitleWords = extractTitleWords; - mod.jaccardSimilarity = jaccardSimilarity; - mod.deduplicateStories = deduplicateStories; -`)(mod); - -// ── Tests ───────────────────────────────────────────────────────────────────── - -describe('stripSourceSuffix', () => { - it('strips "- reuters.com"', () => { - assert.equal( - mod.stripSourceSuffix('US fighter jet shot down over Iran - reuters.com'), - 'US fighter jet shot down over Iran', - ); - }); - - it('strips "- Reuters"', () => { - assert.equal( - mod.stripSourceSuffix('Downed planes spell new peril for Trump - Reuters'), - 'Downed planes spell new peril for Trump', - ); - }); - - it('strips "- AP News"', () => { - assert.equal( - mod.stripSourceSuffix('US military jets hit in Iran war - AP News'), - 'US military jets hit in Iran war', - ); - }); - - it('strips "- apnews.com"', () => { - assert.equal( - mod.stripSourceSuffix('US military jets hit in Iran war - apnews.com'), - 'US 
military jets hit in Iran war', - ); - }); - - it('preserves titles without source suffix', () => { - assert.equal( - mod.stripSourceSuffix('Myanmar coup leader elected president'), - 'Myanmar coup leader elected president', - ); - }); -}); - -describe('deduplicateStories', () => { - function story(title, score = 10, mentions = 1, hash = undefined) { - return { title, currentScore: score, mentionCount: mentions, sources: [], severity: 'critical', hash: hash ?? title.slice(0, 8) }; - } - - it('merges near-duplicate Reuters headlines about downed jet', () => { - const stories = [ - story('US fighter jet shot down over Iran, search underway for crew, US official says - reuters.com', 90), - story('US fighter jet shot down over Iran, search underway for crew, US officials say - reuters.com', 85), - story('US fighter jet shot down over Iran, search under way for crew member, US officials say - reuters.com', 80), - story('US fighter jet shot down over Iran, search under way for crew member, US officials say - Reuters', 75), - story('US fighter jet shot down over Iran, search underway for crew member, US officials say - Reuters', 70), - ]; - const result = mod.deduplicateStories(stories); - assert.equal(result.length, 1, `Expected 1 cluster, got ${result.length}: ${result.map(r => r.title).join(' | ')}`); - assert.equal(result[0].currentScore, 90); - assert.equal(result[0].mentionCount, 5); - }); - - it('keeps genuinely different stories separate', () => { - const stories = [ - story('US fighter jet shot down over Iran', 90), - story('Myanmar coup leader Min Aung Hlaing elected president', 80), - story('Brent oil spot price soars to $141', 70), - ]; - const result = mod.deduplicateStories(stories); - assert.equal(result.length, 3); - }); - - it('merges same story reported by different outlets with different suffixes', () => { - const stories = [ - story('Downed planes spell new peril for Trump as Tehran hunts missing US pilot - Reuters', 90), - story('Downed planes spell 
new peril for Trump as Tehran hunts missing US pilot - reuters.com', 85), - ]; - const result = mod.deduplicateStories(stories); - assert.equal(result.length, 1); - assert.equal(result[0].currentScore, 90); - }); - - it('merges stories with minor wording differences', () => { - const stories = [ - story('US rescues airman whose F-15 was downed in Iran, US officials say - Reuters', 90), - story('Iran says several enemy aircraft destroyed during US pilot rescue mission - Reuters', 80), - story('Trump, Israel pressure Iran ahead of deadline as search continues for missing US airman - Reuters', 70), - ]; - const result = mod.deduplicateStories(stories); - // These are different enough events/angles that they should stay separate - assert.ok(result.length >= 2, `Expected at least 2 clusters, got ${result.length}`); - }); - - it('carries mergedHashes from all clustered stories for source lookup', () => { - const stories = [ - story('US fighter jet shot down - reuters.com', 90, 1, 'hash_a'), - story('US fighter jet shot down - Reuters', 80, 1, 'hash_b'), - story('US fighter jet shot down - AP News', 70, 1, 'hash_c'), - ]; - const result = mod.deduplicateStories(stories); - assert.equal(result.length, 1); - assert.deepEqual(result[0].mergedHashes, ['hash_a', 'hash_b', 'hash_c']); - }); - - it('preserves single stories without modification', () => { - const stories = [story('Only one story here', 50, 3)]; - const result = mod.deduplicateStories(stories); - assert.equal(result.length, 1); - assert.equal(result[0].mentionCount, 3); - assert.deepEqual(result[0].mergedHashes, [stories[0].hash]); - }); -}); diff --git a/todos/193-complete-p1-dedup-regex-and-redis-pipeline-duplication.md b/todos/193-complete-p1-dedup-regex-and-redis-pipeline-duplication.md new file mode 100644 index 000000000..124c8271f --- /dev/null +++ b/todos/193-complete-p1-dedup-regex-and-redis-pipeline-duplication.md @@ -0,0 +1,82 @@ +--- +status: complete +priority: p1 +issue_id: 193 +tags: [code-review, 
digest-dedup, phase-a, maintenance, dry] +dependencies: [] +--- + +# Duplicated regex + duplicated Redis pipeline helper in digest-dedup Phase A + +## Problem Statement + +Two convergent duplication findings from the Phase A review (commit `cdd7a124c`) — both are silent-divergence risks because the unit tests cover each module in isolation. + +1. **`stripSourceSuffix` regex appears verbatim in TWO files.** + - `scripts/lib/brief-embedding.mjs:60-64` inside `normalizeForEmbedding` + - `scripts/lib/brief-dedup-jaccard.mjs:32-36` as the exported `stripSourceSuffix` + - Adding a new outlet (e.g. `Bloomberg`) to one and not the other means the veto/embed input and the Jaccard fallback input quietly drift. The plan's "normalization contract" explicitly calls out that these MUST agree. + +2. **`defaultRedisPipeline` reimplemented in TWO new files.** + - `scripts/lib/brief-dedup.mjs:74-94` + - `scripts/lib/brief-embedding.mjs:77-97` + - The repo already exports a canonical `redisPipeline()` from `api/_upstash-json.js:88` (imported by `seed-digest-notifications.mjs:32`). The new lib/ modules avoid importing from `api/` on purpose — but the existing `scripts/lib/` convention is to share via `scripts/lib/_*.mjs` helpers. + +## Findings + +- **Embedding regex collision**: `brief-embedding.mjs:60` and `brief-dedup-jaccard.mjs:33` keep the same outlet allow-list in two places. +- **Pipeline helper trio**: three copies of essentially the same 20-line `fetch('{url}/pipeline', …)` exist (the two above + the canonical one). Any fix to timeout / User-Agent / error shape has to touch all three. + +## Proposed Solutions + +### Option 1 — consolidate into brief-dedup-consts.mjs (lightest) +Move `stripSourceSuffix` + its outlet list into `brief-dedup-consts.mjs` as the source of truth. Have both `brief-embedding.mjs` and `brief-dedup-jaccard.mjs` import from there. Extract `defaultRedisPipeline` into `scripts/lib/_upstash-pipeline.mjs`. + +**Pros:** single-sourced, small diff. 
+**Cons:** mixes data + function in consts (plan called consts "pure data"); may need a `scripts/lib/_wire-suffixes.mjs` instead. +**Effort:** Small +**Risk:** Low + +### Option 2 — dedicated shared-helpers module +Add `scripts/lib/_dedup-shared.mjs` exporting `stripSourceSuffix`, `defaultRedisPipeline`, `normalizeForEmbedding` (which currently calls stripSourceSuffix inline). + +**Pros:** cleaner separation. +**Cons:** another file. +**Effort:** Small +**Risk:** Low + +### Option 3 — defer to a follow-up (not recommended) +Ship Phase A as-is; clean up in Phase B. + +**Pros:** ship faster. +**Cons:** two independent outlet lists on `main` makes the first production drift-incident hard to diagnose. +**Effort:** zero now +**Risk:** Medium — divergence is silent. + +## Recommended Action +_To be filled during triage._ + +## Technical Details + +Affected files: +- `scripts/lib/brief-embedding.mjs` +- `scripts/lib/brief-dedup-jaccard.mjs` +- `scripts/lib/brief-dedup.mjs` +- `scripts/lib/brief-dedup-consts.mjs` +- `api/_upstash-json.js` (canonical `redisPipeline`) + +No schema or DB changes. + +## Acceptance Criteria +- [ ] `stripSourceSuffix` regex exists in exactly ONE place; both consumer sites import from there. +- [ ] `defaultRedisPipeline` (or equivalent) exists in exactly ONE place within the new modules. +- [ ] Regression test asserts `normalizeForEmbedding("Foo - Reuters") === "foo"` and `normalizeForEmbedding` uses the same outlet list the Jaccard path uses (same-file import, guaranteed). +- [ ] `npm run test:data` still 5825/5825. 
+ +## Work Log +_Empty — awaiting triage._ + +## Resources +- Review commit: `cdd7a124c` +- Plan: `docs/plans/2026-04-19-001-feat-embedding-based-story-dedup-plan.md` +- Reviewers: kieran-typescript-reviewer, security-sentinel, code-simplicity-reviewer, architecture-strategist diff --git a/todos/194-complete-p1-embedding-timeout-floor-and-negative-budget.md b/todos/194-complete-p1-embedding-timeout-floor-and-negative-budget.md new file mode 100644 index 000000000..c1b37b589 --- /dev/null +++ b/todos/194-complete-p1-embedding-timeout-floor-and-negative-budget.md @@ -0,0 +1,78 @@ +--- +status: complete +priority: p1 +issue_id: 194 +tags: [code-review, digest-dedup, phase-a, correctness, performance] +dependencies: [] +--- + +# Embedding `AbortSignal.timeout` floor + negative-budget fast-path + +## Problem Statement + +In `scripts/lib/brief-embedding.mjs:153` the batched embeddings call uses: + +```js +signal: AbortSignal.timeout(Math.max(250, timeoutMs)), +``` + +Two correctness concerns surfaced convergently by the perf + kieran-ts reviewers: + +1. **Floor allows total wall-clock overshoot.** The orchestrator's `DIGEST_DEDUP_WALL_CLOCK_MS` contract says "hard cap at 45s or abort the whole batch". If `deadline - nowImpl()` returns 40ms (cache lookup was slow), the floor lengthens the fetch to 250ms — the cron tick silently runs 210ms past budget. +2. **Negative budget fast-pathed to a guaranteed timeout.** If cache lookup already exceeded the deadline, `timeoutMs` goes negative. `Math.max(250, negative)` → 250ms, so we open an HTTP connection to OpenRouter that is guaranteed to time out. Wastes a network round-trip and an OpenRouter quota slot on a request we already know is doomed. + +Neither is a functional bug (all-or-nothing fallback still works), but both are incorrect by the "all-or-nothing" contract. + +## Findings + +- `brief-embedding.mjs:153` `Math.max(250, timeoutMs)` — discussed above. 
+- `brief-embedding.mjs:243` caller passes `timeoutMs: deadline - nowImpl()` without re-checking deadline. Line 242 throws on deadline-exceeded BEFORE cache lookup but not after, so the gap between cache lookup and API call is unchecked. + +## Proposed Solutions + +### Option 1 — pre-check and remove floor (recommended) +```js +if (timeoutMs <= 0) throw new EmbeddingTimeoutError(); +signal: AbortSignal.timeout(timeoutMs), +``` + +**Pros:** correct by construction; skips pointless round-trip. +**Cons:** 250ms floor was defensive for tiny-positive values (e.g. 15ms) where a real OpenRouter RTT + TLS handshake wouldn't fit anyway. But the orchestrator can't do better than the math says. +**Effort:** Small +**Risk:** Low — every test still passes (scenario 2 throws synchronously via the stub). + +### Option 2 — re-check deadline in orchestrator before embed call +In `brief-dedup.mjs:254-261`, bail to fallback before calling `embedImpl` when `nowImpl() >= deadline`. + +**Pros:** symmetry with cache-lookup check. +**Cons:** pushes contract knowledge up one layer. +**Effort:** Small +**Risk:** Low + +### Option 3 — keep the floor, log once on overshoot +Emit a `warn` when `timeoutMs < 250` so operators see it. + +**Pros:** visibility. +**Cons:** doesn't fix the overshoot. +**Effort:** Small +**Risk:** Low + +## Recommended Action +_Option 1 + Option 2 both; they're tiny._ + +## Technical Details +- `scripts/lib/brief-embedding.mjs:153` — the floor +- `scripts/lib/brief-embedding.mjs:240-242` — the call site's deadline math +- `scripts/lib/brief-dedup.mjs:247-254` — orchestrator embed call + +## Acceptance Criteria +- [ ] Passing `wallClockMs: 10` through a stubbed slow cache (e.g. now() monotonic past deadline) throws `EmbeddingTimeoutError` without invoking `fetch`. +- [ ] Passing `wallClockMs: 100` with a stub cache that takes 150ms throws `EmbeddingTimeoutError`. +- [ ] All existing 64 dedup tests still green. 
+ +## Work Log +_Empty — awaiting triage._ + +## Resources +- Review commit: `cdd7a124c` +- Reviewers: performance-oracle, kieran-typescript-reviewer diff --git a/todos/195-complete-p1-dead-env-getters-in-dedup-consts.md b/todos/195-complete-p1-dead-env-getters-in-dedup-consts.md new file mode 100644 index 000000000..8dd69d5c3 --- /dev/null +++ b/todos/195-complete-p1-dead-env-getters-in-dedup-consts.md @@ -0,0 +1,73 @@ +--- +status: complete +priority: p1 +issue_id: 195 +tags: [code-review, digest-dedup, phase-a, dead-code, yagni] +dependencies: [] +--- + +# Dead env-getter API in brief-dedup-consts.mjs + +## Problem Statement + +`scripts/lib/brief-dedup-consts.mjs:37-58` exports five env-reader functions: +- `getMode()` +- `isRemoteEmbedEnabled()` +- `isEntityVetoEnabled()` +- `getCosineThreshold()` +- `getWallClockMs()` + +None of them are imported by any other file (`grep` across `scripts/`, `server/`, `api/`, `tests/` returns zero hits). The orchestrator (`scripts/lib/brief-dedup.mjs:51-70`) reimplements every single one inline in `readOrchestratorConfig`. Flagged convergently by the simplicity + architecture + kieran-typescript reviewers. + +Two parallel parsers for the same five env vars is the classic "one will drift" setup. And the consts module's own header comment claims it's "Pure data, no network" — env readers violate that boundary. + +## Findings +- `brief-dedup-consts.mjs:36-58`: 22 LOC of unreachable code. +- `brief-dedup.mjs:51-70`: `readOrchestratorConfig` is the only real env-reader. +- No drift risk today (no callers), but guaranteed drift the first time someone adds a knob to one path and forgets the other. + +## Proposed Solutions + +### Option 1 — delete the dead getters (recommended) +Remove `getMode`, `isRemoteEmbedEnabled`, `isEntityVetoEnabled`, `getCosineThreshold`, `getWallClockMs` from consts. Leave consts as pure data + the `__constants` bag. + +**Pros:** removes 22 LOC of unreachable code; matches the module's stated purpose. 
+**Cons:** none. +**Effort:** Small +**Risk:** Zero (no callers). + +### Option 2 — delete the inline `readOrchestratorConfig`, have it call the getters +Keep the consts-module getters, wire `readOrchestratorConfig` to compose them. + +**Pros:** one env-parse implementation. +**Cons:** muddies consts; every call now does 5 separate env reads instead of one pass; the module-init vs per-call semantics get surprising. +**Effort:** Small +**Risk:** Low + +### Option 3 — ship as-is, add a TODO comment +Flag for Phase B cleanup. + +**Pros:** zero churn now. +**Cons:** ships known dead code. +**Effort:** Tiny +**Risk:** Low + +## Recommended Action +_Option 1. Deleting a dead surface that nothing imports is the safest change._ + +## Technical Details +- Delete lines 36-58 of `scripts/lib/brief-dedup-consts.mjs`. +- Keep the static constants + `__constants` bag. +- No test changes required. + +## Acceptance Criteria +- [ ] `scripts/lib/brief-dedup-consts.mjs` exports only pure constants + the `__constants` bag. +- [ ] `grep -r "getMode\b\|isRemoteEmbedEnabled\b\|isEntityVetoEnabled\b\|getCosineThreshold\b\|getWallClockMs\b"` finds 0 hits outside the orchestrator's inline body. +- [ ] All 64 dedup tests still green. 
+ +## Work Log +_Empty — awaiting triage._ + +## Resources +- Review commit: `cdd7a124c` +- Reviewers: code-simplicity-reviewer, architecture-strategist, kieran-typescript-reviewer diff --git a/todos/196-complete-p2-orchestrator-cleanup-bundle.md b/todos/196-complete-p2-orchestrator-cleanup-bundle.md new file mode 100644 index 000000000..b090aa2b1 --- /dev/null +++ b/todos/196-complete-p2-orchestrator-cleanup-bundle.md @@ -0,0 +1,78 @@ +--- +status: complete +priority: p2 +issue_id: 196 +tags: [code-review, digest-dedup, phase-a, cleanup, quality] +dependencies: [] +--- + +# Orchestrator cleanup bundle (re-exports, materializeCluster, vetoWrapper, double-Jaccard, warn fields, mode validation) + +## Problem Statement + +Six independent but small `brief-dedup.mjs` orchestrator issues found by the reviewers — grouped because they're all <10 LOC each and one PR makes more sense than six. + +### 1. Re-exports create import-graph ambiguity +`brief-dedup.mjs:312-317` re-exports `deduplicateStoriesJaccard`, `normalizeForEmbedding`, and `CACHE_TTL_SECONDS`. `normalizeForEmbedding` can now be pulled from both `brief-embedding.mjs` AND `brief-dedup.mjs`; the tools correctly pull from the source module but new callers can drift. + +### 2. `materializeCluster` duplicates Jaccard's representative logic +`brief-dedup.mjs:107-117` and `brief-dedup-jaccard.mjs:90-98` independently sort members by `[currentScore DESC, mentionCount DESC]`, sum mentionCount, project `mergedHashes`. Same contract in two places. + +### 3. `clusterWithEntityVeto` is a 6-line wrapper used once +`brief-dedup-embed.mjs:167-179` wraps `completeLinkCluster` with a pre-baked vetoFn. Called from exactly one site (`brief-dedup.mjs:261`). Inlining at the call site removes a function + export + indirection layer. + +### 4. 
Shadow mode runs Jaccard twice +`brief-dedup.mjs:275` calls `jaccardClusterHashesFor(stories)` to derive cluster-hash arrays for the diff; line 295 calls `jaccard(stories)` again to produce the returned value. Both are Jaccard(stories) in original order — compute once, derive `mergedHashes` from the returned reps, return them directly. + +### 5. `warn` log on fallback lacks `err.name` for filtering +`brief-dedup.mjs:303-306` emits `"[digest] dedup embed path failed, falling back to Jaccard: {msg}"`. Operators can't grep `reason=timeout` vs `reason=provider_5xx`. Add `err.name` / `err.status` to the structured log. + +### 6. Invalid `DIGEST_DEDUP_MODE` silently falls to `'jaccard'` +`brief-dedup.mjs:52-53` — if someone sets `DIGEST_DEDUP_MODE=embbed` (typo), the orchestrator accepts it as jaccard with no warn. Operator expects embed mode, gets Jaccard. Emit a one-shot warn when the raw value is truthy but unrecognised. + +## Findings +All six are convergent findings across kieran-ts, architecture, simplicity, and perf reviewers. Each is a <10 LOC change. + +## Proposed Solutions + +### Option 1 — single "orchestrator polish" commit covering all 6 (recommended) +One small diff per subsection; all test-covered by existing suites. + +**Pros:** clear unit of work; one review. +**Cons:** touches 3 files. +**Effort:** Small +**Risk:** Low + +### Option 2 — split into three commits +(a) duplication (re-exports + materializeCluster + vetoWrapper); (b) shadow double-Jaccard; (c) observability (warn fields + mode validation). + +**Pros:** easier to revert one if something surprises. +**Cons:** more churn. 
+**Effort:** Small +**Risk:** Low + +## Recommended Action +_To be filled during triage._ + +## Technical Details + +Affected files: +- `scripts/lib/brief-dedup.mjs` — re-exports, materializeCluster, shadow double-run, warn log, mode validation +- `scripts/lib/brief-dedup-embed.mjs` — delete `clusterWithEntityVeto` +- `scripts/lib/brief-dedup-jaccard.mjs` — extract shared representative helper (for #2) + +## Acceptance Criteria +- [ ] `brief-dedup.mjs` exports exactly `deduplicateStories` + `readOrchestratorConfig` + the types; no re-exports. +- [ ] One shared helper for representative selection; used by both Jaccard fallback and orchestrator materialization. +- [ ] `clusterWithEntityVeto` is deleted; orchestrator inlines the veto wiring. +- [ ] Shadow mode runs Jaccard exactly once per tick; disagreement diff is derived from that single run. +- [ ] Fallback warn line contains `reason={timeout|provider|other}` (or equivalent structured field). +- [ ] Unrecognised `DIGEST_DEDUP_MODE` value emits `warn` once per cron run with the raw value masked. +- [ ] All 64 dedup tests pass; add one new test per sub-fix. 
+ +## Work Log +_Empty — awaiting triage._ + +## Resources +- Review commit: `cdd7a124c` +- Reviewers: all 5 (kieran-typescript, security-sentinel, performance-oracle, architecture-strategist, code-simplicity-reviewer) diff --git a/todos/197-complete-p2-workflow-and-shadow-archive-hardening.md b/todos/197-complete-p2-workflow-and-shadow-archive-hardening.md new file mode 100644 index 000000000..4a7fb40f3 --- /dev/null +++ b/todos/197-complete-p2-workflow-and-shadow-archive-hardening.md @@ -0,0 +1,83 @@ +--- +status: complete +priority: p2 +issue_id: 197 +tags: [code-review, digest-dedup, phase-a, security, ci] +dependencies: [] +--- + +# Nightly workflow + shadow-archive hardening + +## Problem Statement + +Security-sentinel flagged two hardening items in the new CI workflow + shadow-mode archive — both are defence-in-depth, not exploitable today, but should land before the Phase C flip that actually writes archives. + +### 1. `.github/workflows/dedup-golden-pairs.yml:59` — unquoted heredoc +```yaml +BODY=$(cat <:<8-char contentHash>`. ISO timestamps + cadence make the prefix enumerable; story IDs are safe hashes but `normalizedTitles` is plaintext wire. Per data classification titles are public, so there's no secret leak — but two hardening moves would make the archive more defensible: +- Drop `normalizedTitles` from the archive; have `shadow-sample.mjs` re-read from `story:track:v1:` at draw time. +- Document the `brief:dedup:shadow:v1:*` prefix in the Upstash access-controls runbook so future changes know it holds wire titles. + +### 3. `shadow-sample.mjs:53` — no command allowlist on the Upstash helper +The helper accepts `(command, ...params)` with `encodeURIComponent` on each segment. Safe today because only the tool calls it with `SCAN`/`GET`. If a future caller passes user input into `params`, the percent-encoding still protects path structure — but a dangerous command (`FLUSHDB`, `DEL`) could land. 
Hard-allowlist the `command` to `['SCAN', 'GET', 'EXISTS']`. + +## Findings +All three are reviewer-flagged defensive hardening. None affect the Phase A no-op ship path. + +## Proposed Solutions + +### Option 1 — minimum fix (recommended) +- Quote the heredoc delimiter in the workflow (1 char). +- Add command allowlist in `shadow-sample.mjs` (6 lines). +- Leave `normalizedTitles` in the archive; add a comment documenting the wire-text classification. + +**Pros:** small, focused, safe; defers the bigger archive-shape change. +**Cons:** doesn't eliminate the wire-text enumeration concern, only documents it. +**Effort:** Small +**Risk:** Low + +### Option 2 — full hardening +All three items above. + +**Pros:** maximal defence. +**Cons:** archive-shape change touches `shadow-sample.mjs` (title lookup) and the orchestrator (drop field); slightly bigger diff. +**Effort:** Small-Medium +**Risk:** Low + +### Option 3 — defer to Phase C +Ship Phase A as-is; harden before flipping to `shadow` mode. + +**Pros:** zero Phase A churn. +**Cons:** the heredoc is a weak-positive attack surface; the workflow could run before Phase C if the manual `workflow_dispatch` is triggered. +**Effort:** zero now +**Risk:** Low-Medium + +## Recommended Action +_To be filled during triage._ + +## Technical Details +- `.github/workflows/dedup-golden-pairs.yml:59` +- `scripts/lib/brief-dedup.mjs:170-180` (archive shape) +- `scripts/tools/shadow-sample.mjs:53` (command allowlist) + +## Acceptance Criteria +- [ ] Heredoc delimiter is `<<'EOF'` (quoted). +- [ ] `shadow-sample.mjs`'s Upstash helper rejects any command not in a hardcoded allowlist. +- [ ] (Optional) Shadow archive no longer stores plaintext titles; sampler rehydrates from `story:track:v1:*`. 
+ +## Work Log +_Empty — awaiting triage._ + +## Resources +- Review commit: `cdd7a124c` +- Reviewer: security-sentinel diff --git a/todos/198-complete-p2-test-and-observability-polish.md b/todos/198-complete-p2-test-and-observability-polish.md new file mode 100644 index 000000000..e344613ce --- /dev/null +++ b/todos/198-complete-p2-test-and-observability-polish.md @@ -0,0 +1,66 @@ +--- +status: complete +priority: p2 +issue_id: 198 +tags: [code-review, digest-dedup, phase-a, tests, observability] +dependencies: [] +--- + +# Test + observability polish on the Phase A embed path + +## Problem Statement + +Two small quality items the reviewers flagged. + +### 1. Embedding test Scenario 2 only asserts length, not content +`tests/brief-dedup-embedding.test.mjs:164` — `assert.equal(out.length, expected.length)`. A regression that changed Jaccard's merging shape but kept cluster count constant would slip through. The equivalent parity test in `tests/brief-dedup-jaccard.test.mjs:214-220` does a deep per-cluster compare (hash, mergedHashes, mentionCount). Match that pattern in the timeout/outage fallback scenarios. + +### 2. Missing API-key path logs an exception every cron tick +If `DIGEST_DEDUP_MODE=embed` is set but `OPENROUTER_API_KEY` is empty (rotated, forgotten, or misconfigured after a Railway env edit), the orchestrator catches the `EmbeddingProviderError('OPENROUTER_API_KEY not configured')` and warns. That's correct behaviour for the all-or-nothing fallback, but it emits the same warn every tick — noisy in Sentry/log search. + +Two options: +- Add a pre-flight in `readOrchestratorConfig` that checks the key and short-circuits to jaccard with a ONE-SHOT warn (per process). +- Add `reason=no_api_key` to the warn line so it's filterable. + +Option (a) is lower-volume, option (b) is easier to implement and scoped to the overlapping P2 #196 subsection 5 ("warn line missing err.name"). Pick one. + +## Findings +Both items surfaced from the kieran-ts reviewer. 
+ +## Proposed Solutions + +### Option 1 — tighten both (recommended) +- Deep-equal the fallback clusters in Scenario 2 + Scenario 3 (same 6-line pattern as jaccard orchestrator test). +- Add a `reason=` field to the fallback warn (dovetails with #196.5). Skip the one-shot warn; the structured field is enough. + +**Pros:** small, covered by existing harness. +**Cons:** none. +**Effort:** Small +**Risk:** Low + +### Option 2 — one-shot warn per process +Module-scope `_apiKeyWarned` flag; log only on first miss. + +**Pros:** quiet in log. +**Cons:** cross-process: Railway cron each tick forks a new node process, so every cron tick IS a first miss — the flag doesn't help. Conclusion: do NOT implement this; rely on `reason=` labels instead. +**Effort:** trivial +**Risk:** wasted — the flag doesn't persist across cron tick lifetimes. + +## Recommended Action +_Option 1._ + +## Technical Details +- `tests/brief-dedup-embedding.test.mjs:164` (Scenario 2) +- `tests/brief-dedup-embedding.test.mjs:196` (Scenario 3) +- `scripts/lib/brief-dedup.mjs:303-306` (warn log) + +## Acceptance Criteria +- [ ] Scenarios 2 and 3 deep-equal the output clusters against the Jaccard expected shape (hash, mergedHashes, mentionCount). +- [ ] Fallback warn line includes a filterable `reason=` field. + +## Work Log +_Empty — awaiting triage._ + +## Resources +- Review commit: `cdd7a124c` +- Reviewer: kieran-typescript-reviewer diff --git a/todos/199-complete-p3-phase-a-nits-bundle.md b/todos/199-complete-p3-phase-a-nits-bundle.md new file mode 100644 index 000000000..a630eda87 --- /dev/null +++ b/todos/199-complete-p3-phase-a-nits-bundle.md @@ -0,0 +1,84 @@ +--- +status: complete +priority: p3 +issue_id: 199 +tags: [code-review, digest-dedup, phase-a, nits] +dependencies: [] +--- + +# Phase A nits bundle (constants bag, mocked-golden overlap, apiKey dep rename, tools/ convention, gazetteer JSON, unique-hash precondition) + +## Problem Statement + +Six P3 items across the reviewers. 
None of them block merge; several are "consider in Phase B/C". + +### 1. `__constants` test-bag is redundant +`scripts/lib/brief-dedup-consts.mjs:64-74` exposes a frozen object used by `tests/brief-dedup-jaccard.test.mjs:27,33,37` to read three constants. A direct named import is identical and shorter. The bag's original purpose ("replace the regex-extraction harness") is already served by normal imports. + +### 2. `brief-dedup-golden.test.mjs` (138 LOC) largely mirrors scenarios in `brief-dedup-embedding.test.mjs` +The mocked-embedder path tests with crafted vectors are structurally forced — they're testing the mock. The real value of "golden pairs" is the LIVE-embedder nightly workflow + `tests/fixtures/brief-dedup-golden-pairs.json`. Consider removing the mocked test file; keep the fixture + workflow. + +### 3. `deps.apiKey` override in `brief-embedding.mjs:228` shares the shape with prod wiring +A future caller spreading unvalidated user data into `deps` could inject an attacker-controlled API key that goes to `openrouter.ai` in the `Authorization: Bearer` header. Currently unreachable. Rename to `deps._apiKey` or gate on a test-only `Symbol` to make misuse noisy. + +### 4. `scripts/tools/` is a new top-level directory +No prior precedent in the repo; ops scripts have historically been `scripts/backfill-*.mjs`, `scripts/benchmark-*.mjs` at top level. A `tools/` subdir is defensible for these three related utilities but worth documenting in `AGENTS.md` ("anything not on the cron lives in tools/"). + +### 5. `entity-gazetteer.mjs` as two inline `Set`s +Fine for v1. For auditability and NER-pluggability consider promoting `LOCATION_GAZETTEER` / `COMMON_CAPITALIZED` to `scripts/shared/location-gazetteer.json` with a tiny loader. Non-code reviewers can diff entries; easier to publish to `shared/` if a future CJS consumer needs it. + +### 6. `diffClustersByHash` assumes unique story hashes +`brief-dedup.mjs:126-148` builds hash→cluster-id maps. 
If two stories share a hash, the second silently overwrites the first. Safe today (upstream dedup guarantees uniqueness) but a JSDoc `@pre stories have unique .hash` would future-proof. + +## Findings +All six are convergent or single-reviewer items. Each is independent. + +## Proposed Solutions + +### Option 1 — address as Phase B/C prep work (recommended) +Do #1 and #3 now (tiny renames / deletions), defer #2 and #5 until Phase B/C validates the feature is shipping, document #4 in AGENTS.md during the next cleanup pass, add #6 as a one-line JSDoc. + +**Pros:** reflects the risk profile; doesn't churn Phase A for cosmetic wins. +**Cons:** leaves some nits on `main`. +**Effort:** Small for the now-items; negligible otherwise. +**Risk:** Low + +### Option 2 — do everything now +Single "Phase A nits" commit. + +**Pros:** closes all the follow-ups. +**Cons:** #2 and #5 touch enough surface that they should have their own PR. +**Effort:** Medium +**Risk:** Low + +### Option 3 — defer all +Track for Phase B. + +**Pros:** zero churn. +**Cons:** accumulates debt. +**Effort:** zero +**Risk:** Low + +## Recommended Action +_To be filled during triage._ + +## Technical Details +- `scripts/lib/brief-dedup-consts.mjs:64-74` (#1) +- `tests/brief-dedup-golden.test.mjs` (#2, whole file) +- `scripts/lib/brief-embedding.mjs:228` (#3) +- `AGENTS.md` + `scripts/tools/*` (#4) +- `scripts/lib/entity-gazetteer.mjs` → `scripts/shared/location-gazetteer.json` (#5) +- `scripts/lib/brief-dedup.mjs:126-148` (#6 — one-line JSDoc) + +## Acceptance Criteria +- [ ] (#1) `__constants` bag removed; jaccard test imports constants directly. +- [ ] (#3) `deps.apiKey` renamed to a less spreadable name. +- [ ] (#6) `diffClustersByHash` has an `@pre` JSDoc about unique hashes. +- [ ] (#2 / #4 / #5) captured as Phase B/C todos or deferred with rationale. 
+ +## Work Log +_Empty — awaiting triage._ + +## Resources +- Review commit: `cdd7a124c` +- Reviewers: code-simplicity, security-sentinel, architecture-strategist, kieran-typescript