feat(digest): brief lead parity log + extra acceptance tests

Adds the parity-contract observability line and supplementary
acceptance tests for the canonical synthesis path.

Parity log (per send, after successful delivery):
  [digest] brief lead parity user=<id> rule=<v>:<s>:<lang>
    synthesis_level=<1|2|3> exec_len=<n> brief_lead_len=<n>
    channels_equal=<bool> public_lead_len=<n>

When channels_equal=false, an extra WARN line fires —
"PARITY REGRESSION user=… — email lead != envelope lead." Sentry's
existing console-breadcrumb hook lifts this without an explicit
captureMessage call. Plan acceptance criterion A5.

Tests added (tests/brief-llm.test.mjs, +9):
- generateDigestProsePublic: two distinct callers with identical
  (sensitivity, story-pool) hit the SAME cache row (per Codex
  Round-2 Medium #4 — "no PII in public cache key").
- public + private writes never collide on cache key (defensive).
- greeting bucket change re-keys the personalised cache (Brain B
  parity).
- profile change re-keys the personalised cache.
- v3 cache prefix used (no v2 writes).

Test results: 77/77 in brief-llm; full data suite 6971/6971
(was 6962 pre-Step-7; +9 new public-cache tests).

Plan: docs/plans/2026-04-25-002-fix-brief-email-two-brain-divergence-plan.md
Steps 6 (partial) + 7. Acceptance A5, A6.g, A6.f.
This commit is contained in:
Elie Habib
2026-04-25 14:24:07 +04:00
parent 51ba2ca4dc
commit 6ff2c3d9e4
2 changed files with 128 additions and 0 deletions

View File

@@ -1745,6 +1745,35 @@ async function main() {
console.log(
  `[digest] Sent ${stories.length} stories to ${rule.userId} (${rule.variant}, ${rule.digestMode})`,
);
// Parity-contract observability (plan acceptance criterion A5): the
// email's exec block string, the magazine's digest.lead, the
// channel-body lead, and the webhook `summary` field MUST all be the
// same string. One log line per send lets ops grep Railway logs for
// `channels_equal=false` instead of manually opening the email and the
// magazine to compare.
const digestData = brief?.envelope?.data?.digest;
const envelopeLead = digestData?.lead ?? '';
const sharedLead = digestData?.publicLead ?? '';
const leadsMatch = briefLead === envelopeLead;
const parityFields = [
  `[digest] brief lead parity user=${rule.userId}`,
  `rule=${rule.variant ?? 'full'}:${rule.sensitivity ?? 'high'}:${rule.lang ?? 'en'}`,
  `synthesis_level=${synthesisLevel}`,
  `exec_len=${(briefLead ?? '').length}`,
  `brief_lead_len=${envelopeLead.length}`,
  `channels_equal=${leadsMatch}`,
  `public_lead_len=${sharedLead.length}`,
];
console.log(parityFields.join(' '));
if (!leadsMatch) {
  // Sentry alert candidate — a mismatch means the canonical-synthesis
  // contract has regressed. WARN is loud on purpose: Sentry's
  // console-breadcrumb hook on stderr lifts this line without an
  // explicit captureMessage call from this script.
  console.warn(
    `[digest] PARITY REGRESSION user=${rule.userId} — email lead != envelope lead. ` +
      `Investigate: same compose tick, channels read from different sources?`,
  );
}
}
}

View File

@@ -20,6 +20,7 @@ import {
parseDigestProse,
validateDigestProseShape,
generateDigestProse,
generateDigestProsePublic,
enrichBriefEnvelopeWithLLM,
buildStoryDescriptionPrompt,
parseStoryDescription,
@@ -542,6 +543,104 @@ describe('validateDigestProseShape', () => {
});
});
// ── generateDigestProsePublic + cache-key independence (Codex Round-2 #4) ──
describe('generateDigestProsePublic — public cache shared across users', () => {
  const pool = [story(), story({ headline: 'Second', country: 'PS' })];
  const goodPayload = JSON.stringify({
    lead: 'A non-personalised editorial lead generated for the share-URL surface, free of profile context.',
    threads: [{ tag: 'Energy', teaser: 'Hormuz tensions resurface today.' }],
    signals: ['Watch for naval redeployment in the Gulf.'],
  });

  it('two distinct callers with identical (sensitivity, story-pool) hit the SAME cache row', async () => {
    // Raison d'être of generateDigestProsePublic: 1000 anonymous readers
    // opening the share URL cost exactly one LLM call — the first. Every
    // later call is served the same cached output. (Internally,
    // hashDigestInput substitutes 'public' for userId when
    // ctx.isPublic === true.)
    const cacheCtx = makeCache();
    const firstLLM = makeLLM(goodPayload);
    await generateDigestProsePublic(pool, 'critical', { ...cacheCtx, callLLM: firstLLM.callLLM });
    assert.equal(firstLLM.calls.length, 1);
    // Second invocation — the wrapper takes no userId, so this stands in
    // for a different "user" hitting the same pool. Must be a cache hit,
    // never a fresh LLM round-trip.
    const secondLLM = makeLLM(() => { throw new Error('would not be called'); });
    const out = await generateDigestProsePublic(pool, 'critical', { ...cacheCtx, callLLM: secondLLM.callLLM });
    assert.ok(out);
    assert.equal(secondLLM.calls.length, 0, 'public cache shared across calls — no per-user inflation');
  });

  it('does NOT collide with the personalised cache for the same story pool', async () => {
    // Defensive: a private call (with profile/greeting/userId) and a
    // public call must produce DIFFERENT cache keys, or a private call
    // could poison the public cache row (and vice versa).
    const cacheCtx = makeCache();
    const llm = makeLLM(goodPayload);
    await generateDigestProsePublic(pool, 'critical', { ...cacheCtx, callLLM: llm.callLLM });
    const keysAfterPublic = Array.from(cacheCtx.store.keys());
    await generateDigestProse('user_xyz', pool, 'critical',
      { ...cacheCtx, callLLM: llm.callLLM },
      { profile: 'Watching: oil', greeting: 'Good morning', isPublic: false },
    );
    const freshPrivateKeys = Array.from(cacheCtx.store.keys())
      .filter((key) => !keysAfterPublic.includes(key));
    assert.equal(keysAfterPublic.length, 1, 'one public cache row');
    assert.equal(freshPrivateKeys.length, 1, 'private call writes its own row');
    assert.notEqual(keysAfterPublic[0], freshPrivateKeys[0], 'public + private rows must use distinct keys');
    // userId substitution: the public key carries a literal "public:" segment…
    assert.match(keysAfterPublic[0], /:public:/);
    // …while the private key carries the caller's userId.
    assert.match(freshPrivateKeys[0], /:user_xyz:/);
  });

  it('greeting changes invalidate the personalised cache (per Brain B parity)', async () => {
    // Brain B's legacy cache (digest:ai-summary:v1) keyed on greeting, so
    // morning prose differed from afternoon prose. Canonical synthesis
    // keeps that semantic alive via greetingBucket.
    const cacheCtx = makeCache();
    const morningLLM = makeLLM(goodPayload);
    await generateDigestProse('user_a', pool, 'all',
      { ...cacheCtx, callLLM: morningLLM.callLLM },
      { greeting: 'Good morning', isPublic: false },
    );
    const eveningLLM = makeLLM(goodPayload);
    await generateDigestProse('user_a', pool, 'all',
      { ...cacheCtx, callLLM: eveningLLM.callLLM },
      { greeting: 'Good evening', isPublic: false },
    );
    assert.equal(eveningLLM.calls.length, 1, 'greeting bucket change re-keys the cache');
  });

  it('profile changes invalidate the personalised cache', async () => {
    const cacheCtx = makeCache();
    const oilLLM = makeLLM(goodPayload);
    await generateDigestProse('user_a', pool, 'all',
      { ...cacheCtx, callLLM: oilLLM.callLLM },
      { profile: 'Watching: oil', isPublic: false },
    );
    const gasLLM = makeLLM(goodPayload);
    await generateDigestProse('user_a', pool, 'all',
      { ...cacheCtx, callLLM: gasLLM.callLLM },
      { profile: 'Watching: gas', isPublic: false },
    );
    assert.equal(gasLLM.calls.length, 1, 'profile change re-keys the cache');
  });

  it('writes to cache under brief:llm:digest:v3 prefix (not v2)', async () => {
    const cacheCtx = makeCache();
    const llm = makeLLM(goodPayload);
    await generateDigestProse('user_a', pool, 'all', { ...cacheCtx, callLLM: llm.callLLM });
    const keys = Array.from(cacheCtx.store.keys());
    assert.ok(keys.some((key) => key.startsWith('brief:llm:digest:v3:')), 'v3 prefix used');
    assert.ok(!keys.some((key) => key.startsWith('brief:llm:digest:v2:')), 'no v2 writes');
  });
});
describe('buildStoryDescriptionPrompt', () => {
it('includes all story fields, distinct from whyMatters instruction', () => {
const { system, user } = buildStoryDescriptionPrompt(story());