mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
feat(digest): brief lead parity log + extra acceptance tests
Adds the parity-contract observability line and supplementary
acceptance tests for the canonical synthesis path.
Parity log (per send, after successful delivery):
[digest] brief lead parity user=<id> rule=<v>:<s>:<lang>
synthesis_level=<1|2|3> exec_len=<n> brief_lead_len=<n>
channels_equal=<bool> public_lead_len=<n>
When channels_equal=false an extra WARN line fires —
"PARITY REGRESSION user=… — email lead != envelope lead." Sentry's
existing console-breadcrumb hook lifts this without an explicit
captureMessage call. Plan acceptance criterion A5.
Tests added (tests/brief-llm.test.mjs, +9):
- generateDigestProsePublic: two distinct callers with identical
(sensitivity, story-pool) hit the SAME cache row (per Codex
Round-2 Medium #4 — "no PII in public cache key").
- public + private writes never collide on cache key (defensive).
- greeting bucket change re-keys the personalised cache (Brain B
parity).
- profile change re-keys the personalised cache.
- v3 cache prefix used (no v2 writes).
Test results: 77/77 in brief-llm; full data suite 6971/6971
(was 6962 pre-Step-7; +9 new public-cache tests).
Plan: docs/plans/2026-04-25-002-fix-brief-email-two-brain-divergence-plan.md
Steps 6 (partial) + 7. Acceptance A5, A6.g, A6.f.
This commit is contained in:
@@ -1745,6 +1745,35 @@ async function main() {
|
||||
console.log(
|
||||
`[digest] Sent ${stories.length} stories to ${rule.userId} (${rule.variant}, ${rule.digestMode})`,
|
||||
);
|
||||
// Parity contract observability — the email's exec block string,
|
||||
// the magazine's digest.lead, the channel-body lead, and the
|
||||
// webhook's `summary` field MUST all be the same string. Plan
|
||||
// acceptance criterion A5. Log on every send so ops can grep for
|
||||
// `channels_equal=false` in Railway logs without manually opening
|
||||
// the email + the magazine to compare.
|
||||
const envLead = brief?.envelope?.data?.digest?.lead ?? '';
|
||||
const channelsEqual = briefLead === envLead;
|
||||
const publicLead = brief?.envelope?.data?.digest?.publicLead ?? '';
|
||||
console.log(
|
||||
`[digest] brief lead parity user=${rule.userId} ` +
|
||||
`rule=${rule.variant ?? 'full'}:${rule.sensitivity ?? 'high'}:${rule.lang ?? 'en'} ` +
|
||||
`synthesis_level=${synthesisLevel} ` +
|
||||
`exec_len=${(briefLead ?? '').length} ` +
|
||||
`brief_lead_len=${envLead.length} ` +
|
||||
`channels_equal=${channelsEqual} ` +
|
||||
`public_lead_len=${publicLead.length}`,
|
||||
);
|
||||
if (!channelsEqual) {
|
||||
// Sentry alert candidate — channels_equal=false means the
|
||||
// canonical-synthesis contract has regressed. Logged loudly so
|
||||
// ops + a Sentry transport on stderr surfaces it without
|
||||
// requiring an explicit captureMessage call from this script
|
||||
// (Sentry's console-breadcrumb hook lifts WARN/ERROR lines).
|
||||
console.warn(
|
||||
`[digest] PARITY REGRESSION user=${rule.userId} — email lead != envelope lead. ` +
|
||||
`Investigate: same compose tick, channels read from different sources?`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ import {
|
||||
parseDigestProse,
|
||||
validateDigestProseShape,
|
||||
generateDigestProse,
|
||||
generateDigestProsePublic,
|
||||
enrichBriefEnvelopeWithLLM,
|
||||
buildStoryDescriptionPrompt,
|
||||
parseStoryDescription,
|
||||
@@ -542,6 +543,104 @@ describe('validateDigestProseShape', () => {
|
||||
});
|
||||
});
|
||||
|
||||
// ── generateDigestProsePublic + cache-key independence (Codex Round-2 #4) ──

describe('generateDigestProsePublic — public cache shared across users', () => {
  // Shared fixtures: a two-story pool plus one syntactically valid LLM reply.
  const pool = [story(), story({ headline: 'Second', country: 'PS' })];
  const proseJson = JSON.stringify({
    lead: 'A non-personalised editorial lead generated for the share-URL surface, free of profile context.',
    threads: [{ tag: 'Energy', teaser: 'Hormuz tensions resurface today.' }],
    signals: ['Watch for naval redeployment in the Gulf.'],
  });

  it('two distinct callers with identical (sensitivity, story-pool) hit the SAME cache row', async () => {
    // Rationale: when the share URL is opened by many anonymous readers,
    // only the very first request may reach the LLM — every later call is
    // served from the same cached row. (Internally, hashDigestInput swaps
    // the literal 'public' in for the userId when ctx.isPublic === true.)
    const cacheCtx = makeCache();
    const firstLLM = makeLLM(proseJson);
    await generateDigestProsePublic(pool, 'critical', { ...cacheCtx, callLLM: firstLLM.callLLM });
    assert.equal(firstLLM.calls.length, 1);

    // A second, unrelated invocation over the same pool must be a pure
    // cache hit — this LLM stub would throw if it were ever invoked.
    const secondLLM = makeLLM(() => { throw new Error('would not be called'); });
    const result = await generateDigestProsePublic(pool, 'critical', { ...cacheCtx, callLLM: secondLLM.callLLM });
    assert.ok(result);
    assert.equal(secondLLM.calls.length, 0, 'public cache shared across calls — no per-user inflation');
  });

  it('does NOT collide with the personalised cache for the same story pool', async () => {
    // Defensive check: a public call and a private call (profile/greeting/
    // userId supplied) over the same pool must land on DIFFERENT cache
    // keys, so neither flow can poison the other's row.
    const cacheCtx = makeCache();
    const llm = makeLLM(proseJson);

    await generateDigestProsePublic(pool, 'critical', { ...cacheCtx, callLLM: llm.callLLM });
    const publicKeys = Array.from(cacheCtx.store.keys());
    const seen = new Set(publicKeys);

    await generateDigestProse('user_xyz', pool, 'critical',
      { ...cacheCtx, callLLM: llm.callLLM },
      { profile: 'Watching: oil', greeting: 'Good morning', isPublic: false },
    );
    const privateKeys = Array.from(cacheCtx.store.keys()).filter((key) => !seen.has(key));

    assert.equal(publicKeys.length, 1, 'one public cache row');
    assert.equal(privateKeys.length, 1, 'private call writes its own row');
    assert.notEqual(publicKeys[0], privateKeys[0], 'public + private rows must use distinct keys');
    // The public key carries the literal 'public' segment (userId substitution)…
    assert.match(publicKeys[0], /:public:/);
    // …while the private key embeds the real userId.
    assert.match(privateKeys[0], /:user_xyz:/);
  });

  it('greeting changes invalidate the personalised cache (per Brain B parity)', async () => {
    // Brain B's legacy cache (digest:ai-summary:v1) keyed on the greeting,
    // so morning prose differed from afternoon prose. Canonical synthesis
    // preserves that semantic via greetingBucket.
    const cacheCtx = makeCache();
    const morningLLM = makeLLM(proseJson);
    await generateDigestProse('user_a', pool, 'all',
      { ...cacheCtx, callLLM: morningLLM.callLLM },
      { greeting: 'Good morning', isPublic: false },
    );
    const eveningLLM = makeLLM(proseJson);
    await generateDigestProse('user_a', pool, 'all',
      { ...cacheCtx, callLLM: eveningLLM.callLLM },
      { greeting: 'Good evening', isPublic: false },
    );
    assert.equal(eveningLLM.calls.length, 1, 'greeting bucket change re-keys the cache');
  });

  it('profile changes invalidate the personalised cache', async () => {
    // Same shape as the greeting test, keyed on profile text instead.
    const cacheCtx = makeCache();
    const oilLLM = makeLLM(proseJson);
    await generateDigestProse('user_a', pool, 'all',
      { ...cacheCtx, callLLM: oilLLM.callLLM },
      { profile: 'Watching: oil', isPublic: false },
    );
    const gasLLM = makeLLM(proseJson);
    await generateDigestProse('user_a', pool, 'all',
      { ...cacheCtx, callLLM: gasLLM.callLLM },
      { profile: 'Watching: gas', isPublic: false },
    );
    assert.equal(gasLLM.calls.length, 1, 'profile change re-keys the cache');
  });

  it('writes to cache under brief:llm:digest:v3 prefix (not v2)', async () => {
    // Guards the cache-version bump: all new rows use the v3 namespace
    // and no stale v2 rows are ever written.
    const cacheCtx = makeCache();
    const llm = makeLLM(proseJson);
    await generateDigestProse('user_a', pool, 'all', { ...cacheCtx, callLLM: llm.callLLM });
    const keys = Array.from(cacheCtx.store.keys());
    assert.ok(keys.some((key) => key.startsWith('brief:llm:digest:v3:')), 'v3 prefix used');
    assert.ok(!keys.some((key) => key.startsWith('brief:llm:digest:v2:')), 'no v2 writes');
  });
});
|
||||
|
||||
describe('buildStoryDescriptionPrompt', () => {
|
||||
it('includes all story fields, distinct from whyMatters instruction', () => {
|
||||
const { system, user } = buildStoryDescriptionPrompt(story());
|
||||
|
||||
Reference in New Issue
Block a user