mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
* feat(brief): analyst prompt v2 — multi-sentence, grounded, includes story description
Shadow-diff of 12 prod stories on 2026-04-21 showed v1 analyst output
indistinguishable from legacy Gemini: identical single-sentence
abstraction ("destabilize / systemic / sovereign risk repricing") with
no named actors, metrics, or dates — in several cases Gemini was MORE
specific.
Root cause: 18–30 word cap compressed context specifics out.
v2 loosens three dials at once so we can settle the A/B:
1. New system prompt WHY_MATTERS_ANALYST_SYSTEM_V2 — 2–3 sentences,
40–70 words, implicit SITUATION→ANALYSIS→(optional) WATCH arc,
MUST cite one specific named actor / metric / date / place from
the context. Analyst path only; gemini path stays on v1.
2. New parser parseWhyMattersV2 — accepts 100–500 chars, rejects
preamble boilerplate + leaked section labels + markdown.
3. Story description plumbed through — endpoint body accepts optional
story.description (≤ 1000 chars, body cap bumped 4 KB → 8 KB).
Cron forwards it when upstream has one (skipped when it equals the
headline — no new signal).
Cache + shadow bumped v3 → v4 / v1 → v2 so fresh output lands on the
first post-deploy cron tick. maxTokens 180 → 260 for ~3× output length.
If shadow-diff 24h after deploy still shows no delta vs gemini, kill
is BRIEF_WHY_MATTERS_PRIMARY=gemini on Vercel (instant, no redeploy).
Tests: 6059 pass (was 6022 + 37 new). typecheck × 2 clean.
* fix(brief): stop truncating v2 multi-sentence output + description in cache hash
Two P1s caught in PR #3269 review.
P1a — cron reparsed endpoint output with v1 single-sentence parser,
silently dropping sentences 2+3 of v2 analyst output. The endpoint had
ALREADY validated the string (parseWhyMattersV2 for analyst path;
parseWhyMatters for gemini). Re-parsing with v1 took only the first
sentence — exact regression #3269 was meant to fix.
Fix: trust the endpoint. Replace re-parse with bounds check (30–500
chars) + stub-echo reject. Added regression test asserting multi-
sentence output reaches the envelope unchanged.
P1b — `story.description` flowed into the analyst prompt but NOT into
the cache hash. Two requests with identical core fields but different
descriptions collided on one cache slot → second caller got prose
grounded in the FIRST caller's description.
Fix: add `description` as the 6th field of `hashBriefStory`. Bump
endpoint cache v4→v5 and shadow v2→v3 so buggy 5-field entries are
dropped. Updated the parity sentinel in brief-llm-core.test.mjs to
match 6-field semantics. Added regression tests covering different-
descriptions-differ and present-vs-absent-differ.
Tests: 6083 pass. typecheck × 2 clean.
573 lines
22 KiB
JavaScript
573 lines
22 KiB
JavaScript
/**
|
||
* Integration tests for the /api/internal/brief-why-matters edge endpoint
|
||
* + the cron's analyst-priority fallback chain.
|
||
*
|
||
* The endpoint is a .ts file; we test the pure helpers that go into it
|
||
* (country normalizer, core hashing, prompt builder, context trim, env
|
||
* parsing) plus simulate the handler end-to-end via the imported
|
||
* modules. The cron-side `generateWhyMatters` priority chain is covered
|
||
* directly via in-process dep injection.
|
||
*
|
||
* Run: node --test tests/brief-why-matters-analyst.test.mjs
|
||
*/
|
||
|
||
import { describe, it } from 'node:test';
|
||
import assert from 'node:assert/strict';
|
||
|
||
import { generateWhyMatters } from '../scripts/lib/brief-llm.mjs';
|
||
import {
|
||
hashBriefStory,
|
||
parseWhyMatters,
|
||
WHY_MATTERS_SYSTEM,
|
||
} from '../shared/brief-llm-core.js';
|
||
|
||
// ── Story fixture matching the cron's actual payload shape
|
||
// (shared/brief-filter.js:134-135). ────────────────────────────────────
|
||
|
||
function story(overrides = {}) {
|
||
return {
|
||
headline: 'Iran closes Strait of Hormuz',
|
||
source: 'Reuters',
|
||
threatLevel: 'critical',
|
||
category: 'Geopolitical Risk',
|
||
country: 'IR',
|
||
...overrides,
|
||
};
|
||
}
|
||
|
||
// ── Country normalizer ───────────────────────────────────────────────────
|
||
|
||
describe('normalizeCountryToIso2', () => {
|
||
let normalize;
|
||
it('loads from server/_shared/country-normalize.ts via tsx or compiled', async () => {
|
||
// The module is .ts; in the repo's test setup, node 22 can load .ts
|
||
// via tsx. If direct import fails under the test runner, fall back
|
||
// to running the logic inline by importing the JSON and a mirror
|
||
// function. The logic is trivial so this isn't a flaky compromise.
|
||
try {
|
||
const mod = await import('../server/_shared/country-normalize.ts');
|
||
normalize = mod.normalizeCountryToIso2;
|
||
} catch {
|
||
const { default: COUNTRY_NAMES } = await import('../shared/country-names.json', {
|
||
with: { type: 'json' },
|
||
});
|
||
const ISO2_SET = new Set(Object.values(COUNTRY_NAMES));
|
||
normalize = (raw) => {
|
||
if (typeof raw !== 'string') return null;
|
||
const trimmed = raw.trim();
|
||
if (trimmed === '') return null;
|
||
if (trimmed.toLowerCase() === 'global') return null;
|
||
if (/^[A-Za-z]{2}$/.test(trimmed)) {
|
||
const upper = trimmed.toUpperCase();
|
||
return ISO2_SET.has(upper) ? upper : null;
|
||
}
|
||
const lookup = COUNTRY_NAMES[trimmed.toLowerCase()];
|
||
return typeof lookup === 'string' ? lookup : null;
|
||
};
|
||
}
|
||
assert.ok(typeof normalize === 'function');
|
||
});
|
||
|
||
it('passes through valid ISO2 case-insensitively', () => {
|
||
assert.equal(normalize('US'), 'US');
|
||
assert.equal(normalize('us'), 'US');
|
||
assert.equal(normalize('IR'), 'IR');
|
||
assert.equal(normalize('gb'), 'GB');
|
||
});
|
||
|
||
it('resolves full names case-insensitively', () => {
|
||
assert.equal(normalize('United States'), 'US');
|
||
assert.equal(normalize('united states'), 'US');
|
||
assert.equal(normalize('Iran'), 'IR');
|
||
assert.equal(normalize('United Kingdom'), 'GB');
|
||
});
|
||
|
||
it("'Global' sentinel maps to null (non-country; not an error)", () => {
|
||
assert.equal(normalize('Global'), null);
|
||
assert.equal(normalize('global'), null);
|
||
assert.equal(normalize('GLOBAL'), null);
|
||
});
|
||
|
||
it('rejects unknown / empty / undefined / non-string inputs', () => {
|
||
assert.equal(normalize(''), null);
|
||
assert.equal(normalize(' '), null);
|
||
assert.equal(normalize('Nowhere'), null);
|
||
assert.equal(normalize(undefined), null);
|
||
assert.equal(normalize(null), null);
|
||
assert.equal(normalize(123), null);
|
||
});
|
||
|
||
it('resolves common non-ISO2 abbreviations when they exist in the gazetteer', () => {
|
||
// Plan assumed "USA" was not in the gazetteer; it actually is mapped.
|
||
// This exercises the full-name-path (3+ chars) with a short abbreviation.
|
||
assert.equal(normalize('USA'), 'US');
|
||
});
|
||
|
||
it('rejects ISO2-shaped values not in the gazetteer', () => {
|
||
assert.equal(normalize('ZZ'), null); // structurally valid, not in gazetteer
|
||
assert.equal(normalize('XY'), null);
|
||
});
|
||
});
|
||
|
||
// ── Cache-key stability ──────────────────────────────────────────────────
|
||
|
||
describe('cache key identity', () => {
|
||
it('hashBriefStory stable across the 5-field material', async () => {
|
||
const a = await hashBriefStory(story());
|
||
const b = await hashBriefStory(story());
|
||
assert.equal(a, b);
|
||
});
|
||
|
||
it('hashBriefStory differs when any hash-field differs', async () => {
|
||
const baseline = await hashBriefStory(story());
|
||
for (const f of ['headline', 'source', 'threatLevel', 'category', 'country']) {
|
||
const h = await hashBriefStory(story({ [f]: `${story()[f]}X` }));
|
||
assert.notEqual(h, baseline, `${f} must be part of cache identity`);
|
||
}
|
||
});
|
||
});
|
||
|
||
// ── Deterministic shadow sampling ────────────────────────────────────────
|
||
|
||
describe('shadow sample deterministic hashing', () => {
|
||
// Mirror of the endpoint's sample decision — any drift between this
|
||
// and the endpoint would silently halve the sampled population.
|
||
function sampleHit(hash16, pct) {
|
||
if (pct >= 100) return true;
|
||
if (pct <= 0) return false;
|
||
const bucket = Number.parseInt(hash16.slice(0, 8), 16) % 100;
|
||
return bucket < pct;
|
||
}
|
||
|
||
it('pct=100 always hits', () => {
|
||
for (const h of ['0000000000000000', 'ffffffffffffffff', 'abcdef0123456789']) {
|
||
assert.equal(sampleHit(h, 100), true);
|
||
}
|
||
});
|
||
|
||
it('pct=0 never hits', () => {
|
||
for (const h of ['0000000000000000', 'ffffffffffffffff', 'abcdef0123456789']) {
|
||
assert.equal(sampleHit(h, 0), false);
|
||
}
|
||
});
|
||
|
||
it('pct=25 hits approximately 25% on a bulk sample, and is deterministic', async () => {
|
||
let hits = 0;
|
||
const N = 400;
|
||
const seen = new Map();
|
||
for (let i = 0; i < N; i++) {
|
||
const h = await hashBriefStory(story({ headline: `fixture-${i}` }));
|
||
const first = sampleHit(h, 25);
|
||
const second = sampleHit(h, 25);
|
||
assert.equal(first, second, `hash ${h} must give the same decision`);
|
||
seen.set(h, first);
|
||
if (first) hits++;
|
||
}
|
||
// Tolerance: uniform mod-100 on SHA-256 prefix should be tight.
|
||
assert.ok(hits > N * 0.15, `expected > 15% hits, got ${hits}`);
|
||
assert.ok(hits < N * 0.35, `expected < 35% hits, got ${hits}`);
|
||
});
|
||
});
|
||
|
||
// ── `generateWhyMatters` analyst-priority chain ─────────────────────────
|
||
|
||
describe('generateWhyMatters — analyst priority', () => {
|
||
const VALID = 'Closure of the Strait of Hormuz would spike global oil prices and force a US naval response.';
|
||
|
||
it('uses the analyst endpoint result when it returns a string', async () => {
|
||
let callLlmInvoked = false;
|
||
const out = await generateWhyMatters(story(), {
|
||
callAnalystWhyMatters: async () => VALID,
|
||
callLLM: async () => {
|
||
callLlmInvoked = true;
|
||
return 'FALLBACK unused';
|
||
},
|
||
cacheGet: async () => null,
|
||
cacheSet: async () => {},
|
||
});
|
||
assert.equal(out, VALID);
|
||
assert.equal(callLlmInvoked, false, 'legacy callLLM must NOT fire when analyst returns');
|
||
});
|
||
|
||
it('falls through to legacy chain when analyst returns null', async () => {
|
||
let callLlmInvoked = false;
|
||
const out = await generateWhyMatters(story(), {
|
||
callAnalystWhyMatters: async () => null,
|
||
callLLM: async () => {
|
||
callLlmInvoked = true;
|
||
return VALID;
|
||
},
|
||
cacheGet: async () => null,
|
||
cacheSet: async () => {},
|
||
});
|
||
assert.equal(out, VALID);
|
||
assert.equal(callLlmInvoked, true, 'legacy callLLM must fire after analyst miss');
|
||
});
|
||
|
||
it('falls through when analyst returns out-of-bounds output (too short)', async () => {
|
||
let callLlmInvoked = false;
|
||
const out = await generateWhyMatters(story(), {
|
||
callAnalystWhyMatters: async () => 'Short.',
|
||
callLLM: async () => {
|
||
callLlmInvoked = true;
|
||
return VALID;
|
||
},
|
||
cacheGet: async () => null,
|
||
cacheSet: async () => {},
|
||
});
|
||
assert.equal(out, VALID);
|
||
assert.equal(callLlmInvoked, true, 'out-of-bounds analyst output must trigger fallback');
|
||
});
|
||
|
||
it('preserves multi-sentence v2 analyst output verbatim (P1 regression guard)', async () => {
|
||
// The endpoint now returns 2–3 sentences validated by parseWhyMattersV2.
|
||
// The cron MUST NOT reparse with the v1 single-sentence parser, which
|
||
// would silently truncate the 2nd + 3rd sentences. Caught in PR #3269
|
||
// review; fixed by trusting the endpoint's own validation and only
|
||
// rejecting obvious garbage (length / stub echo) here.
|
||
const multi =
|
||
"Iran's closure of the Strait of Hormuz on April 21 halts roughly 20% of global seaborne oil. " +
|
||
'The disruption forces an immediate repricing of sovereign risk across Gulf energy exporters. ' +
|
||
'Watch IMF commentary in the next 48 hours for cascading guidance.';
|
||
let callLlmInvoked = false;
|
||
const out = await generateWhyMatters(story(), {
|
||
callAnalystWhyMatters: async () => multi,
|
||
callLLM: async () => {
|
||
callLlmInvoked = true;
|
||
return VALID;
|
||
},
|
||
cacheGet: async () => null,
|
||
cacheSet: async () => {},
|
||
});
|
||
assert.equal(out, multi, 'multi-sentence v2 output must reach the envelope unchanged');
|
||
assert.equal(callLlmInvoked, false, 'legacy callLLM must not fire when v2 analyst succeeds');
|
||
// Sanity: output is actually multi-sentence (not truncated to first).
|
||
assert.ok(out.split('. ').length >= 2, 'output must retain 2nd+ sentences');
|
||
});
|
||
|
||
it('falls through when analyst throws', async () => {
|
||
let callLlmInvoked = false;
|
||
const out = await generateWhyMatters(story(), {
|
||
callAnalystWhyMatters: async () => {
|
||
throw new Error('network timeout');
|
||
},
|
||
callLLM: async () => {
|
||
callLlmInvoked = true;
|
||
return VALID;
|
||
},
|
||
cacheGet: async () => null,
|
||
cacheSet: async () => {},
|
||
});
|
||
assert.equal(out, VALID);
|
||
assert.equal(callLlmInvoked, true);
|
||
});
|
||
|
||
it('returns null when BOTH layers fail (caller uses stub)', async () => {
|
||
const out = await generateWhyMatters(story(), {
|
||
callAnalystWhyMatters: async () => null,
|
||
callLLM: async () => null,
|
||
cacheGet: async () => null,
|
||
cacheSet: async () => {},
|
||
});
|
||
assert.equal(out, null);
|
||
});
|
||
|
||
it('no callAnalystWhyMatters dep → legacy chain runs directly (backcompat)', async () => {
|
||
let callLlmInvoked = false;
|
||
const out = await generateWhyMatters(story(), {
|
||
callLLM: async () => {
|
||
callLlmInvoked = true;
|
||
return VALID;
|
||
},
|
||
cacheGet: async () => null,
|
||
cacheSet: async () => {},
|
||
});
|
||
assert.equal(out, VALID);
|
||
assert.equal(callLlmInvoked, true);
|
||
});
|
||
});
|
||
|
||
// ── Body validation (simulated — same rules as endpoint's
|
||
// validateStoryBody) ────────────────────────────────────────────────────
|
||
|
||
describe('endpoint validation contract', () => {
|
||
// Mirror of the endpoint's validation so unit tests don't need the
|
||
// full edge runtime. Any divergence would surface as a cross-suite
|
||
// test regression on the endpoint flow (see "endpoint end-to-end" below).
|
||
const VALID_THREAT = new Set(['critical', 'high', 'medium', 'low']);
|
||
const CAPS = { headline: 400, source: 120, category: 80, country: 80 };
|
||
const MAX_BODY_BYTES = 4096;
|
||
|
||
function validate(raw) {
|
||
if (!raw || typeof raw !== 'object') return { ok: false, msg: 'body' };
|
||
const s = raw.story;
|
||
if (!s || typeof s !== 'object') return { ok: false, msg: 'body.story' };
|
||
for (const f of ['headline', 'source', 'category']) {
|
||
if (typeof s[f] !== 'string' || s[f].length === 0) return { ok: false, msg: f };
|
||
if (s[f].length > CAPS[f]) return { ok: false, msg: `${f}-length` };
|
||
}
|
||
if (typeof s.threatLevel !== 'string' || !VALID_THREAT.has(s.threatLevel)) {
|
||
return { ok: false, msg: 'threatLevel' };
|
||
}
|
||
if (s.country !== undefined) {
|
||
if (typeof s.country !== 'string') return { ok: false, msg: 'country' };
|
||
if (s.country.length > CAPS.country) return { ok: false, msg: 'country-length' };
|
||
}
|
||
return { ok: true };
|
||
}
|
||
|
||
function measureBytes(obj) {
|
||
return new TextEncoder().encode(JSON.stringify(obj)).byteLength;
|
||
}
|
||
|
||
it('accepts a valid payload', () => {
|
||
assert.deepEqual(validate({ story: story() }), { ok: true });
|
||
});
|
||
|
||
it('rejects threatLevel="info" (not in the 4-value enum)', () => {
|
||
const out = validate({ story: story({ threatLevel: 'info' }) });
|
||
assert.equal(out.ok, false);
|
||
assert.equal(out.msg, 'threatLevel');
|
||
});
|
||
|
||
it('accepts free-form category (no allowlist)', () => {
|
||
for (const cat of ['General', 'Geopolitical Risk', 'Market Activity', 'Humanitarian Crisis']) {
|
||
assert.deepEqual(validate({ story: story({ category: cat }) }), { ok: true });
|
||
}
|
||
});
|
||
|
||
it('rejects category exceeding length cap', () => {
|
||
const long = 'x'.repeat(81);
|
||
const out = validate({ story: story({ category: long }) });
|
||
assert.equal(out.ok, false);
|
||
assert.equal(out.msg, 'category-length');
|
||
});
|
||
|
||
it('rejects empty required fields', () => {
|
||
for (const f of ['headline', 'source', 'category']) {
|
||
const out = validate({ story: story({ [f]: '' }) });
|
||
assert.equal(out.ok, false);
|
||
assert.equal(out.msg, f);
|
||
}
|
||
});
|
||
|
||
it('accepts empty country + country="Global" + missing country', () => {
|
||
assert.deepEqual(validate({ story: story({ country: '' }) }), { ok: true });
|
||
assert.deepEqual(validate({ story: story({ country: 'Global' }) }), { ok: true });
|
||
const { country: _, ...withoutCountry } = story();
|
||
assert.deepEqual(validate({ story: withoutCountry }), { ok: true });
|
||
});
|
||
|
||
it('body cap catches oversize payloads (both Content-Length and post-read)', () => {
|
||
const bloated = {
|
||
story: {
|
||
...story(),
|
||
// Artificial oversize payload — would need headline cap bypassed
|
||
// to reach in practice, but the total body-byte cap must still fire.
|
||
extra: 'x'.repeat(5000),
|
||
},
|
||
};
|
||
assert.ok(measureBytes(bloated) > MAX_BODY_BYTES, 'fixture is oversize');
|
||
// Note: body-cap is enforced at the handler level, not the validator.
|
||
// We assert the invariant about the measure here; the handler path is
|
||
// covered by the endpoint smoke test below.
|
||
});
|
||
});
|
||
|
||
// ── Prompt builder shape ──────────────────────────────────────────────
|
||
|
||
describe('buildAnalystWhyMattersPrompt — shape and budget', () => {
|
||
let builder;
|
||
it('loads', async () => {
|
||
const mod = await import('../server/worldmonitor/intelligence/v1/brief-why-matters-prompt.ts');
|
||
builder = mod.buildAnalystWhyMattersPrompt;
|
||
assert.ok(typeof builder === 'function');
|
||
});
|
||
|
||
it('uses the analyst v2 system prompt (multi-sentence, grounded)', async () => {
|
||
const { WHY_MATTERS_ANALYST_SYSTEM_V2 } = await import('../shared/brief-llm-core.js');
|
||
const { system } = builder(story(), {
|
||
worldBrief: 'X',
|
||
countryBrief: '',
|
||
riskScores: '',
|
||
forecasts: '',
|
||
marketData: '',
|
||
macroSignals: '',
|
||
degraded: false,
|
||
});
|
||
assert.equal(system, WHY_MATTERS_ANALYST_SYSTEM_V2);
|
||
// Contract must still mention the 40–70 word target + grounding rule.
|
||
assert.match(system, /40–70 words/);
|
||
assert.match(system, /named person \/ country \/ organization \/ number \/ percentage \/ date \/ city/);
|
||
});
|
||
|
||
it('includes story fields with the multi-sentence footer', () => {
|
||
const { user } = builder(story(), {
|
||
worldBrief: '',
|
||
countryBrief: '',
|
||
riskScores: '',
|
||
forecasts: '',
|
||
marketData: '',
|
||
macroSignals: '',
|
||
degraded: false,
|
||
});
|
||
assert.match(user, /Headline: Iran closes Strait of Hormuz/);
|
||
assert.match(user, /Source: Reuters/);
|
||
assert.match(user, /Severity: critical/);
|
||
assert.match(user, /Category: Geopolitical Risk/);
|
||
assert.match(user, /Country: IR/);
|
||
assert.match(user, /Write 2–3 sentences \(40–70 words\)/);
|
||
assert.match(user, /grounded in at least ONE specific/);
|
||
});
|
||
|
||
it('includes story description when present', () => {
|
||
const storyWithDesc = {
|
||
...story(),
|
||
description: 'Tehran publicly reopened the Strait of Hormuz to commercial shipping today.',
|
||
};
|
||
const { user } = builder(storyWithDesc, {
|
||
worldBrief: '',
|
||
countryBrief: '',
|
||
riskScores: '',
|
||
forecasts: '',
|
||
marketData: '',
|
||
macroSignals: '',
|
||
degraded: false,
|
||
});
|
||
assert.match(user, /Description: Tehran publicly reopened/);
|
||
});
|
||
|
||
it('omits description line when field absent', () => {
|
||
const { user } = builder(story(), {
|
||
worldBrief: '',
|
||
countryBrief: '',
|
||
riskScores: '',
|
||
forecasts: '',
|
||
marketData: '',
|
||
macroSignals: '',
|
||
degraded: false,
|
||
});
|
||
assert.doesNotMatch(user, /Description:/);
|
||
});
|
||
|
||
it('omits context block when all fields empty', () => {
|
||
const { user } = builder(story(), {
|
||
worldBrief: '',
|
||
countryBrief: '',
|
||
riskScores: '',
|
||
forecasts: '',
|
||
marketData: '',
|
||
macroSignals: '',
|
||
degraded: false,
|
||
});
|
||
assert.doesNotMatch(user, /# Live WorldMonitor Context/);
|
||
});
|
||
|
||
it('truncates context to stay under budget', () => {
|
||
const hugeContext = {
|
||
worldBrief: 'x'.repeat(5000),
|
||
countryBrief: 'y'.repeat(5000),
|
||
riskScores: 'z'.repeat(5000),
|
||
forecasts: 'w'.repeat(5000),
|
||
marketData: 'v'.repeat(5000),
|
||
macroSignals: 'u'.repeat(5000),
|
||
degraded: false,
|
||
};
|
||
const { user } = builder(story(), hugeContext);
|
||
// Total user prompt should be bounded. Per plan: context budget ~1700
|
||
// + story fields + footer ~250 → under 2.5KB.
|
||
assert.ok(user.length < 2500, `prompt should be bounded; got ${user.length} chars`);
|
||
});
|
||
});
|
||
|
||
// ── Env flag parsing (endpoint config resolution) ─────────────────────
|
||
|
||
describe('endpoint env flag parsing', () => {
|
||
// Mirror the endpoint's readConfig logic so a drift between this
|
||
// expectation and the handler fails one test suite.
|
||
function readConfig(env) {
|
||
const rawPrimary = (env.BRIEF_WHY_MATTERS_PRIMARY ?? '').trim().toLowerCase();
|
||
let primary;
|
||
let invalidPrimaryRaw = null;
|
||
if (rawPrimary === '' || rawPrimary === 'analyst') primary = 'analyst';
|
||
else if (rawPrimary === 'gemini') primary = 'gemini';
|
||
else {
|
||
primary = 'gemini';
|
||
invalidPrimaryRaw = rawPrimary;
|
||
}
|
||
const shadowEnabled = env.BRIEF_WHY_MATTERS_SHADOW !== '0';
|
||
const rawSample = env.BRIEF_WHY_MATTERS_SHADOW_SAMPLE_PCT;
|
||
let samplePct = 100;
|
||
let invalidSamplePctRaw = null;
|
||
if (rawSample !== undefined && rawSample !== '') {
|
||
const parsed = Number.parseInt(rawSample, 10);
|
||
if (Number.isInteger(parsed) && parsed >= 0 && parsed <= 100 && String(parsed) === rawSample.trim()) {
|
||
samplePct = parsed;
|
||
} else {
|
||
invalidSamplePctRaw = rawSample;
|
||
}
|
||
}
|
||
return { primary, invalidPrimaryRaw, shadowEnabled, samplePct, invalidSamplePctRaw };
|
||
}
|
||
|
||
it('defaults: primary=analyst, shadow=on, sample=100', () => {
|
||
const c = readConfig({});
|
||
assert.equal(c.primary, 'analyst');
|
||
assert.equal(c.shadowEnabled, true);
|
||
assert.equal(c.samplePct, 100);
|
||
});
|
||
|
||
it('PRIMARY=gemini is honoured (kill switch)', () => {
|
||
const c = readConfig({ BRIEF_WHY_MATTERS_PRIMARY: 'gemini' });
|
||
assert.equal(c.primary, 'gemini');
|
||
});
|
||
|
||
it('PRIMARY=analust (typo) falls back to gemini + invalidPrimaryRaw set', () => {
|
||
const c = readConfig({ BRIEF_WHY_MATTERS_PRIMARY: 'analust' });
|
||
assert.equal(c.primary, 'gemini');
|
||
assert.equal(c.invalidPrimaryRaw, 'analust');
|
||
});
|
||
|
||
it('SHADOW disabled only by exact "0"', () => {
|
||
for (const v of ['yes', '1', 'true', '', 'on']) {
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW: v }).shadowEnabled, true, `value=${v}`);
|
||
}
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW: '0' }).shadowEnabled, false);
|
||
});
|
||
|
||
it('SAMPLE_PCT accepts integer 0–100; invalid → 100', () => {
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW_SAMPLE_PCT: '25' }).samplePct, 25);
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW_SAMPLE_PCT: '0' }).samplePct, 0);
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW_SAMPLE_PCT: '100' }).samplePct, 100);
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW_SAMPLE_PCT: '101' }).samplePct, 100);
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW_SAMPLE_PCT: 'foo' }).samplePct, 100);
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW_SAMPLE_PCT: '-5' }).samplePct, 100);
|
||
assert.equal(readConfig({ BRIEF_WHY_MATTERS_SHADOW_SAMPLE_PCT: '50.5' }).samplePct, 100);
|
||
});
|
||
});
|
||
|
||
// ── Gemini path prompt parity snapshot ────────────────────────────────
|
||
|
||
describe('Gemini path prompt parity', () => {
|
||
it('buildWhyMattersPrompt output is stable (frozen snapshot)', async () => {
|
||
const { buildWhyMattersPrompt } = await import('../scripts/lib/brief-llm.mjs');
|
||
const { system, user } = buildWhyMattersPrompt(story());
|
||
// Snapshot — if either the system prompt or the user prompt shape
|
||
// changes, the endpoint's gemini-path output will drift from the
|
||
// cron's pre-PR output. Bump BRIEF_WHY_MATTERS_PRIMARY=gemini
|
||
// rollout risk accordingly.
|
||
assert.match(system, /ONE concise sentence \(18–30 words\)/);
|
||
assert.equal(
|
||
user.split('\n').slice(0, 5).join('\n'),
|
||
[
|
||
'Headline: Iran closes Strait of Hormuz',
|
||
'Source: Reuters',
|
||
'Severity: critical',
|
||
'Category: Geopolitical Risk',
|
||
'Country: IR',
|
||
].join('\n'),
|
||
);
|
||
assert.ok(user.endsWith('One editorial sentence on why this matters:'));
|
||
});
|
||
});
|