mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
fix(scoring): rebalance formula weights — severity 55%, corroboration 15% (#3144)
* fix(scoring): rebalance formula weights — severity 55%, corroboration 15%
PR A of the scoring recalibration plan (docs/plans/2026-04-17-002).
The v2 shadow-log recalibration (690 items, Pearson 0.413) showed the
formula compresses scores into a narrow 30-70 range, making the 85
critical gate unreachable and the 65 high gate marginal. Root cause:
corroboration at 30% weight penalizes breaking single-source news
(the most important alerts) while severity at 40% doesn't separate
critical from high enough.
Weight change:
BEFORE: severity 0.40 + sourceTier 0.20 + corroboration 0.30 + recency 0.10
AFTER: severity 0.55 + sourceTier 0.20 + corroboration 0.15 + recency 0.10
Expected effect: critical/tier1/fresh rises from 76 to 88 (clears 85
gate). critical/tier2/fresh rises from 71 to 83 (recommend lowering
critical gate to 80 at activation time). high/tier2/fresh rises from
61 to 69 (clears 65 gate). The HIGH-CRITICAL gap widens from 10 to
14 points for same-tier items.
Also:
- Bumps shadow-log key from v2 to v3 for a clean recalibration dataset
(v2 had old-weight scores that would contaminate the 48h soak)
- Updates proto/news_item.proto formula comment to reflect new weights
- Updates cache-keys.ts documentation
No cache migration needed: the classify cache stores {level, category},
not scores. Scores are computed at read time from the stored level +
the formula, so new digest requests immediately produce new scores.
Gates remain OFF. After 48h of v3 data, re-run:
node scripts/shadow-score-report.mjs
node scripts/shadow-score-rank.mjs sample 25
🤖 Generated with Claude Opus 4.6 via Claude Code + Compound Engineering v2.49.0
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
* chore: regenerate proto OpenAPI docs for weight rebalance
* fix(scoring): bump SHADOW_SCORE_LOG_KEY export to v3
The exported constant in cache-keys.ts was left at v2 while the relay's
local constant was bumped to v3. Anyone importing the export (or grep-
discovering it) would get a stale key. Architecture review flagged this.
* fix(scoring): update test + stale comments for shadow-log v3
Review found the regression test still asserted v2 key, causing CI
failure. Also fixed stale v1/v2 references in report script header,
default-key comment, report title render, and shouldNotify docstring.
---------
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -296,7 +296,7 @@ components:
|
||||
importanceScore:
|
||||
type: integer
|
||||
format: int32
|
||||
description: 'Composite importance score (0-100): severity × 40% + source tier × 20% + corroboration × 30% + recency × 10%.'
|
||||
description: 'Composite importance score (0-100): severity × 55% + source tier × 20% + corroboration × 15% + recency × 10%.'
|
||||
corroborationCount:
|
||||
type: integer
|
||||
format: int32
|
||||
|
||||
@@ -30,7 +30,7 @@ message NewsItem {
|
||||
worldmonitor.core.v1.GeoCoordinates location = 7;
|
||||
// Human-readable location name.
|
||||
string location_name = 8;
|
||||
// Composite importance score (0-100): severity × 40% + source tier × 20% + corroboration × 30% + recency × 10%.
|
||||
// Composite importance score (0-100): severity × 55% + source tier × 20% + corroboration × 15% + recency × 10%.
|
||||
int32 importance_score = 9;
|
||||
// Number of distinct sources that reported the same story in this digest cycle.
|
||||
int32 corroboration_count = 10;
|
||||
|
||||
@@ -3413,7 +3413,7 @@ const RELAY_TIER4_SOURCES = new Set(
|
||||
Object.entries(RELAY_SOURCE_TIERS).filter(([, t]) => t === 4).map(([s]) => s),
|
||||
);
|
||||
|
||||
const RELAY_SCORE_WEIGHTS = { severity: 0.4, sourceTier: 0.2, corroboration: 0.3, recency: 0.1 };
|
||||
const RELAY_SCORE_WEIGHTS = { severity: 0.55, sourceTier: 0.2, corroboration: 0.15, recency: 0.1 };
|
||||
const RELAY_SEVERITY_SCORES = { critical: 100, high: 75, medium: 50, low: 25, info: 0 };
|
||||
|
||||
// Mirrors computeImportanceScore() in list-feed-digest.ts with ONE intentional
|
||||
|
||||
@@ -501,7 +501,7 @@ function matchesSensitivity(ruleSensitivity, eventSeverity) {
|
||||
*
|
||||
* Shadow mode (default, flag OFF): computes score decision but always falls
|
||||
* back to the legacy result so real notifications are unaffected. Logs to
|
||||
* shadow:score-log:v1 for tuning.
|
||||
* shadow:score-log (currently v3) for tuning.
|
||||
*/
|
||||
function shouldNotify(rule, event) {
|
||||
const passesLegacy = matchesSensitivity(rule.sensitivity, event.severity ?? 'high');
|
||||
@@ -560,7 +560,7 @@ const IMPORTANCE_SCORE_MIN = Number(process.env.IMPORTANCE_SCORE_MIN ?? 40);
|
||||
// The old v1 key (compact string format) is retained by consumers for
|
||||
// backward-compat reading but is no longer written. See
|
||||
// docs/internal/scoringDiagnostic.md §5 and §9 Step 4.
|
||||
const SHADOW_SCORE_LOG_KEY = 'shadow:score-log:v2';
|
||||
const SHADOW_SCORE_LOG_KEY = 'shadow:score-log:v3';
|
||||
const SHADOW_LOG_TTL = 7 * 24 * 3600; // 7 days
|
||||
|
||||
async function shadowLogScore(event) {
|
||||
|
||||
@@ -1,17 +1,18 @@
|
||||
#!/usr/bin/env node
|
||||
// Extract shadow:score-log (defaults to v2; override via SHADOW_SCORE_KEY) from
|
||||
// Extract shadow:score-log (defaults to v3; override via SHADOW_SCORE_KEY) from
|
||||
// Upstash and write a review bundle to ./shadow-score-report/. Parses both v2/v3
|
||||
// JSON members and legacy v1 string members.
|
||||
// Usage: node scripts/shadow-score-report.mjs
|
||||
// Env: UPSTASH_REDIS_REST_URL, UPSTASH_REDIS_REST_TOKEN (reads .env.local if present)
|
||||
// SHADOW_SCORE_KEY=shadow:score-log:v2 to read pre-weight-rebalance data
|
||||
// SHADOW_SCORE_KEY=shadow:score-log:v1 to read pre-PR #3069 data
|
||||
|
||||
import { readFileSync, writeFileSync, mkdirSync, existsSync } from 'node:fs';
|
||||
import { resolve } from 'node:path';
|
||||
|
||||
// v3 is the current key (JSON members, post-weight-rebalance). v2 is the pre-rebalance JSON key. v1 is the legacy key (compact strings).
|
||||
// Override with SHADOW_SCORE_KEY=shadow:score-log:v1 to read pre-fix data.
|
||||
const KEY = process.env.SHADOW_SCORE_KEY || 'shadow:score-log:v2';
|
||||
// Override with SHADOW_SCORE_KEY=shadow:score-log:v2 (pre-weight-rebalance) or v1 (pre-PR #3069).
|
||||
const KEY = process.env.SHADOW_SCORE_KEY || 'shadow:score-log:v3';
|
||||
const OUT = resolve(process.cwd(), 'shadow-score-report');
|
||||
const GATE_MIN = 40; // current IMPORTANCE_SCORE_MIN default
|
||||
const HIGH = 65; // current shouldNotify "high" sensitivity threshold
|
||||
@@ -121,7 +122,7 @@ function summary(events) {
|
||||
function renderReport(s, events) {
|
||||
const lines = [];
|
||||
const push = (...a) => lines.push(a.join(''));
|
||||
push('# shadow:score-log:v1 report');
|
||||
push(`# ${KEY} report`);
|
||||
push(`generated: ${new Date().toISOString()}`);
|
||||
push(`key: ${KEY}`);
|
||||
push(`window: ~7d rolling (ZREMRANGEBYSCORE on each write)`);
|
||||
|
||||
@@ -22,18 +22,19 @@ export const STORY_TRACKING_TTL_S = 172800;
|
||||
* TTL for all: 172800s (48h), refreshed each digest cycle.
|
||||
* Shadow scoring key (written by notification-relay.cjs, which owns the live
|
||||
* value — the constant here is documentation only, not imported):
|
||||
* shadow:score-log:v2 ZSet score=epoch_ms, member=JSON{ts,importanceScore,severity,eventType,title,source,publishedAt,corroborationCount,variant}
|
||||
* shadow:score-log:v1 ZSet legacy — no longer written, self-prunes via 7d ZREMRANGEBYSCORE
|
||||
* shadow:score-log:v3 ZSet score=epoch_ms, member=JSON{ts,importanceScore,severity,eventType,title,source,publishedAt,corroborationCount,variant}
|
||||
* shadow:score-log:v2 ZSet legacy (pre-weight-rebalance scores) — no longer written, self-prunes via 7d ZREMRANGEBYSCORE
|
||||
* shadow:score-log:v1 ZSet legacy (pre-PR #3069) — self-prunes
|
||||
*/
|
||||
export const STORY_TRACK_KEY = (titleHash: string) => `story:track:v1:${titleHash}`;
|
||||
export const STORY_SOURCES_KEY = (titleHash: string) => `story:sources:v1:${titleHash}`;
|
||||
export const STORY_PEAK_KEY = (titleHash: string) => `story:peak:v1:${titleHash}`;
|
||||
export const DIGEST_ACCUMULATOR_KEY = (variant: string, lang = 'en') => `digest:accumulator:v1:${variant}:${lang}`;
|
||||
export const DIGEST_LAST_SENT_KEY = (userId: string, variant: string) => `digest:last-sent:v1:${userId}:${variant}`;
|
||||
// NOTE: notification-relay.cjs owns the live value (shadow:score-log:v2 since PR #3069).
|
||||
// NOTE: notification-relay.cjs owns the live value (shadow:score-log:v3 since weight rebalance).
|
||||
// This export is documentation/discoverability; changing it here does NOT affect the relay.
|
||||
// If you modify the key, also update scripts/notification-relay.cjs SHADOW_SCORE_LOG_KEY.
|
||||
export const SHADOW_SCORE_LOG_KEY = 'shadow:score-log:v2';
|
||||
export const SHADOW_SCORE_LOG_KEY = 'shadow:score-log:v3';
|
||||
export const STORY_TTL = 604800; // 7 days — enough for sustained multi-day stories
|
||||
export const DIGEST_ACCUMULATOR_TTL = 172800; // 48h — lookback window for digest content
|
||||
|
||||
|
||||
@@ -60,9 +60,9 @@ const SEVERITY_SCORES: Record<ThreatLevel, number> = {
|
||||
* Source tier boosts confidence. Recency is a minor tiebreaker.
|
||||
*/
|
||||
const SCORE_WEIGHTS = {
|
||||
severity: 0.4,
|
||||
severity: 0.55,
|
||||
sourceTier: 0.2,
|
||||
corroboration: 0.3,
|
||||
corroboration: 0.15,
|
||||
recency: 0.1,
|
||||
} as const;
|
||||
|
||||
|
||||
@@ -59,16 +59,16 @@ describe('notification-relay shadowLogScore discipline', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('shadow-log v2 key migration', () => {
|
||||
it('uses the v2 JSON-member key, not the legacy v1 string-member key', () => {
|
||||
describe('shadow-log key version', () => {
|
||||
it('uses the v3 JSON-member key (weight rebalance clean dataset)', () => {
|
||||
assert.match(
|
||||
relaySrc,
|
||||
/SHADOW_SCORE_LOG_KEY\s*=\s*['"]shadow:score-log:v2['"]/,
|
||||
'notification-relay must write to shadow:score-log:v2 after the schema fix',
|
||||
/SHADOW_SCORE_LOG_KEY\s*=\s*['"]shadow:score-log:v3['"]/,
|
||||
'notification-relay must write to shadow:score-log:v3 after the weight rebalance',
|
||||
);
|
||||
assert.ok(
|
||||
!/SHADOW_SCORE_LOG_KEY\s*=\s*['"]shadow:score-log:v1['"]/.test(relaySrc),
|
||||
'legacy v1 key must not be active',
|
||||
!/SHADOW_SCORE_LOG_KEY\s*=\s*['"]shadow:score-log:v[12]['"]/.test(relaySrc),
|
||||
'legacy v1/v2 keys must not be active',
|
||||
);
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user