mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
fix(intelligence): read regional-snapshot latestKey as raw string (#3302)
* fix(intelligence): read regional-snapshot latestKey as raw string
Regional Intelligence panel rendered "No snapshot available yet" for every
region despite the 6h cron writing per-region snapshots successfully. Root
cause: writer/reader encoding mismatch.
Writer (scripts/regional-snapshot/persist-snapshot.mjs:60) stores the
snapshot_id pointer via `['SET', latestKey, snapshotId]` — a BARE string,
not JSON.stringify'd. The seeder's own reader (line 97) reads it as-is
and works.
Vercel RPC handler used `getCachedJson(latestKey, true)`, which internally
does `JSON.parse(data.result)`. `JSON.parse('mena-20260421T000000-steady')`
throws; the try/catch silently returns null; handler returns {}; panel
renders empty.
Fix: new `getCachedRawString()` helper in server/_shared/redis.ts that
reads a Redis key as-is with no JSON.parse. Handler uses it for the
latestKey read (while still using getCachedJson for the snapshot-by-id
payload, which IS JSON.stringify'd). No writer or backfill change needed.
Regression guard: new structural test asserts the handler reads latestKey
specifically via getCachedRawString so a future refactor can't silently
revert to getCachedJson and re-break every region.
Health.js monitors the summary key (intelligence:regional-snapshots:
summary:v1), which stays green because summary writes succeed. Per-region
health probes would be worth adding as a follow-up.
* fix(redis): detect AbortSignal.timeout() as TimeoutError too
Greptile P2 on PR #3302: AbortSignal.timeout() throws a DOMException with
name='TimeoutError' on V8 runtimes (Vercel Edge included). The existing
isTimeout check only matched name==='AbortError' — what you'd get from a
manual controller.abort() — so the [REDIS-TIMEOUT] structured log never
fired. Every redis fetch timeout silently fell through to the generic
console.warn branch, eroding the Sentry drain we added specifically to
catch these per docs/plans/chokepoint-rpc-payload-split.md.
Fix both getCachedJson (pre-existing) and the new getCachedRawString by
matching TimeoutError OR AbortError — covers both the current
AbortSignal.timeout() path and any future switch to manual AbortController.
Pre-existing copy in getCachedJson fixed in the same edit since it's the
same file and the same observability hole.
* test(redis): update isTimeout regex to match new TimeoutError|AbortError check
Pre-push hook caught the brittle static-analysis test in
tests/get-chokepoint-history.test.mjs:83 that asserted the exact old
single-name pattern. Update the regex (and description) to cover both
TimeoutError and AbortError, matching the observability fix in the
previous commit.
This commit is contained in:
@@ -57,6 +57,44 @@ export async function getRawJson(key: string): Promise<unknown | null> {
|
||||
return unwrapEnvelope(JSON.parse(data.result)).data;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read a key's value as a raw Upstash string — no JSON.parse, no envelope unwrap.
|
||||
* Use when a seeder stores a bare scalar (e.g., a snapshot_id pointer) via
|
||||
* `['SET', key, bareString]` without JSON.stringify. getCachedJson() on these
|
||||
* keys silently returns null because JSON.parse throws on unquoted strings,
|
||||
* and the try/catch swallows the error.
|
||||
*
|
||||
* Always uses the raw (unprefixed) key — matches the seed-script write path
|
||||
* (seeders don't know about the Vercel env-prefix scheme).
|
||||
*/
|
||||
export async function getCachedRawString(key: string): Promise<string | null> {
|
||||
if (process.env.LOCAL_API_MODE === 'tauri-sidecar') {
|
||||
const { sidecarCacheGet } = await import('./sidecar-cache');
|
||||
const v = sidecarCacheGet(key);
|
||||
return typeof v === 'string' ? v : null;
|
||||
}
|
||||
const url = process.env.UPSTASH_REDIS_REST_URL;
|
||||
const token = process.env.UPSTASH_REDIS_REST_TOKEN;
|
||||
if (!url || !token) return null;
|
||||
try {
|
||||
const resp = await fetch(`${url}/get/${encodeURIComponent(key)}`, {
|
||||
headers: { Authorization: `Bearer ${token}` },
|
||||
signal: AbortSignal.timeout(REDIS_OP_TIMEOUT_MS),
|
||||
});
|
||||
if (!resp.ok) return null;
|
||||
const data = (await resp.json()) as { result?: string | null };
|
||||
return typeof data.result === 'string' && data.result.length > 0 ? data.result : null;
|
||||
} catch (err) {
|
||||
// AbortSignal.timeout() throws DOMException name='TimeoutError' (on V8
|
||||
// runtimes incl. Vercel Edge); manual controller.abort() throws 'AbortError'.
|
||||
// Match both so the [REDIS-TIMEOUT] structured log actually fires.
|
||||
const isTimeout = err instanceof Error && (err.name === 'TimeoutError' || err.name === 'AbortError');
|
||||
if (isTimeout) console.error(`[REDIS-TIMEOUT] getCachedRawString key=${key} timeoutMs=${REDIS_OP_TIMEOUT_MS}`);
|
||||
else console.warn('[redis] getCachedRawString failed:', errMsg(err));
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
export async function getCachedJson(key: string, raw = false): Promise<unknown | null> {
|
||||
if (process.env.LOCAL_API_MODE === 'tauri-sidecar') {
|
||||
const { sidecarCacheGet } = await import('./sidecar-cache');
|
||||
@@ -80,12 +118,16 @@ export async function getCachedJson(key: string, raw = false): Promise<unknown |
|
||||
// through unchanged (unwrapEnvelope returns {_seed: null, data: raw}).
|
||||
return unwrapEnvelope(JSON.parse(data.result)).data;
|
||||
} catch (err) {
|
||||
// AbortError = timeout; structured + errored so log drains (e.g. Sentry via
|
||||
// Vercel integration) pick it up. Large-payload timeouts used to silently
|
||||
// return null and let downstream callers cache zero-state — see
|
||||
// docs/plans/chokepoint-rpc-payload-split.md for the incident that added
|
||||
// this tag.
|
||||
const isTimeout = err instanceof Error && err.name === 'AbortError';
|
||||
// Structured timeout log goes to Sentry via Vercel integration. Large-
|
||||
// payload timeouts used to silently return null and let downstream callers
|
||||
// cache zero-state — see docs/plans/chokepoint-rpc-payload-split.md for
|
||||
// the incident that added this tag.
|
||||
//
|
||||
// AbortSignal.timeout() throws DOMException name='TimeoutError' (on V8
|
||||
// runtimes incl. Vercel Edge); manual controller.abort() throws
|
||||
// 'AbortError'. Checking only 'AbortError' meant the [REDIS-TIMEOUT] log
|
||||
// never fired — every timeout fell through to the generic console.warn.
|
||||
const isTimeout = err instanceof Error && (err.name === 'TimeoutError' || err.name === 'AbortError');
|
||||
if (isTimeout) {
|
||||
console.error(`[REDIS-TIMEOUT] getCachedJson key=${key} timeoutMs=${REDIS_OP_TIMEOUT_MS}`);
|
||||
} else {
|
||||
|
||||
@@ -24,7 +24,7 @@ import type {
|
||||
RegionalNarrative,
|
||||
NarrativeSection,
|
||||
} from '../../../../src/generated/server/worldmonitor/intelligence/v1/service_server';
|
||||
import { getCachedJson } from '../../../_shared/redis';
|
||||
import { getCachedJson, getCachedRawString } from '../../../_shared/redis';
|
||||
|
||||
const LATEST_KEY_PREFIX = 'intelligence:snapshot:v1:';
|
||||
const BY_ID_KEY_PREFIX = 'intelligence:snapshot-by-id:v1:';
|
||||
@@ -485,18 +485,19 @@ export const getRegionalSnapshot: IntelligenceServiceHandler['getRegionalSnapsho
|
||||
return {};
|
||||
}
|
||||
|
||||
// Step 1: resolve latest pointer -> snapshot_id
|
||||
// Step 1: resolve latest pointer -> snapshot_id.
|
||||
//
|
||||
// The seed writer in scripts/regional-snapshot/persist-snapshot.mjs:60
|
||||
// stores the id via `['SET', latestKey, snapshotId]` — a BARE string, not
|
||||
// JSON-encoded. Using getCachedJson() here silently returned null because
|
||||
// JSON.parse('mena-20260421-steady') throws and the helper's try/catch
|
||||
// swallows the error — left every panel showing "No snapshot available
|
||||
// yet" despite the seed cron running fine every 6h.
|
||||
//
|
||||
// getCachedRawString reads the value as-is with no JSON.parse, matching
|
||||
// the writer's own reader at persist-snapshot.mjs:97.
|
||||
const latestKey = `${LATEST_KEY_PREFIX}${regionId}:latest`;
|
||||
const latestRaw = await getCachedJson(latestKey, true);
|
||||
// The seed writer stores the id as a bare string (JSON-encoded). getCachedJson
|
||||
// returns whatever the JSON parser produced, so we handle both shapes.
|
||||
let snapshotId: string | null = null;
|
||||
if (typeof latestRaw === 'string') {
|
||||
snapshotId = latestRaw;
|
||||
} else if (latestRaw && typeof latestRaw === 'object' && 'snapshot_id' in latestRaw) {
|
||||
const candidate = (latestRaw as { snapshot_id?: unknown }).snapshot_id;
|
||||
if (typeof candidate === 'string') snapshotId = candidate;
|
||||
}
|
||||
const snapshotId = await getCachedRawString(latestKey);
|
||||
if (!snapshotId) {
|
||||
return {};
|
||||
}
|
||||
|
||||
@@ -76,11 +76,19 @@ describe('proto wiring', () => {
|
||||
describe('Redis timeout observability', () => {
|
||||
const redisSrc = readFileSync(resolve(root, 'server/_shared/redis.ts'), 'utf-8');
|
||||
|
||||
it('logs [REDIS-TIMEOUT] with key and timeoutMs on AbortError', () => {
|
||||
it('logs [REDIS-TIMEOUT] with key and timeoutMs on timeout (TimeoutError or AbortError)', () => {
|
||||
// Grepable tag that log drains / Sentry-Vercel integration can pick up —
|
||||
// before this, large-payload timeouts silently returned null and consumers
|
||||
// cached zero-state. See docs/plans/chokepoint-rpc-payload-split.md.
|
||||
assert.match(redisSrc, /isTimeout\s*=\s*err instanceof Error && err\.name === 'AbortError'/);
|
||||
//
|
||||
// AbortSignal.timeout() throws DOMException name='TimeoutError' (V8
|
||||
// runtimes incl. Vercel Edge); manual controller.abort() throws
|
||||
// 'AbortError'. The predicate must match both — historically only
|
||||
// 'AbortError' was checked and every real timeout silently fell through.
|
||||
assert.match(
|
||||
redisSrc,
|
||||
/isTimeout\s*=\s*err instanceof Error && \(err\.name === 'TimeoutError' \|\| err\.name === 'AbortError'\)/,
|
||||
);
|
||||
assert.match(redisSrc, /\[REDIS-TIMEOUT\] getCachedJson key=\$\{key\} timeoutMs=\$\{REDIS_OP_TIMEOUT_MS\}/);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -384,8 +384,21 @@ describe('adaptSnapshot', () => {
|
||||
// ────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
describe('get-regional-snapshot handler: structural checks', () => {
|
||||
it('imports getCachedJson from redis helpers', () => {
|
||||
assert.match(handlerSrc, /import\s*\{\s*getCachedJson\s*\}\s*from\s*'\.\.\/\.\.\/\.\.\/_shared\/redis'/);
|
||||
it('imports getCachedJson + getCachedRawString from redis helpers', () => {
|
||||
// getCachedRawString reads the bare-string latestKey pointer (the seed
|
||||
// writer stores snapshot_id via `SET key bareString` — JSON.parse would
|
||||
// throw); getCachedJson reads the JSON-stringified snapshot-by-id payload.
|
||||
// Both must be imported.
|
||||
assert.match(handlerSrc, /import\s*\{[^}]*\bgetCachedJson\b[^}]*\}\s*from\s*'\.\.\/\.\.\/\.\.\/_shared\/redis'/);
|
||||
assert.match(handlerSrc, /import\s*\{[^}]*\bgetCachedRawString\b[^}]*\}\s*from\s*'\.\.\/\.\.\/\.\.\/_shared\/redis'/);
|
||||
});
|
||||
|
||||
it('reads latestKey via getCachedRawString (encoding contract)', () => {
|
||||
// Guard the primary fix: if a future refactor swaps back to getCachedJson
|
||||
// for the latestKey read, every region will silently render empty because
|
||||
// JSON.parse throws on the bare snapshot_id string. See commit that
|
||||
// replaced the getCachedJson+JSON.parse path with getCachedRawString.
|
||||
assert.match(handlerSrc, /getCachedRawString\s*\(\s*latestKey\s*\)/);
|
||||
});
|
||||
|
||||
it('uses the canonical :latest key prefix', () => {
|
||||
|
||||
Reference in New Issue
Block a user