mirror of
https://github.com/koala73/worldmonitor.git
synced 2026-04-25 17:14:57 +02:00
* fix(unrest): proxy-only fetch + 3-attempt retry for GDELT Production logs showed PR #3362's 45s proxy timeout solved one failure mode (CONNECT-tunnel timeouts) but ~80% of ticks now fail in 3-14 seconds with either "Proxy CONNECT: HTTP/1.1 522 Server Error" (Cloudflare can't reach GDELT origin) or "Client network socket disconnected before secure TLS connection" (Decodo RSTs the handshake). These are fast-fails, not timeouts — no amount of timeout bumping helps. Two changes: 1. Drop the direct fetch entirely. Every direct attempt in 14h of logs errored with UND_ERR_CONNECT_TIMEOUT or ECONNRESET — 0% success since PR #3256 added the proxy fallback. The direct call costs ~8-30s per tick for nothing. 2. Wrap the proxy call in a 3-attempt retry with 1.5-3s jitter. Single-attempt per-tick success rate measured at ~18%; with 3 attempts that lifts to ~75%+ under the same Decodo↔Cloudflare flake rate, comfortably keeping seedAge under the 120m STALE_SEED threshold. Deeper structural fix (out of scope here): wire ACLED credentials on the Railway unrest service so GDELT isn't the single upstream. * test(unrest): cover GDELT proxy retry path + no-proxy hard-fail Address PR #3395 reviewer concerns: (1) "no automated coverage for the new retry path or the no-proxy path" Add scripts/seed-unrest-events.mjs DI seams (_proxyFetcher, _sleep, _jitter, _maxAttempts, _resolveProxyForConnect) and a 6-test suite at tests/seed-unrest-gdelt-fetch.test.mjs covering: 1. Single-attempt success — no retries fire. 2. 2 transient failures + 3rd-attempt success — recovers, returns JSON. 3. All attempts fail — throws LAST error, exact attempt count. 4. Malformed proxy body — SyntaxError short-circuits retry (deterministic parse failures shouldn't burn attempts). 5. Missing CONNECT proxy creds — fetchGdeltEvents throws clear "PROXY_URL env var is not set" pointer for ops, asserts NO proxy fetcher invocation (no wasted network). 6. End-to-end with retry — fetchGdeltEvents with one transient 522 recovers and aggregates events normally. Gate runSeed() entry-point with `import.meta.url === file://argv[1]` so tests can `import` the module without triggering a real seed run. (2) "review assumes Railway has Decodo creds; without them, fails immediately" Yes — that's intentional. Direct fetch had 0% success in production for weeks (every Railway tick errored UND_ERR_CONNECT_TIMEOUT or ECONNRESET) since PR #3256 added the proxy fallback. Reintroducing it as "soft" fallback would just add ~30s of latency + log noise per tick. What's improved here: the no-proxy error message now names the missing env var (PROXY_URL) so an operator who hits this in Railway logs has a direct pointer instead of a generic "GDELT requires proxy" string.
164 lines
6.1 KiB
JavaScript
164 lines
6.1 KiB
JavaScript
// Tests for the GDELT proxy retry path in scripts/seed-unrest-events.mjs.
|
|
//
|
|
// Locks the behavioural contract introduced in PR #3395:
|
|
//
|
|
// 1. Single attempt success — happy path, no retries fire.
|
|
// 2. Transient proxy failure recoverable by retry — first attempt(s)
|
|
// fail, a later attempt succeeds, returns parsed JSON.
|
|
// 3. All attempts fail — throws the LAST error so ops sees the most
|
|
// recent failure mode (Cloudflare 522 vs ECONNRESET drift).
|
|
// 4. Malformed proxy body — JSON.parse throws SyntaxError; the helper
|
|
// bails immediately rather than burning attempts on a deterministic
|
|
// parse failure.
|
|
// 5. Missing CONNECT proxy creds — fetchGdeltEvents throws with a
|
|
// clear "PROXY_URL env var is not set" pointer for ops, with NO
|
|
// proxy fetcher invocation (no wasted network).
|
|
//
|
|
// Pre-PR-#3395 behaviour to AVOID regressing into:
|
|
// - Direct fetch was tried first and failed UND_ERR_CONNECT_TIMEOUT
|
|
// on every Railway tick (0% success). Re-introducing a "soft"
|
|
// direct fallback would just add latency and log noise.
|
|
|
|
import { test } from 'node:test';
|
|
import { strict as assert } from 'node:assert';
|
|
|
|
process.env.UPSTASH_REDIS_REST_URL = 'https://redis.test';
|
|
process.env.UPSTASH_REDIS_REST_TOKEN = 'fake-token';
|
|
|
|
const { fetchGdeltViaProxy, fetchGdeltEvents } = await import('../scripts/seed-unrest-events.mjs');
|
|
|
|
const URL = 'https://api.gdeltproject.org/api/v1/gkg_geojson?query=test';
|
|
const PROXY_AUTH = 'user:pass@gate.decodo.com:7000';
|
|
|
|
function jsonBuffer(obj) {
|
|
return { buffer: Buffer.from(JSON.stringify(obj), 'utf8') };
|
|
}
|
|
|
|
const noSleep = async () => {};
|
|
const noJitter = () => 0;
|
|
|
|
// ─── 1. happy path: first attempt succeeds ─────────────────────────────
|
|
|
|
test('proxy success on first attempt → returns parsed JSON, no retries', async () => {
|
|
let calls = 0;
|
|
const _proxyFetcher = async () => {
|
|
calls++;
|
|
return jsonBuffer({ features: [{ name: 'A' }] });
|
|
};
|
|
const result = await fetchGdeltViaProxy(URL, PROXY_AUTH, {
|
|
_proxyFetcher,
|
|
_sleep: noSleep,
|
|
_jitter: noJitter,
|
|
});
|
|
assert.deepEqual(result, { features: [{ name: 'A' }] });
|
|
assert.equal(calls, 1, 'should NOT retry on success');
|
|
});
|
|
|
|
// ─── 2. transient flake: 2 failures + 1 success ────────────────────────
|
|
|
|
test('two proxy failures, third attempt succeeds → returns parsed JSON', async () => {
|
|
let calls = 0;
|
|
const _proxyFetcher = async () => {
|
|
calls++;
|
|
if (calls < 3) throw new Error(`Proxy CONNECT: HTTP/1.1 522 Server Error`);
|
|
return jsonBuffer({ features: [{ name: 'B' }] });
|
|
};
|
|
let sleepCount = 0;
|
|
const _sleep = async () => { sleepCount++; };
|
|
const result = await fetchGdeltViaProxy(URL, PROXY_AUTH, {
|
|
_proxyFetcher,
|
|
_sleep,
|
|
_jitter: noJitter,
|
|
_maxAttempts: 3,
|
|
});
|
|
assert.deepEqual(result, { features: [{ name: 'B' }] });
|
|
assert.equal(calls, 3, 'should retry until success');
|
|
assert.equal(sleepCount, 2, 'should sleep between attempts only (not after final)');
|
|
});
|
|
|
|
// ─── 3. all attempts fail ──────────────────────────────────────────────
|
|
|
|
test('all attempts fail → throws LAST error', async () => {
|
|
let calls = 0;
|
|
const errors = [
|
|
new Error('Proxy CONNECT: HTTP/1.1 522 Server Error'),
|
|
new Error('CONNECT tunnel timeout'),
|
|
new Error('Client network socket disconnected'),
|
|
];
|
|
const _proxyFetcher = async () => {
|
|
throw errors[calls++];
|
|
};
|
|
await assert.rejects(
|
|
fetchGdeltViaProxy(URL, PROXY_AUTH, {
|
|
_proxyFetcher,
|
|
_sleep: noSleep,
|
|
_jitter: noJitter,
|
|
_maxAttempts: 3,
|
|
}),
|
|
/Client network socket disconnected/,
|
|
);
|
|
assert.equal(calls, 3);
|
|
});
|
|
|
|
// ─── 4. parse failure short-circuits retry ─────────────────────────────
|
|
|
|
test('malformed proxy body → throws SyntaxError immediately, no retry', async () => {
|
|
let calls = 0;
|
|
const _proxyFetcher = async () => {
|
|
calls++;
|
|
return { buffer: Buffer.from('<html>this is not json</html>', 'utf8') };
|
|
};
|
|
await assert.rejects(
|
|
fetchGdeltViaProxy(URL, PROXY_AUTH, {
|
|
_proxyFetcher,
|
|
_sleep: noSleep,
|
|
_jitter: noJitter,
|
|
_maxAttempts: 3,
|
|
}),
|
|
SyntaxError,
|
|
);
|
|
assert.equal(calls, 1, 'parse error must not trigger retries');
|
|
});
|
|
|
|
// ─── 5. fetchGdeltEvents: missing proxy creds ──────────────────────────
|
|
|
|
test('fetchGdeltEvents with no proxy creds → throws clear ops-actionable error, no fetcher call', async () => {
|
|
let fetcherCalled = false;
|
|
await assert.rejects(
|
|
fetchGdeltEvents({
|
|
_resolveProxyForConnect: () => null,
|
|
_proxyFetcher: async () => { fetcherCalled = true; return jsonBuffer({}); },
|
|
_sleep: noSleep,
|
|
_jitter: noJitter,
|
|
}),
|
|
/PROXY_URL env var is not set/,
|
|
);
|
|
assert.equal(fetcherCalled, false, 'must not attempt proxy fetch when creds missing');
|
|
});
|
|
|
|
// ─── 6. fetchGdeltEvents: end-to-end with retry path ───────────────────
|
|
|
|
test('fetchGdeltEvents with one transient proxy failure → recovers and aggregates events', async () => {
|
|
let calls = 0;
|
|
const _proxyFetcher = async () => {
|
|
calls++;
|
|
if (calls === 1) throw new Error('Proxy CONNECT: HTTP/1.1 522 Server Error');
|
|
// Five mentions at the same lat/lon — passes the count >= 5 floor in the aggregator.
|
|
const features = Array.from({ length: 5 }, () => ({
|
|
properties: { name: 'Cairo, Egypt', urltone: -3 },
|
|
geometry: { type: 'Point', coordinates: [31.2, 30.0] },
|
|
}));
|
|
return jsonBuffer({ features });
|
|
};
|
|
const events = await fetchGdeltEvents({
|
|
_resolveProxyForConnect: () => PROXY_AUTH,
|
|
_proxyFetcher,
|
|
_sleep: noSleep,
|
|
_jitter: noJitter,
|
|
_maxAttempts: 3,
|
|
});
|
|
assert.equal(calls, 2, 'should retry exactly once after the 522');
|
|
assert.equal(events.length, 1, 'five mentions at one location → one aggregated event');
|
|
assert.equal(events[0].country, 'Egypt');
|
|
});
|