From 9b07fc8d8a5ab9c1464e962dbb77b7c07c7942e1 Mon Sep 17 00:00:00 2001 From: Elie Habib Date: Thu, 16 Apr 2026 09:25:06 +0400 Subject: [PATCH] feat(yahoo): _yahoo-fetch helper with curl-only Decodo proxy fallback + 4 seeder migrations (#3120) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(_yahoo-fetch): curl-only Decodo proxy fallback helper Yahoo Finance throttles Railway egress IPs aggressively. 4 seeders (seed-commodity-quotes, seed-etf-flows, seed-gulf-quotes, seed-market-quotes) duplicated the same fetchYahooWithRetry block with no proxy fallback. This helper consolidates them and adds the proxy fallback. Yahoo-specific: CURL-ONLY proxy strategy. Probed 2026-04-16: query1.finance.yahoo.com via CONNECT (httpsProxyFetchRaw): HTTP 404 query1.finance.yahoo.com via curl (curlFetch): HTTP 200 Yahoo's edge blocks Decodo's CONNECT egress IPs but accepts the curl egress IPs. Helper deliberately omits the CONNECT leg — adding it would burn time on guaranteed-404 attempts. Production defaults expose ONLY curlProxyResolver + curlFetcher. 
All learnings from PR #3118 + #3119 reviews baked in: - lastDirectError accumulator across the loop, embedded in final throw + Error.cause chain - catch block uses break (NOT throw) so thrown errors also reach proxy - DI seams (_curlProxyResolver, _proxyCurlFetcher) for hermetic tests - _PROXY_DEFAULTS exported for production-default lock tests - Sync curlFetch wrapped with await Promise.resolve() to future-proof against an async refactor (Greptile P2 from #3119) Tests (tests/yahoo-fetch.test.mjs, 11 cases): - Production defaults: curl resolver/fetcher reference equality - Production defaults: NO CONNECT leg present (regression guard) - 200 OK passthrough, never touches proxy - 429 with no proxy → throws exhausted with HTTP 429 in message - Retry-After header parsed correctly - 429 + curl proxy succeeds → returns proxy data - Thrown fetch error on final retry → proxy fallback runs (P1 guard) - 429 + proxy ALSO fails → both errors visible in message + cause chain - Proxy malformed JSON → throws exhausted - Non-retryable 500 → no extra direct retry, falls to proxy - parseRetryAfterMs unit (exported sanity check) Verification: 11/11 helper tests pass. node --check clean. Phase 1 of 2 — seeder migrations follow. * feat(yahoo-seeders): migrate 4 seeders to _yahoo-fetch helper Removes the duplicated fetchYahooWithRetry function (4 byte-identical copies across seed-commodity-quotes, seed-etf-flows, seed-gulf-quotes, seed-market-quotes) and routes all Yahoo Finance fetches through the new scripts/_yahoo-fetch.mjs helper. Each seeder gains the curl-only Decodo proxy fallback baked into the helper. 
Per-seeder changes (mechanical): - import { fetchYahooJson } from './_yahoo-fetch.mjs' - delete the local fetchYahooWithRetry function - replace 'const resp = await fetchYahooWithRetry(url, label); if (!resp) return X; const json = await resp.json()' with 'let json; try { json = await fetchYahooJson(url, { label }); } catch { return X; }' - prune now-unused CHROME_UA/sleep imports where applicable Latent bugs fixed in passing: - seed-etf-flows.mjs:23 and seed-market-quotes.mjs:38 referenced CHROME_UA without importing it (would throw ReferenceError at runtime if the helper were called). Now the call site is gone in etf-flows; in market-quotes CHROME_UA is properly imported because Finnhub call still uses it. seed-commodity-quotes also has fetchYahooChart1y (separate non-retry function for gold history). Migrated to use fetchYahooJson under the hood — preserves return shape, adds proxy fallback automatically. Verification: - node --check clean on all 4 modified seeders - npm run typecheck:all clean - npm run test:data: 5374/5374 pass Phase 2 of 2. * fix(_yahoo-fetch): log success AFTER parse + add _sleep DI seam for honest Retry-After test Greptile P2: "[YAHOO] proxy (curl) succeeded" was logged BEFORE JSON.parse(text). On malformed proxy JSON, Railway logs would show: [YAHOO] proxy (curl) succeeded for AAPL throw: Yahoo retries exhausted ... Contradictory + breaks the post-deploy log-grep verification this PR relies on ("look for [YAHOO] proxy (curl) succeeded"). Fix: parse first; success log only fires when parse succeeds AND the value is about to be returned. Greptile P3: 'Retry-After header parsed correctly' test used header value '0', but parseRetryAfterMs() treats non-positive seconds as null → helper falls through to default linear backoff. So the test was exercising the wrong branch despite its name. Fix: added _sleep DI opt seam to the helper. 
New test injects a sleep spy and asserts the captured duration: Retry-After: '7' → captured sleep == [7000] (Retry-After branch) no Retry-After → captured sleep == [10] (default backoff = retryBaseMs * 1) Two paired tests lock both branches separately so a future regression that collapses them is caught. Also added a log-ordering regression test: malformed proxy JSON must NOT emit the 'succeeded' log. Captures console.log into an array and asserts no 'proxy (curl) succeeded' line appeared before the throw. Verification: - tests/yahoo-fetch.test.mjs: 13/13 (was 11, +2) - npm run test:data: 5376/5376 (+2) - npm run typecheck:all: clean Followup commits on PR #3120. --- scripts/_yahoo-fetch.mjs | 171 +++++++++++++++++ scripts/seed-commodity-quotes.mjs | 75 +++----- scripts/seed-etf-flows.mjs | 33 +--- scripts/seed-gulf-quotes.mjs | 35 +--- scripts/seed-market-quotes.mjs | 30 +-- tests/yahoo-fetch.test.mjs | 303 ++++++++++++++++++++++++++++++ 6 files changed, 524 insertions(+), 123 deletions(-) create mode 100644 scripts/_yahoo-fetch.mjs create mode 100644 tests/yahoo-fetch.test.mjs diff --git a/scripts/_yahoo-fetch.mjs b/scripts/_yahoo-fetch.mjs new file mode 100644 index 000000000..1e114bb70 --- /dev/null +++ b/scripts/_yahoo-fetch.mjs @@ -0,0 +1,171 @@ +// Yahoo Finance fetch helper with curl-only Decodo proxy fallback. +// +// Yahoo Finance throttles Railway egress IPs aggressively (429s). Existing +// seeders had identical `fetchYahooWithRetry` blocks duplicated 4 times +// (seed-commodity-quotes, seed-etf-flows, seed-gulf-quotes, +// seed-market-quotes) with no proxy fallback. This helper consolidates +// them and adds the proxy fallback. 
+// +// PROXY STRATEGY — CURL ONLY, NO CONNECT +// +// Decodo provides two egress paths via different hosts: +// - resolveProxyForConnect() → gate.decodo.com (CONNECT egress pool) +// - resolveProxy() → us.decodo.com (curl-x egress pool) +// +// Probed 2026-04-16: +// query1.finance.yahoo.com via CONNECT (httpsProxyFetchRaw): HTTP 404 +// query1.finance.yahoo.com via curl (curlFetch): HTTP 200 +// +// Yahoo's edge blocks Decodo's CONNECT egress IPs but accepts the curl +// egress IPs. So this helper deliberately omits the CONNECT leg — adding +// it would burn time on a guaranteed-404 attempt before the curl path +// runs anyway. Production defaults expose ONLY the curl resolver + +// fetcher (see _PROXY_DEFAULTS). +// +// If Yahoo's behavior toward Decodo CONNECT changes (e.g. Decodo rotates +// the CONNECT pool), add a second leg following the +// scripts/_open-meteo-archive.mjs cascade pattern. + +import { CHROME_UA, sleep, resolveProxy, curlFetch } from './_seed-utils.mjs'; + +const RETRYABLE_STATUSES = new Set([429, 503]); +const MAX_RETRY_AFTER_MS = 60_000; + +/** + * Production defaults. Exported so tests can lock the wiring at the + * helper level (see tests/yahoo-fetch.test.mjs production-defaults + * cases). Mixing these up — e.g. swapping in resolveProxyForConnect + * — would route requests through the egress pool Yahoo blocks. + */ +export const _PROXY_DEFAULTS = Object.freeze({ + curlProxyResolver: resolveProxy, + curlFetcher: curlFetch, +}); + +/** + * Parse `Retry-After` header value (seconds OR HTTP-date). Mirrors the + * helper in scripts/_open-meteo-archive.mjs — duplicated for now to keep + * each helper module self-contained; consolidate to _seed-utils.mjs if + * a third helper needs it. 
+ */ +export function parseRetryAfterMs(value) { + if (!value) return null; + const seconds = Number(value); + if (Number.isFinite(seconds)) { // numeric form: decide here, never fall through to Date.parse (V8 accepts bare numbers like '0' as dates) + return seconds > 0 ? Math.min(seconds * 1000, MAX_RETRY_AFTER_MS) : null; + } + const retryAt = Date.parse(value); + if (Number.isFinite(retryAt)) { + return Math.min(Math.max(retryAt - Date.now(), 1000), MAX_RETRY_AFTER_MS); + } + return null; +} + +/** + * Fetch JSON from a Yahoo Finance endpoint with retry + proxy fallback. + * + * @param {string} url - Yahoo Finance URL (typically + * `https://query1.finance.yahoo.com/v8/finance/chart/...`). + * @param {object} [opts] + * @param {string} [opts.label] - Symbol or label for log lines (default 'unknown'). + * @param {number} [opts.timeoutMs] - Per-attempt timeout (default 10_000). + * @param {number} [opts.maxRetries] - Direct retries (default 3 → 4 attempts total). + * @param {number} [opts.retryBaseMs] - Linear backoff base (default 5_000). + * @returns {Promise} Parsed JSON. Throws on exhaustion. + * + * Throws (does NOT return null) on exhaustion — caller decides whether + * to swallow with try/catch. Existing pre-helper code returned null on + * failure; migrating callers should wrap in try/catch where null + * semantics is required (rare — most should propagate the error). + */ +export async function fetchYahooJson(url, opts = {}) { + const { + label = 'unknown', + timeoutMs = 10_000, + maxRetries = 3, + retryBaseMs = 5_000, + // Test hooks. Production callers leave these unset and get + // _PROXY_DEFAULTS. Tests inject mocks to exercise the proxy path + // without spinning up real curl execs. `_sleep` lets tests assert + // the actual backoff durations (e.g. Retry-After parsing) without + // sleeping in real time.
+ _curlProxyResolver = _PROXY_DEFAULTS.curlProxyResolver, + _proxyCurlFetcher = _PROXY_DEFAULTS.curlFetcher, + _sleep = sleep, + } = opts; + + // Track the last direct-path failure so the eventual throw carries + // useful upstream context (HTTP status, error message). Without this + // the helper would throw "retries exhausted" alone and lose the signal + // that triggered the proxy attempt. + let lastDirectError = null; + + for (let attempt = 0; attempt <= maxRetries; attempt++) { + let resp; + try { + resp = await fetch(url, { + headers: { 'User-Agent': CHROME_UA }, + signal: AbortSignal.timeout(timeoutMs), + }); + if (resp.ok) return await resp.json(); // inside try: a body-read timeout or malformed 200 body must retry/fall to the proxy, not escape the helper + } catch (err) { + lastDirectError = err; + if (attempt < maxRetries) { + const retryMs = retryBaseMs * (attempt + 1); + console.warn(` [YAHOO] ${label} ${err?.message ?? err}; retrying in ${Math.round(retryMs / 1000)}s (${attempt + 1}/${maxRetries})`); + await _sleep(retryMs); + continue; + } + // Final direct attempt threw (timeout, ECONNRESET, DNS, bad body, etc.). + // Fall through to the proxy fallback below — NEVER throw here. + // PR #3118 review: throwing here silently bypasses the proxy path + // for thrown-error cases. + break; + } + + // Only non-2xx responses reach this point; record the status for the final error. + lastDirectError = new Error(`HTTP ${resp.status}`); + + if (RETRYABLE_STATUSES.has(resp.status) && attempt < maxRetries) { + const retryAfter = parseRetryAfterMs(resp.headers.get('retry-after')); + const retryMs = retryAfter ?? retryBaseMs * (attempt + 1); + console.warn(` [YAHOO] ${label} ${resp.status} — waiting ${Math.round(retryMs / 1000)}s (${attempt + 1}/${maxRetries})`); + await _sleep(retryMs); + continue; + } + + break; + } + + // Curl-only proxy fallback. See module header for why CONNECT is + // omitted (Yahoo blocks Decodo's CONNECT egress IPs). + const curlProxyAuth = _curlProxyResolver(); + if (curlProxyAuth) { + try { + console.log(` [YAHOO] direct exhausted on ${label} (${lastDirectError?.message ??
'unknown'}); trying proxy (curl)`); + // _proxyCurlFetcher (curlFetch / execFileSync) is sync today; + // wrap with await Promise.resolve so a future async refactor of + // curlFetch silently keeps working instead of handing a Promise + // to JSON.parse (Greptile P2 from PR #3119). + const text = await Promise.resolve(_proxyCurlFetcher(url, curlProxyAuth, { 'User-Agent': CHROME_UA, Accept: 'application/json' })); + // Parse BEFORE logging success. If JSON.parse throws, the catch block + // below folds the parse error into the exhausted throw — no contradictory + // "succeeded" log line followed by an "exhausted" throw. The post-deploy + // verification in the PR description relies on this success log being + // a true success signal. + const parsed = JSON.parse(text); + console.log(` [YAHOO] proxy (curl) succeeded for ${label}`); + return parsed; + } catch (curlErr) { + throw new Error( + `Yahoo retries exhausted for ${label} (last direct: ${lastDirectError?.message ?? 'unknown'}; last proxy: ${curlErr?.message ?? curlErr})`, + { cause: lastDirectError ?? curlErr }, + ); + } + } + + throw new Error( + `Yahoo retries exhausted for ${label}${lastDirectError ? ` (last direct: ${lastDirectError.message})` : ''}`, + lastDirectError ?
{ cause: lastDirectError } : undefined, + ); +} diff --git a/scripts/seed-commodity-quotes.mjs b/scripts/seed-commodity-quotes.mjs index eabb4e668..f39352bcb 100644 --- a/scripts/seed-commodity-quotes.mjs +++ b/scripts/seed-commodity-quotes.mjs @@ -1,6 +1,7 @@ #!/usr/bin/env node -import { loadEnvFile, loadSharedConfig, sleep, runSeed, parseYahooChart, writeExtraKey, writeExtraKeyWithMeta, CHROME_UA } from './_seed-utils.mjs'; +import { loadEnvFile, loadSharedConfig, sleep, runSeed, parseYahooChart, writeExtraKey, writeExtraKeyWithMeta } from './_seed-utils.mjs'; +import { fetchYahooJson } from './_yahoo-fetch.mjs'; import { AV_PHYSICAL_MAP, fetchAvPhysicalCommodity, fetchAvBulkQuotes } from './_shared-av.mjs'; const commodityConfig = loadSharedConfig('commodities.json'); @@ -20,30 +21,29 @@ const GOLD_DRIVER_SYMBOLS = [ async function fetchYahooChart1y(symbol) { const url = `https://query1.finance.yahoo.com/v8/finance/chart/${encodeURIComponent(symbol)}?range=1y&interval=1d`; + let json; try { - const resp = await fetch(url, { headers: { 'User-Agent': CHROME_UA }, signal: AbortSignal.timeout(15_000) }); - if (!resp.ok) return null; - const json = await resp.json(); - const r = json?.chart?.result?.[0]; - if (!r) return null; - const meta = r.meta; - const ts = r.timestamp || []; - const closes = r.indicators?.quote?.[0]?.close || []; - const history = ts.map((t, i) => ({ d: new Date(t * 1000).toISOString().slice(0, 10), c: closes[i] })) - .filter(p => p.c != null && Number.isFinite(p.c)); - return { - symbol, - price: meta?.regularMarketPrice ?? null, - dayHigh: meta?.regularMarketDayHigh ?? null, - dayLow: meta?.regularMarketDayLow ?? null, - prevClose: meta?.chartPreviousClose ?? meta?.previousClose ?? null, - fiftyTwoWeekHigh: meta?.fiftyTwoWeekHigh ?? null, - fiftyTwoWeekLow: meta?.fiftyTwoWeekLow ?? 
null, - history, - }; + json = await fetchYahooJson(url, { label: symbol, timeoutMs: 15_000 }); } catch { return null; } + const r = json?.chart?.result?.[0]; + if (!r) return null; + const meta = r.meta; + const ts = r.timestamp || []; + const closes = r.indicators?.quote?.[0]?.close || []; + const history = ts.map((t, i) => ({ d: new Date(t * 1000).toISOString().slice(0, 10), c: closes[i] })) + .filter(p => p.c != null && Number.isFinite(p.c)); + return { + symbol, + price: meta?.regularMarketPrice ?? null, + dayHigh: meta?.regularMarketDayHigh ?? null, + dayLow: meta?.regularMarketDayLow ?? null, + prevClose: meta?.chartPreviousClose ?? meta?.previousClose ?? null, + fiftyTwoWeekHigh: meta?.fiftyTwoWeekHigh ?? null, + fiftyTwoWeekLow: meta?.fiftyTwoWeekLow ?? null, + history, + }; } function computeReturns(history, currentPrice) { @@ -153,28 +153,6 @@ async function fetchGoldExtended() { }; } -async function fetchYahooWithRetry(url, label, maxAttempts = 4) { - for (let i = 0; i < maxAttempts; i++) { - const resp = await fetch(url, { - headers: { 'User-Agent': CHROME_UA }, - signal: AbortSignal.timeout(10_000), - }); - if (resp.status === 429) { - const wait = 5000 * (i + 1); - console.warn(` [Yahoo] ${label} 429 — waiting ${wait / 1000}s (attempt ${i + 1}/${maxAttempts})`); - await sleep(wait); - continue; - } - if (!resp.ok) { - console.warn(` [Yahoo] ${label} HTTP ${resp.status}`); - return null; - } - return resp; - } - console.warn(` [Yahoo] ${label} rate limited after ${maxAttempts} attempts`); - return null; -} - const COMMODITY_SYMBOLS = commodityConfig.commodities.map(c => c.symbol); async function fetchCommodityQuotes() { @@ -217,9 +195,14 @@ async function fetchCommodityQuotes() { try { const url = `https://query1.finance.yahoo.com/v8/finance/chart/${encodeURIComponent(symbol)}`; - const resp = await fetchYahooWithRetry(url, symbol); - if (!resp) { misses++; continue; } - const parsed = parseYahooChart(await resp.json(), symbol); + let chart; + try { + 
chart = await fetchYahooJson(url, { label: symbol }); + } catch { + misses++; + continue; + } + const parsed = parseYahooChart(chart, symbol); if (parsed) { quotes.push(parsed); covered.add(symbol); diff --git a/scripts/seed-etf-flows.mjs b/scripts/seed-etf-flows.mjs index 14503f622..827bdc4eb 100755 --- a/scripts/seed-etf-flows.mjs +++ b/scripts/seed-etf-flows.mjs @@ -1,6 +1,7 @@ #!/usr/bin/env node import { loadEnvFile, loadSharedConfig, runSeed } from './_seed-utils.mjs'; +import { fetchYahooJson } from './_yahoo-fetch.mjs'; import { fetchAvBulkQuotes } from './_shared-av.mjs'; const etfConfig = loadSharedConfig('etfs.json'); @@ -17,28 +18,6 @@ function sleep(ms) { return new Promise((r) => setTimeout(r, ms)); } -async function fetchYahooWithRetry(url, label, maxAttempts = 4) { - for (let i = 0; i < maxAttempts; i++) { - const resp = await fetch(url, { - headers: { 'User-Agent': CHROME_UA }, - signal: AbortSignal.timeout(10_000), - }); - if (resp.status === 429) { - const wait = 5000 * (i + 1); - console.warn(` [Yahoo] ${label} 429 — waiting ${wait / 1000}s (attempt ${i + 1}/${maxAttempts})`); - await sleep(wait); - continue; - } - if (!resp.ok) { - console.warn(` [Yahoo] ${label} HTTP ${resp.status}`); - return null; - } - return resp; - } - console.warn(` [Yahoo] ${label} rate limited after ${maxAttempts} attempts`); - return null; -} - function parseEtfChartData(chart, ticker, issuer) { const result = chart?.chart?.result?.[0]; if (!result) return null; @@ -112,9 +91,13 @@ async function fetchEtfFlows() { try { const url = `https://query1.finance.yahoo.com/v8/finance/chart/${ticker}?range=5d&interval=1d`; - const resp = await fetchYahooWithRetry(url, ticker); - if (!resp) { misses++; continue; } - const chart = await resp.json(); + let chart; + try { + chart = await fetchYahooJson(url, { label: ticker }); + } catch { + misses++; + continue; + } const parsed = parseEtfChartData(chart, ticker, issuer); if (parsed) { etfs.push(parsed); diff --git 
a/scripts/seed-gulf-quotes.mjs b/scripts/seed-gulf-quotes.mjs index 8b405a844..8d5d4b975 100755 --- a/scripts/seed-gulf-quotes.mjs +++ b/scripts/seed-gulf-quotes.mjs @@ -1,6 +1,7 @@ #!/usr/bin/env node -import { loadEnvFile, loadSharedConfig, CHROME_UA, runSeed, sleep } from './_seed-utils.mjs'; +import { loadEnvFile, loadSharedConfig, runSeed, sleep } from './_seed-utils.mjs'; +import { fetchYahooJson } from './_yahoo-fetch.mjs'; import { fetchAvPhysicalCommodity, fetchAvFxDaily } from './_shared-av.mjs'; const gulfConfig = loadSharedConfig('gulf.json'); @@ -13,28 +14,6 @@ const YAHOO_DELAY_MS = 200; const GULF_SYMBOLS = gulfConfig.symbols; -async function fetchYahooWithRetry(url, label, maxAttempts = 4) { - for (let i = 0; i < maxAttempts; i++) { - const resp = await fetch(url, { - headers: { 'User-Agent': CHROME_UA }, - signal: AbortSignal.timeout(10_000), - }); - if (resp.status === 429) { - const wait = 5000 * (i + 1); - console.warn(` [Yahoo] ${label} 429 — waiting ${wait / 1000}s (attempt ${i + 1}/${maxAttempts})`); - await sleep(wait); - continue; - } - if (!resp.ok) { - console.warn(` [Yahoo] ${label} HTTP ${resp.status}`); - return null; - } - return resp; - } - console.warn(` [Yahoo] ${label} rate limited after ${maxAttempts} attempts`); - return null; -} - function parseYahooChart(data, meta) { const result = data?.chart?.result?.[0]; const chartMeta = result?.meta; @@ -98,9 +77,13 @@ async function fetchGulfQuotes() { try { const url = `https://query1.finance.yahoo.com/v8/finance/chart/${encodeURIComponent(meta.symbol)}`; - const resp = await fetchYahooWithRetry(url, meta.symbol); - if (!resp) { misses++; continue; } - const chart = await resp.json(); + let chart; + try { + chart = await fetchYahooJson(url, { label: meta.symbol }); + } catch { + misses++; + continue; + } const parsed = parseYahooChart(chart, meta); if (parsed) { quotes.push(parsed); diff --git a/scripts/seed-market-quotes.mjs b/scripts/seed-market-quotes.mjs index 73dd91f15..61282de22 
100644 --- a/scripts/seed-market-quotes.mjs +++ b/scripts/seed-market-quotes.mjs @@ -1,6 +1,7 @@ #!/usr/bin/env node -import { loadEnvFile, loadSharedConfig, sleep, runSeed, parseYahooChart, writeExtraKey } from './_seed-utils.mjs'; +import { loadEnvFile, loadSharedConfig, sleep, CHROME_UA, runSeed, parseYahooChart, writeExtraKey } from './_seed-utils.mjs'; +import { fetchYahooJson } from './_yahoo-fetch.mjs'; import { fetchAvBulkQuotes } from './_shared-av.mjs'; const stocksConfig = loadSharedConfig('stocks.json'); @@ -32,34 +33,11 @@ async function fetchFinnhubQuote(symbol, apiKey) { } } -async function fetchYahooWithRetry(url, label, maxAttempts = 4) { - for (let i = 0; i < maxAttempts; i++) { - const resp = await fetch(url, { - headers: { 'User-Agent': CHROME_UA }, - signal: AbortSignal.timeout(10_000), - }); - if (resp.status === 429) { - const wait = 5000 * (i + 1); - console.warn(` [Yahoo] ${label} 429 — waiting ${wait / 1000}s (attempt ${i + 1}/${maxAttempts})`); - await sleep(wait); - continue; - } - if (!resp.ok) { - console.warn(` [Yahoo] ${label} HTTP ${resp.status}`); - return null; - } - return resp; - } - console.warn(` [Yahoo] ${label} rate limited after ${maxAttempts} attempts`); - return null; -} - async function fetchYahooQuote(symbol) { try { const url = `https://query1.finance.yahoo.com/v8/finance/chart/${encodeURIComponent(symbol)}`; - const resp = await fetchYahooWithRetry(url, symbol); - if (!resp) return null; - return parseYahooChart(await resp.json(), symbol); + const chart = await fetchYahooJson(url, { label: symbol }); + return parseYahooChart(chart, symbol); } catch (err) { console.warn(` [Yahoo] ${symbol} error: ${err.message}`); return null; diff --git a/tests/yahoo-fetch.test.mjs b/tests/yahoo-fetch.test.mjs new file mode 100644 index 000000000..926e8d358 --- /dev/null +++ b/tests/yahoo-fetch.test.mjs @@ -0,0 +1,303 @@ +// Tests for scripts/_yahoo-fetch.mjs. 
+// +// Locks every learning from PR #3118 + #3119 review cycles: +// +// 1. Direct retries → proxy fallback cascade with `lastDirectError` accumulator. +// 2. Catch block uses `break` (NOT `throw`) so thrown errors also reach +// the proxy path. Includes explicit P1 regression guard. +// 3. Final exhausted-throw embeds last upstream signal (HTTP status or +// thrown error message) + `Error.cause`. +// 4. Production defaults locked at the helper level (_PROXY_DEFAULTS). +// Without this lock, the cascade tests would all pass even if the +// helper accidentally wired the wrong resolver/fetcher. +// 5. DI seams (`_curlProxyResolver`, `_proxyCurlFetcher`, `_sleep`) — production +// callers leave unset; tests inject mocks. +// 6. Sync-curl-future-safety covered indirectly via the await-resolve +// wrap in the helper (no test needed; the wrap is a no-op today and +// adapts automatically if curlFetch becomes async). +// +// Yahoo-specific: NO CONNECT leg (Decodo CONNECT 404s on Yahoo per probe). +// Production defaults must NOT include a connectProxyResolver. + +import { test, afterEach } from 'node:test'; +import { strict as assert } from 'node:assert'; + +process.env.UPSTASH_REDIS_REST_URL = 'https://redis.test'; +process.env.UPSTASH_REDIS_REST_TOKEN = 'fake-token'; + +const URL = 'https://query1.finance.yahoo.com/v8/finance/chart/AAPL'; +const VALID_PAYLOAD = { chart: { result: [{ meta: { symbol: 'AAPL', regularMarketPrice: 150 } }] } }; + +const COMMON_OPTS = { + label: 'AAPL', + maxRetries: 1, + retryBaseMs: 10, + timeoutMs: 1000, +}; + +const originalFetch = globalThis.fetch; +afterEach(() => { globalThis.fetch = originalFetch; }); + +// ─── Production defaults: lock the wiring ─────────────────────────────── + +test('production defaults: curl leg uses resolveProxy (us.decodo.com pool) and curlFetch', async () => { + // No `?t=` cache-buster — reference equality across modules requires + // same module instance.
+ const { _PROXY_DEFAULTS } = await import('../scripts/_yahoo-fetch.mjs'); + const { resolveProxy, curlFetch } = await import('../scripts/_seed-utils.mjs'); + assert.equal(_PROXY_DEFAULTS.curlProxyResolver, resolveProxy, 'curl leg MUST use resolveProxy (us.decodo.com pool — Yahoo accepts this egress)'); + assert.equal(_PROXY_DEFAULTS.curlFetcher, curlFetch, 'curl leg MUST use curlFetch (sync, future-wrapped with await Promise.resolve)'); +}); + +test('production defaults: NO CONNECT leg (Yahoo blocks Decodo CONNECT egress with 404)', async () => { + const { _PROXY_DEFAULTS } = await import('../scripts/_yahoo-fetch.mjs'); + // Asserting absence prevents a future "let's add CONNECT for redundancy" + // refactor from silently re-introducing the 404 cascade. If you need to + // add CONNECT, also re-probe Yahoo and update the comment in the helper. + assert.equal(_PROXY_DEFAULTS.connectProxyResolver, undefined, 'No CONNECT leg — see helper module header for why'); + assert.equal(_PROXY_DEFAULTS.connectFetcher, undefined, 'No CONNECT fetcher'); +}); + +// ─── Direct path ──────────────────────────────────────────────────────── + +test('200 OK: returns parsed JSON, never touches proxy', async () => { + const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + globalThis.fetch = async () => ({ + ok: true, status: 200, + headers: { get: () => null }, + json: async () => VALID_PAYLOAD, + }); + + let proxyCalls = 0; + const result = await fetchYahooJson(URL, { + ...COMMON_OPTS, + _curlProxyResolver: () => 'should-not-be-used', + _proxyCurlFetcher: () => { proxyCalls += 1; throw new Error('not reached'); }, + }); + assert.deepEqual(result, VALID_PAYLOAD); + assert.equal(proxyCalls, 0); +}); + +test('429 with no proxy configured: throws after exhausting retries (HTTP 429 in message)', async () => { + const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + let calls = 0; + globalThis.fetch = async () => { + calls += 1; + return { ok: false, 
status: 429, headers: { get: () => null }, json: async () => ({}) }; + }; + + await assert.rejects( + () => fetchYahooJson(URL, { ...COMMON_OPTS, _curlProxyResolver: () => null }), + (err) => { + assert.match(err.message, /Yahoo retries exhausted/); + assert.match(err.message, /HTTP 429/, 'last direct status MUST appear in message'); + return true; + }, + ); + assert.equal(calls, 2, 'maxRetries=1 → 2 direct attempts'); +}); + +test('Retry-After header parsed: backoff respects upstream hint (DI _sleep capture)', async () => { + // Pre-fix bug: this test used Retry-After: '0', but parseRetryAfterMs() + // treats non-positive seconds as null → helper falls back to default + // backoff. So the test was named "Retry-After parsed" but actually + // exercised the default-backoff branch. Fix: use a positive header value + // that's distinctly different from `retryBaseMs * (attempt+1)`, AND + // capture the _sleep call so we can assert which branch ran. + const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + let calls = 0; + globalThis.fetch = async () => { + calls += 1; + return { + ok: calls > 1, + status: calls > 1 ? 200 : 429, + headers: { get: (name) => name.toLowerCase() === 'retry-after' ? '7' : null }, + json: async () => VALID_PAYLOAD, + }; + }; + const sleepDurations = []; + const result = await fetchYahooJson(URL, { + ...COMMON_OPTS, + _curlProxyResolver: () => null, + _sleep: async (ms) => { sleepDurations.push(ms); }, // capture, never actually sleep + }); + assert.deepEqual(result, VALID_PAYLOAD); + assert.equal(calls, 2); + assert.deepEqual(sleepDurations, [7000], 'Retry-After: 7 must produce a 7000ms sleep, not retryBaseMs default (10ms)'); +}); + +test('Retry-After absent: falls back to linear backoff retryBaseMs * (attempt+1)', async () => { + // Companion to the test above — locks the OTHER branch of the if so + // they're not collapsed into one path silently. 
+ const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + let calls = 0; + globalThis.fetch = async () => { + calls += 1; + return { + ok: calls > 1, + status: calls > 1 ? 200 : 429, + headers: { get: () => null }, // no Retry-After + json: async () => VALID_PAYLOAD, + }; + }; + const sleepDurations = []; + await fetchYahooJson(URL, { + ...COMMON_OPTS, // retryBaseMs: 10 + _curlProxyResolver: () => null, + _sleep: async (ms) => { sleepDurations.push(ms); }, + }); + assert.deepEqual(sleepDurations, [10], 'no Retry-After → retryBaseMs * 1 = 10ms'); +}); + +// ─── Curl proxy fallback path ─────────────────────────────────────────── + +test('429 + curl proxy succeeds: returns proxy data', async () => { + const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + globalThis.fetch = async () => ({ + ok: false, status: 429, headers: { get: () => null }, json: async () => ({}), + }); + + let curlCalls = 0; + let receivedAuth = null; + let receivedHeaders = null; + const result = await fetchYahooJson(URL, { + ...COMMON_OPTS, + _curlProxyResolver: () => 'user:pass@us.decodo.com:10001', + _proxyCurlFetcher: (url, auth, headers) => { + curlCalls += 1; + receivedAuth = auth; + receivedHeaders = headers; + assert.match(url, /query1\.finance\.yahoo\.com/); + return JSON.stringify(VALID_PAYLOAD); + }, + }); + + assert.equal(curlCalls, 1); + assert.equal(receivedAuth, 'user:pass@us.decodo.com:10001'); + assert.equal(receivedHeaders['User-Agent'].length > 50, true, 'CHROME_UA forwarded to curl'); + assert.deepEqual(result, VALID_PAYLOAD); +}); + +test('thrown fetch error on final retry → proxy fallback runs (P1 regression guard)', async () => { + // Pre-fix bug class: `throw err` in the catch block bypasses the proxy + // path for thrown-error cases (timeout, ECONNRESET, DNS). Lock that + // the new control flow `break`s and reaches the proxy. 
+ const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + let directCalls = 0; + globalThis.fetch = async () => { + directCalls += 1; + throw Object.assign(new Error('Connect Timeout Error'), { code: 'UND_ERR_CONNECT_TIMEOUT' }); + }; + + let curlCalls = 0; + const result = await fetchYahooJson(URL, { + ...COMMON_OPTS, + _curlProxyResolver: () => 'user:pass@us.decodo.com:10001', + _proxyCurlFetcher: () => { curlCalls += 1; return JSON.stringify(VALID_PAYLOAD); }, + }); + + assert.equal(directCalls, 2, 'direct attempts exhausted before proxy'); + assert.equal(curlCalls, 1, 'proxy MUST run on thrown-error path'); + assert.deepEqual(result, VALID_PAYLOAD); +}); + +test('429 + proxy ALSO fails: throws with both errors visible in message', async () => { + const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + globalThis.fetch = async () => ({ + ok: false, status: 429, headers: { get: () => null }, json: async () => ({}), + }); + + await assert.rejects( + () => fetchYahooJson(URL, { + ...COMMON_OPTS, + _curlProxyResolver: () => 'user:pass@us.decodo.com:10001', + _proxyCurlFetcher: () => { throw new Error('curl 502'); }, + }), + (err) => { + assert.match(err.message, /Yahoo retries exhausted/); + assert.match(err.message, /HTTP 429/, 'direct status preserved'); + assert.match(err.message, /curl 502/, 'proxy error appended'); + assert.ok(err.cause, 'Error.cause chain set'); + return true; + }, + ); +}); + +test('proxy returns malformed JSON: throws exhausted', async () => { + const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + globalThis.fetch = async () => ({ + ok: false, status: 429, headers: { get: () => null }, json: async () => ({}), + }); + await assert.rejects( + () => fetchYahooJson(URL, { + ...COMMON_OPTS, + _curlProxyResolver: () => 'user:pass@us.decodo.com:10001', + _proxyCurlFetcher: () => 'not-valid-json', + }), + /Yahoo retries exhausted/, + ); +}); + +test('proxy malformed JSON does NOT emit 
"succeeded" log before throwing (P2 log ordering)', async () => { + // Pre-fix bug class: success log was emitted before JSON.parse, so a + // malformed proxy response produced contradictory Railway logs: + // [YAHOO] proxy (curl) succeeded for AAPL + // throw: Yahoo retries exhausted ... + // Post-fix: parse runs first; success log only fires when parse succeeds. + // This breaks the log-grep used by the post-deploy verification in the + // PR description (`look for [YAHOO] proxy (curl) succeeded`). + const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + globalThis.fetch = async () => ({ + ok: false, status: 429, headers: { get: () => null }, json: async () => ({}), + }); + + const logs = []; + const originalLog = console.log; + console.log = (msg) => { logs.push(String(msg)); }; + try { + await assert.rejects( + () => fetchYahooJson(URL, { + ...COMMON_OPTS, + _curlProxyResolver: () => 'user:pass@us.decodo.com:10001', + _proxyCurlFetcher: () => 'not-valid-json', + }), + /Yahoo retries exhausted/, + ); + } finally { + console.log = originalLog; + } + + const succeededLogged = logs.some((l) => l.includes('proxy (curl) succeeded')); + assert.equal(succeededLogged, false, 'success log MUST NOT fire when JSON.parse throws'); +}); + +test('non-retryable status (500): no extra direct retry, falls to proxy', async () => { + const { fetchYahooJson } = await import('../scripts/_yahoo-fetch.mjs'); + let directCalls = 0; + globalThis.fetch = async () => { + directCalls += 1; + return { ok: false, status: 500, headers: { get: () => null }, json: async () => ({}) }; + }; + let curlCalls = 0; + const result = await fetchYahooJson(URL, { + ...COMMON_OPTS, + _curlProxyResolver: () => 'user:pass@us.decodo.com:10001', + _proxyCurlFetcher: () => { curlCalls += 1; return JSON.stringify(VALID_PAYLOAD); }, + }); + assert.equal(directCalls, 1, 'non-retryable status → no extra direct retry'); + assert.equal(curlCalls, 1, 'falls to proxy'); + assert.deepEqual(result, 
VALID_PAYLOAD); +}); + +// ─── parseRetryAfterMs unit (export sanity) ───────────────────────────── + +test('parseRetryAfterMs: seconds + HTTP-date + null cases', async () => { + const { parseRetryAfterMs } = await import('../scripts/_yahoo-fetch.mjs'); + assert.equal(parseRetryAfterMs(null), null); + assert.equal(parseRetryAfterMs(''), null); + assert.equal(parseRetryAfterMs('5'), 5_000); + assert.equal(parseRetryAfterMs('70'), 60_000, 'capped at MAX_RETRY_AFTER_MS=60_000'); + // HTTP-date in the past clamps to >= 1000ms. + const past = new Date(Date.now() - 30_000).toUTCString(); + assert.equal(parseRetryAfterMs(past), 1000); +});