fix(desktop): backoff on errors to stop CPU abuse + shrink settings window (#633)

Three bugs combine to burn 130% CPU when sidecar auth fails:

1. RefreshScheduler resets backoff multiplier to 1 (fastest) on error,
   causing failed endpoints to poll at base interval instead of backing off.
   Fix: exponential backoff on errors, same as unchanged-data path.

2. classify-event batch system ignores 401 (auth failure) — only pauses
   on 429/5xx. Hundreds of classify calls fire every 2s, each wasted.
   Fix: pause 120s on 401, matching the 429/5xx pattern.

3. Fetch patch retries every 401 (refresh token + retry), doubling all
   requests to the sidecar even when token refresh consistently fails.
   Fix: skip the refresh-and-retry for 60s once a retried request still returns 401.

Also shrinks the settings window height from 760→600px (min height 620→480)
to reduce the empty whitespace below the content on all tabs.
This commit is contained in:
Elie Habib
2026-03-01 10:53:54 +04:00
committed by GitHub
parent eb1d596c0a
commit d0a2a50506
5 changed files with 15 additions and 7 deletions

2
src-tauri/Cargo.lock generated
View File

@@ -5423,7 +5423,7 @@ dependencies = [
[[package]]
name = "world-monitor"
version = "2.5.19"
version = "2.5.21"
dependencies = [
"getrandom 0.2.17",
"keyring",

View File

@@ -563,8 +563,8 @@ fn open_settings_window(app: &AppHandle) -> Result<(), String> {
let _settings_window = WebviewWindowBuilder::new(app, "settings", WebviewUrl::App("settings.html".into()))
.title("World Monitor Settings")
.inner_size(980.0, 760.0)
.min_inner_size(820.0, 620.0)
.inner_size(980.0, 600.0)
.min_inner_size(820.0, 480.0)
.resizable(true)
.background_color(tauri::webview::Color(26, 28, 30, 255))
.build()

View File

@@ -85,7 +85,7 @@ export class RefreshScheduler implements AppModule {
}
} catch (e) {
console.error(`[App] Refresh ${name} failed:`, e);
currentMultiplier = 1;
currentMultiplier = Math.min(currentMultiplier * 2, MAX_BACKOFF_MULTIPLIER);
} finally {
this.ctx.inFlight.delete(name);
scheduleNext(computeDelay(intervalMs * currentMultiplier, false));

View File

@@ -257,6 +257,7 @@ export function installRuntimeFetchPatch(): void {
const nativeFetch = window.fetch.bind(window);
let localApiToken: string | null = null;
let tokenFetchedAt = 0;
let authRetryCooldownUntil = 0; // suppress 401 retries after consecutive failures
window.fetch = async (input: RequestInfo | URL, init?: RequestInit): Promise<Response> => {
const target = getApiTargetFromRequestInput(input);
@@ -333,7 +334,8 @@ export function installRuntimeFetchPatch(): void {
if (debug) console.log(`[fetch] ${target}${response.status} (${Math.round(performance.now() - t0)}ms)`);
// Token may be stale after a sidecar restart — refresh and retry once.
if (response.status === 401 && localApiToken) {
// Skip retry if we recently failed (avoid doubling every request during auth outages).
if (response.status === 401 && localApiToken && Date.now() > authRetryCooldownUntil) {
if (debug) console.log(`[fetch] 401 from sidecar, refreshing token and retrying`);
try {
const { tryInvokeTauri } = await import('@/services/tauri-bridge');
@@ -348,6 +350,12 @@ export function installRuntimeFetchPatch(): void {
retryHeaders.set('Authorization', `Bearer ${localApiToken}`);
response = await fetchLocalWithStartupRetry(nativeFetch, localUrl, { ...init, headers: retryHeaders });
if (debug) console.log(`[fetch] retry ${target}${response.status}`);
if (response.status === 401) {
authRetryCooldownUntil = Date.now() + 60_000;
if (debug) console.log(`[fetch] auth retry failed, suppressing retries for 60s`);
} else {
authRetryCooldownUntil = 0;
}
}
}

View File

@@ -462,9 +462,9 @@ function flushBatch(): void {
});
job.resolve(toThreat(resp));
} catch (err) {
if (err instanceof ApiError && (err.statusCode === 429 || err.statusCode >= 500)) {
if (err instanceof ApiError && (err.statusCode === 401 || err.statusCode === 429 || err.statusCode >= 500)) {
batchPaused = true;
const delay = err.statusCode === 429 ? 60_000 : 30_000;
const delay = err.statusCode === 401 ? 120_000 : err.statusCode === 429 ? 60_000 : 30_000;
console.warn(`[Classify] ${err.statusCode} — pausing AI classification for ${delay / 1000}s`);
const remaining = batch.slice(i + 1);
// Failed job: increment attempts, requeue if under limit