From 8f1cd0474fa022363521191faf6bd44c71ec19b3 Mon Sep 17 00:00:00 2001
From: Dotta <34892728+cryppadotta@users.noreply.github.com>
Date: Fri, 24 Apr 2026 09:40:40 -0500
Subject: [PATCH] [codex] Improve transient recovery and Codex model refresh (#4383)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Adapter execution and retry classification decide whether agent work pauses, retries, or recovers automatically
> - Transient provider failures need to be classified precisely so Paperclip does not convert retryable upstream conditions into false hard failures
> - At the same time, operators need an up-to-date model list for Codex-backed agents and prompts should nudge agents toward targeted verification instead of repo-wide sweeps
> - This pull request tightens transient recovery classification for Claude and Codex, updates the agent prompt guidance, and adds Codex model refresh support end-to-end
> - The benefit is better automatic retry behavior plus fresher operator-facing model configuration

## What Changed

- added Codex usage-limit retry-window parsing and Claude extra-usage transient classification
- normalized the heartbeat transient-recovery contract across adapter executions and heartbeat scheduling
- documented that deferred comment wakes only reopen completed issues for human/comment-reopen interactions, while system follow-ups leave closed work closed
- updated adapter-utils prompt guidance to prefer targeted verification
- added Codex model refresh support in the server route, registry, shared types, and agent config form
- added adapter/server tests covering the new parsing, retry scheduling, and model-refresh behavior

## Verification

- `pnpm exec vitest run --project @paperclipai/adapter-utils packages/adapter-utils/src/server-utils.test.ts`
- `pnpm exec vitest run --project @paperclipai/adapter-claude-local 
packages/adapters/claude-local/src/server/parse.test.ts`
- `pnpm exec vitest run --project @paperclipai/adapter-codex-local packages/adapters/codex-local/src/server/parse.test.ts`
- `pnpm exec vitest run --project @paperclipai/server server/src/__tests__/adapter-model-refresh-routes.test.ts server/src/__tests__/adapter-models.test.ts server/src/__tests__/claude-local-execute.test.ts server/src/__tests__/codex-local-execute.test.ts server/src/__tests__/heartbeat-process-recovery.test.ts server/src/__tests__/heartbeat-retry-scheduling.test.ts`

## Risks

- Moderate behavior risk: retry classification affects whether runs auto-recover or block, so mistakes here could either suppress needed retries or over-retry real failures
- Low workflow risk: deferred comment wake reopening is intentionally scoped to human/comment-reopen interactions so system follow-ups do not revive completed issues unexpectedly

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex GPT-5-based coding agent with tool use and code execution in the Codex CLI environment

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after screenshots
- [ ] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge

---------

Co-authored-by: Paperclip
---
 AGENTS.md | 4 +-
 .../adapter-utils/src/server-utils.test.ts | 1 +
 packages/adapter-utils/src/server-utils.ts | 1 +
 packages/adapter-utils/src/types.ts | 11 +
 .../claude-local/src/server/execute.ts | 82 ++++++-
 .../claude-local/src/server/parse.test.ts | 123 +++++++++++
 .../adapters/claude-local/src/server/parse.ts | 206 +++++++++++++++++-
 .../codex-local/src/server/execute.ts | 28 ++-
 .../codex-local/src/server/parse.test.ts | 20 ++
 .../adapters/codex-local/src/server/parse.ts | 172 ++++++++++++++-
 .../adapter-model-refresh-routes.test.ts | 185 ++++++++++++++++
 server/src/__tests__/adapter-models.test.ts | 26 ++-
 .../__tests__/claude-local-execute.test.ts | 199 ++++++++++++++++-
 .../__tests__/cleanup-removal-service.test.ts | 43 ++++
 .../src/__tests__/codex-local-execute.test.ts | 65 +++++-
 .../heartbeat-process-recovery.test.ts | 9 +-
 .../heartbeat-retry-scheduling.test.ts | 129 ++++++++++-
 server/src/adapters/codex-models.ts | 13 +-
 server/src/adapters/index.ts | 1 +
 server/src/adapters/registry.ts | 17 +-
 server/src/routes/agents.ts | 8 +-
 server/src/services/companies.ts | 2 +
 server/src/services/heartbeat.ts | 98 ++++++++-
 ui/src/api/agents.ts | 4 +-
ui/src/components/AgentConfigForm.tsx | 56 ++++- 25 files changed, 1455 insertions(+), 48 deletions(-) create mode 100644 packages/adapters/claude-local/src/server/parse.test.ts create mode 100644 server/src/__tests__/adapter-model-refresh-routes.test.ts diff --git a/AGENTS.md b/AGENTS.md index 89cefbef4b..3555bfcdaf 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -123,7 +123,9 @@ pnpm test:release-smoke Run the browser suites only when your change touches them or when you are explicitly verifying CI/release flows. -Run this full check before claiming done: +For normal issue work, run the smallest relevant verification first. Do not default to repo-wide typecheck/build/test on every heartbeat when a narrower check is enough to prove the change. + +Run this full check before claiming repo work done in a PR-ready hand-off, or when the change scope is broad enough that targeted checks are not sufficient: ```sh pnpm -r typecheck diff --git a/packages/adapter-utils/src/server-utils.test.ts b/packages/adapter-utils/src/server-utils.test.ts index 4fd2281487..068474456e 100644 --- a/packages/adapter-utils/src/server-utils.test.ts +++ b/packages/adapter-utils/src/server-utils.test.ts @@ -254,6 +254,7 @@ describe("renderPaperclipWakePrompt", () => { it("keeps the default local-agent prompt action-oriented", () => { expect(DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE).toContain("Start actionable work in this heartbeat"); expect(DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE).toContain("do not stop at a plan"); + expect(DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE).toContain("Prefer the smallest verification that proves the change"); expect(DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE).toContain("Use child issues"); expect(DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE).toContain("instead of polling agents, sessions, or processes"); expect(DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE).toContain("Create child issues directly when you know what needs to be done"); diff --git 
a/packages/adapter-utils/src/server-utils.ts b/packages/adapter-utils/src/server-utils.ts index 20ce8f2f05..a66ccdc08e 100644 --- a/packages/adapter-utils/src/server-utils.ts +++ b/packages/adapter-utils/src/server-utils.ts @@ -87,6 +87,7 @@ export const DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE = [ "Execution contract:", "- Start actionable work in this heartbeat; do not stop at a plan unless the issue asks for planning.", "- Leave durable progress in comments, documents, or work products with a clear next action.", + "- Prefer the smallest verification that proves the change; do not default to full workspace typecheck/build/test on every heartbeat unless the task scope warrants it.", "- Use child issues for parallel or long delegated work instead of polling agents, sessions, or processes.", "- If woken by a human comment on a dependency-blocked issue, respond or triage the comment without treating the blocked deliverable work as unblocked.", "- Create child issues directly when you know what needs to be done; use issue-thread interactions when the board/user must choose suggested tasks, answer structured questions, or confirm a proposal.", diff --git a/packages/adapter-utils/src/types.ts b/packages/adapter-utils/src/types.ts index ff00a0e989..89df75853d 100644 --- a/packages/adapter-utils/src/types.ts +++ b/packages/adapter-utils/src/types.ts @@ -64,12 +64,16 @@ export interface AdapterRuntimeServiceReport { healthStatus?: "unknown" | "healthy" | "unhealthy"; } +export type AdapterExecutionErrorFamily = "transient_upstream"; + export interface AdapterExecutionResult { exitCode: number | null; signal: string | null; timedOut: boolean; errorMessage?: string | null; errorCode?: string | null; + errorFamily?: AdapterExecutionErrorFamily | null; + retryNotBefore?: string | null; errorMeta?: Record; usage?: UsageSummary; /** @@ -311,6 +315,13 @@ export interface ServerAdapterModule { supportsLocalAgentJwt?: boolean; models?: AdapterModel[]; listModels?: () => Promise; + 
/** + * Optional explicit refresh hook for model discovery. + * Use this when the adapter caches discovered models and needs a bypass path + * so the UI can fetch newly released models without waiting for cache expiry + * or a Paperclip code update. + */ + refreshModels?: () => Promise; agentConfigurationDoc?: string; /** * Optional lifecycle hook when an agent is approved/hired (join-request or hire_agent approval). diff --git a/packages/adapters/claude-local/src/server/execute.ts b/packages/adapters/claude-local/src/server/execute.ts index 6fa870c85d..a79e722466 100644 --- a/packages/adapters/claude-local/src/server/execute.ts +++ b/packages/adapters/claude-local/src/server/execute.ts @@ -39,7 +39,9 @@ import { parseClaudeStreamJson, describeClaudeFailure, detectClaudeLoginRequired, + extractClaudeRetryNotBefore, isClaudeMaxTurnsResult, + isClaudeTransientUpstreamError, isClaudeUnknownSessionError, } from "./parse.js"; import { resolveClaudeDesiredSkillNames } from "./skills.js"; @@ -625,16 +627,48 @@ export async function execute(ctx: AdapterExecutionContext): Promise) : null; const clearSessionForMaxTurns = isClaudeMaxTurnsResult(parsed); + const parsedIsError = asBoolean(parsed.is_error, false); + const failed = (proc.exitCode ?? 0) !== 0 || parsedIsError; + const errorMessage = failed + ? describeClaudeFailure(parsed) ?? `Claude exited with code ${proc.exitCode ?? -1}` + : null; + const transientUpstream = + failed && + !loginMeta.requiresLogin && + isClaudeTransientUpstreamError({ + parsed, + stdout: proc.stdout, + stderr: proc.stderr, + errorMessage, + }); + const transientRetryNotBefore = transientUpstream + ? extractClaudeRetryNotBefore({ + parsed, + stdout: proc.stdout, + stderr: proc.stderr, + errorMessage, + }) + : null; + const resolvedErrorCode = loginMeta.requiresLogin + ? "claude_auth_required" + : transientUpstream + ? "claude_transient_upstream" + : null; + const mergedResultJson: Record = { + ...parsed, + ...(transientUpstream ? 
{ errorFamily: "transient_upstream" } : {}), + ...(transientRetryNotBefore ? { retryNotBefore: transientRetryNotBefore.toISOString() } : {}), + ...(transientRetryNotBefore ? { transientRetryNotBefore: transientRetryNotBefore.toISOString() } : {}), + }; return { exitCode: proc.exitCode, signal: proc.signal, timedOut: false, - errorMessage: - (proc.exitCode ?? 0) === 0 - ? null - : describeClaudeFailure(parsed) ?? `Claude exited with code ${proc.exitCode ?? -1}`, - errorCode: loginMeta.requiresLogin ? "claude_auth_required" : null, + errorMessage, + errorCode: resolvedErrorCode, + errorFamily: transientUpstream ? "transient_upstream" : null, + retryNotBefore: transientRetryNotBefore ? transientRetryNotBefore.toISOString() : null, errorMeta, usage, sessionId: resolvedSessionId, @@ -690,7 +756,7 @@ export async function execute(ctx: AdapterExecutionContext): Promise { + it("classifies the 'out of extra usage' subscription window failure as transient", () => { + expect( + isClaudeTransientUpstreamError({ + errorMessage: "You're out of extra usage · resets 4pm (America/Chicago)", + }), + ).toBe(true); + expect( + isClaudeTransientUpstreamError({ + parsed: { + is_error: true, + result: "You're out of extra usage. Resets at 4pm (America/Chicago).", + }, + }), + ).toBe(true); + }); + + it("classifies Anthropic API rate_limit_error and overloaded_error as transient", () => { + expect( + isClaudeTransientUpstreamError({ + parsed: { + is_error: true, + errors: [{ type: "rate_limit_error", message: "Rate limit reached for requests." 
}], + }, + }), + ).toBe(true); + expect( + isClaudeTransientUpstreamError({ + parsed: { + is_error: true, + errors: [{ type: "overloaded_error", message: "Overloaded" }], + }, + }), + ).toBe(true); + expect( + isClaudeTransientUpstreamError({ + stderr: "HTTP 429: Too Many Requests", + }), + ).toBe(true); + expect( + isClaudeTransientUpstreamError({ + stderr: "Bedrock ThrottlingException: slow down", + }), + ).toBe(true); + }); + + it("classifies the subscription 5-hour / weekly limit wording", () => { + expect( + isClaudeTransientUpstreamError({ + errorMessage: "Claude usage limit reached — weekly limit reached. Try again in 2 days.", + }), + ).toBe(true); + expect( + isClaudeTransientUpstreamError({ + errorMessage: "5-hour limit reached.", + }), + ).toBe(true); + }); + + it("does not classify login/auth failures as transient", () => { + expect( + isClaudeTransientUpstreamError({ + stderr: "Please log in. Run `claude login` first.", + }), + ).toBe(false); + }); + + it("does not classify max-turns or unknown-session as transient", () => { + expect( + isClaudeTransientUpstreamError({ + parsed: { subtype: "error_max_turns", result: "Maximum turns reached." 
}, + }), + ).toBe(false); + expect( + isClaudeTransientUpstreamError({ + parsed: { + result: "No conversation found with session id abc-123", + errors: [{ message: "No conversation found with session id abc-123" }], + }, + }), + ).toBe(false); + }); + + it("does not classify deterministic validation errors as transient", () => { + expect( + isClaudeTransientUpstreamError({ + errorMessage: "Invalid request_error: Unknown parameter 'foo'.", + }), + ).toBe(false); + }); +}); + +describe("extractClaudeRetryNotBefore", () => { + it("parses the 'resets 4pm' hint in its explicit timezone", () => { + const now = new Date("2026-04-22T15:15:00.000Z"); + const extracted = extractClaudeRetryNotBefore( + { errorMessage: "You're out of extra usage · resets 4pm (America/Chicago)" }, + now, + ); + expect(extracted?.toISOString()).toBe("2026-04-22T21:00:00.000Z"); + }); + + it("rolls forward past midnight when the reset time has already passed today", () => { + const now = new Date("2026-04-22T23:30:00.000Z"); + const extracted = extractClaudeRetryNotBefore( + { errorMessage: "Usage limit reached. Resets at 3:15 AM (UTC)." }, + now, + ); + expect(extracted?.toISOString()).toBe("2026-04-23T03:15:00.000Z"); + }); + + it("returns null when no reset hint is present", () => { + expect( + extractClaudeRetryNotBefore({ errorMessage: "Overloaded. Try again later." 
}, new Date()), + ).toBeNull(); + }); +}); diff --git a/packages/adapters/claude-local/src/server/parse.ts b/packages/adapters/claude-local/src/server/parse.ts index c41e16cdfb..4591aaad02 100644 --- a/packages/adapters/claude-local/src/server/parse.ts +++ b/packages/adapters/claude-local/src/server/parse.ts @@ -1,9 +1,19 @@ import type { UsageSummary } from "@paperclipai/adapter-utils"; -import { asString, asNumber, parseObject, parseJson } from "@paperclipai/adapter-utils/server-utils"; +import { + asString, + asNumber, + parseObject, + parseJson, +} from "@paperclipai/adapter-utils/server-utils"; const CLAUDE_AUTH_REQUIRED_RE = /(?:not\s+logged\s+in|please\s+log\s+in|please\s+run\s+`?claude\s+login`?|login\s+required|requires\s+login|unauthorized|authentication\s+required)/i; const URL_RE = /(https?:\/\/[^\s'"`<>()[\]{};,!?]+[^\s'"`<>()[\]{};,!.?:]+)/gi; +const CLAUDE_TRANSIENT_UPSTREAM_RE = + /(?:rate[-\s]?limit(?:ed)?|rate_limit_error|too\s+many\s+requests|\b429\b|overloaded(?:_error)?|server\s+overloaded|service\s+unavailable|\b503\b|\b529\b|high\s+demand|try\s+again\s+later|temporarily\s+unavailable|throttl(?:ed|ing)|throttlingexception|servicequotaexceededexception|out\s+of\s+extra\s+usage|extra\s+usage\b|claude\s+usage\s+limit\s+reached|5[-\s]?hour\s+limit\s+reached|weekly\s+limit\s+reached|usage\s+limit\s+reached|usage\s+cap\s+reached)/i; +const CLAUDE_EXTRA_USAGE_RESET_RE = + /(?:out\s+of\s+extra\s+usage|extra\s+usage|usage\s+limit\s+reached|usage\s+cap\s+reached|5[-\s]?hour\s+limit\s+reached|weekly\s+limit\s+reached|claude\s+usage\s+limit\s+reached)[\s\S]{0,80}?\bresets?\s+(?:at\s+)?([^\n()]+?)(?:\s*\(([^)]+)\))?(?:[.!]|\n|$)/i; + export function parseClaudeStreamJson(stdout: string) { let sessionId: string | null = null; let model = ""; @@ -177,3 +187,197 @@ export function isClaudeUnknownSessionError(parsed: Record): bo /no conversation found with session id|unknown session|session .* not found/i.test(msg), ); } + +function 
buildClaudeTransientHaystack(input: { + parsed?: Record | null; + stdout?: string | null; + stderr?: string | null; + errorMessage?: string | null; +}): string { + const parsed = input.parsed ?? null; + const resultText = parsed ? asString(parsed.result, "") : ""; + const parsedErrors = parsed ? extractClaudeErrorMessages(parsed) : []; + return [ + input.errorMessage ?? "", + resultText, + ...parsedErrors, + input.stdout ?? "", + input.stderr ?? "", + ] + .join("\n") + .split(/\r?\n/) + .map((line) => line.trim()) + .filter(Boolean) + .join("\n"); +} + +function readTimeZoneParts(date: Date, timeZone: string) { + const values = new Map( + new Intl.DateTimeFormat("en-US", { + timeZone, + hourCycle: "h23", + year: "numeric", + month: "2-digit", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + }).formatToParts(date).map((part) => [part.type, part.value]), + ); + return { + year: Number.parseInt(values.get("year") ?? "", 10), + month: Number.parseInt(values.get("month") ?? "", 10), + day: Number.parseInt(values.get("day") ?? "", 10), + hour: Number.parseInt(values.get("hour") ?? "", 10), + minute: Number.parseInt(values.get("minute") ?? 
"", 10), + }; +} + +function normalizeResetTimeZone(timeZoneHint: string | null | undefined): string | null { + const normalized = timeZoneHint?.trim(); + if (!normalized) return null; + if (/^(?:utc|gmt)$/i.test(normalized)) return "UTC"; + + try { + new Intl.DateTimeFormat("en-US", { timeZone: normalized }).format(new Date(0)); + return normalized; + } catch { + return null; + } +} + +function dateFromTimeZoneWallClock(input: { + year: number; + month: number; + day: number; + hour: number; + minute: number; + timeZone: string; +}): Date | null { + let candidate = new Date(Date.UTC(input.year, input.month - 1, input.day, input.hour, input.minute, 0, 0)); + const targetUtc = Date.UTC(input.year, input.month - 1, input.day, input.hour, input.minute, 0, 0); + + for (let attempt = 0; attempt < 4; attempt += 1) { + const actual = readTimeZoneParts(candidate, input.timeZone); + const actualUtc = Date.UTC(actual.year, actual.month - 1, actual.day, actual.hour, actual.minute, 0, 0); + const offsetMs = targetUtc - actualUtc; + if (offsetMs === 0) break; + candidate = new Date(candidate.getTime() + offsetMs); + } + + const verified = readTimeZoneParts(candidate, input.timeZone); + if ( + verified.year !== input.year || + verified.month !== input.month || + verified.day !== input.day || + verified.hour !== input.hour || + verified.minute !== input.minute + ) { + return null; + } + + return candidate; +} + +function nextClockTimeInTimeZone(input: { + now: Date; + hour: number; + minute: number; + timeZoneHint: string; +}): Date | null { + const timeZone = normalizeResetTimeZone(input.timeZoneHint); + if (!timeZone) return null; + + const nowParts = readTimeZoneParts(input.now, timeZone); + let retryAt = dateFromTimeZoneWallClock({ + year: nowParts.year, + month: nowParts.month, + day: nowParts.day, + hour: input.hour, + minute: input.minute, + timeZone, + }); + if (!retryAt) return null; + + if (retryAt.getTime() <= input.now.getTime()) { + const nextDay = new 
Date(Date.UTC(nowParts.year, nowParts.month - 1, nowParts.day + 1, 0, 0, 0, 0)); + retryAt = dateFromTimeZoneWallClock({ + year: nextDay.getUTCFullYear(), + month: nextDay.getUTCMonth() + 1, + day: nextDay.getUTCDate(), + hour: input.hour, + minute: input.minute, + timeZone, + }); + } + + return retryAt; +} + +function parseClaudeResetClockTime(clockText: string, now: Date, timeZoneHint?: string | null): Date | null { + const normalized = clockText.trim().replace(/\s+/g, " "); + const match = normalized.match(/^(\d{1,2})(?::(\d{2}))?\s*([ap])\.?\s*m\.?/i); + if (!match) return null; + + const hour12 = Number.parseInt(match[1] ?? "", 10); + const minute = Number.parseInt(match[2] ?? "0", 10); + if (!Number.isInteger(hour12) || hour12 < 1 || hour12 > 12) return null; + if (!Number.isInteger(minute) || minute < 0 || minute > 59) return null; + + let hour24 = hour12 % 12; + if ((match[3] ?? "").toLowerCase() === "p") hour24 += 12; + + if (timeZoneHint) { + const explicitRetryAt = nextClockTimeInTimeZone({ + now, + hour: hour24, + minute, + timeZoneHint, + }); + if (explicitRetryAt) return explicitRetryAt; + } + + const retryAt = new Date(now); + retryAt.setHours(hour24, minute, 0, 0); + if (retryAt.getTime() <= now.getTime()) { + retryAt.setDate(retryAt.getDate() + 1); + } + return retryAt; +} + +export function extractClaudeRetryNotBefore( + input: { + parsed?: Record | null; + stdout?: string | null; + stderr?: string | null; + errorMessage?: string | null; + }, + now = new Date(), +): Date | null { + const haystack = buildClaudeTransientHaystack(input); + const match = haystack.match(CLAUDE_EXTRA_USAGE_RESET_RE); + if (!match) return null; + return parseClaudeResetClockTime(match[1] ?? "", now, match[2]); +} + +export function isClaudeTransientUpstreamError(input: { + parsed?: Record | null; + stdout?: string | null; + stderr?: string | null; + errorMessage?: string | null; +}): boolean { + const parsed = input.parsed ?? 
null; + // Deterministic failures are handled by their own classifiers. + if (parsed && (isClaudeMaxTurnsResult(parsed) || isClaudeUnknownSessionError(parsed))) { + return false; + } + const loginMeta = detectClaudeLoginRequired({ + parsed, + stdout: input.stdout ?? "", + stderr: input.stderr ?? "", + }); + if (loginMeta.requiresLogin) return false; + + const haystack = buildClaudeTransientHaystack(input); + if (!haystack) return false; + return CLAUDE_TRANSIENT_UPSTREAM_RE.test(haystack); +} diff --git a/packages/adapters/codex-local/src/server/execute.ts b/packages/adapters/codex-local/src/server/execute.ts index 87d14a7249..2a6b253a16 100644 --- a/packages/adapters/codex-local/src/server/execute.ts +++ b/packages/adapters/codex-local/src/server/execute.ts @@ -34,6 +34,7 @@ import { } from "@paperclipai/adapter-utils/server-utils"; import { parseCodexJsonl, + extractCodexRetryNotBefore, isCodexTransientUpstreamError, isCodexUnknownSessionError, } from "./parse.js"; @@ -725,6 +726,21 @@ export async function execute(ctx: AdapterExecutionContext): Promise { ).toBe(true); }); + it("classifies usage-limit windows as transient and extracts the retry time", () => { + const errorMessage = "You've hit your usage limit for GPT-5.3-Codex-Spark. Switch to another model now, or try again at 11:31 PM."; + const now = new Date(2026, 3, 22, 22, 29, 2); + + expect(isCodexTransientUpstreamError({ errorMessage })).toBe(true); + expect(extractCodexRetryNotBefore({ errorMessage }, now)?.getTime()).toBe( + new Date(2026, 3, 22, 23, 31, 0, 0).getTime(), + ); + }); + + it("parses explicit timezone hints on usage-limit retry windows", () => { + const errorMessage = "You've hit your usage limit for GPT-5.3-Codex-Spark. 
Switch to another model now, or try again at 11:31 PM (America/Chicago)."; + const now = new Date("2026-04-23T03:29:02.000Z"); + + expect(extractCodexRetryNotBefore({ errorMessage }, now)?.toISOString()).toBe( + "2026-04-23T04:31:00.000Z", + ); + }); + it("does not classify deterministic compaction errors as transient", () => { expect( isCodexTransientUpstreamError({ diff --git a/packages/adapters/codex-local/src/server/parse.ts b/packages/adapters/codex-local/src/server/parse.ts index c3ecff03ad..679a3f8f4d 100644 --- a/packages/adapters/codex-local/src/server/parse.ts +++ b/packages/adapters/codex-local/src/server/parse.ts @@ -1,8 +1,15 @@ -import { asString, asNumber, parseObject, parseJson } from "@paperclipai/adapter-utils/server-utils"; +import { + asString, + asNumber, + parseObject, + parseJson, +} from "@paperclipai/adapter-utils/server-utils"; const CODEX_TRANSIENT_UPSTREAM_RE = /(?:we(?:'|’)re\s+currently\s+experiencing\s+high\s+demand|temporary\s+errors|rate[-\s]?limit(?:ed)?|too\s+many\s+requests|\b429\b|server\s+overloaded|service\s+unavailable|try\s+again\s+later)/i; const CODEX_REMOTE_COMPACTION_RE = /remote\s+compact\s+task/i; +const CODEX_USAGE_LIMIT_RE = + /you(?:'|’)ve hit your usage limit for .+\.\s+switch to another model now,\s+or try again at\s+([^.!\n]+)(?:[.!]|\n|$)/i; export function parseCodexJsonl(stdout: string) { let sessionId: string | null = null; @@ -76,12 +83,12 @@ export function isCodexUnknownSessionError(stdout: string, stderr: string): bool ); } -export function isCodexTransientUpstreamError(input: { +function buildCodexErrorHaystack(input: { stdout?: string | null; stderr?: string | null; errorMessage?: string | null; -}): boolean { - const haystack = [ +}): string { + return [ input.errorMessage ?? "", input.stdout ?? "", input.stderr ?? 
"", @@ -91,9 +98,164 @@ export function isCodexTransientUpstreamError(input: { .map((line) => line.trim()) .filter(Boolean) .join("\n"); +} +function readTimeZoneParts(date: Date, timeZone: string) { + const values = new Map( + new Intl.DateTimeFormat("en-US", { + timeZone, + hourCycle: "h23", + year: "numeric", + month: "2-digit", + day: "2-digit", + hour: "2-digit", + minute: "2-digit", + }).formatToParts(date).map((part) => [part.type, part.value]), + ); + return { + year: Number.parseInt(values.get("year") ?? "", 10), + month: Number.parseInt(values.get("month") ?? "", 10), + day: Number.parseInt(values.get("day") ?? "", 10), + hour: Number.parseInt(values.get("hour") ?? "", 10), + minute: Number.parseInt(values.get("minute") ?? "", 10), + }; +} + +function normalizeResetTimeZone(timeZoneHint: string | null | undefined): string | null { + const normalized = timeZoneHint?.trim(); + if (!normalized) return null; + if (/^(?:utc|gmt)$/i.test(normalized)) return "UTC"; + + try { + new Intl.DateTimeFormat("en-US", { timeZone: normalized }).format(new Date(0)); + return normalized; + } catch { + return null; + } +} + +function dateFromTimeZoneWallClock(input: { + year: number; + month: number; + day: number; + hour: number; + minute: number; + timeZone: string; +}): Date | null { + let candidate = new Date(Date.UTC(input.year, input.month - 1, input.day, input.hour, input.minute, 0, 0)); + const targetUtc = Date.UTC(input.year, input.month - 1, input.day, input.hour, input.minute, 0, 0); + + for (let attempt = 0; attempt < 4; attempt += 1) { + const actual = readTimeZoneParts(candidate, input.timeZone); + const actualUtc = Date.UTC(actual.year, actual.month - 1, actual.day, actual.hour, actual.minute, 0, 0); + const offsetMs = targetUtc - actualUtc; + if (offsetMs === 0) break; + candidate = new Date(candidate.getTime() + offsetMs); + } + + const verified = readTimeZoneParts(candidate, input.timeZone); + if ( + verified.year !== input.year || + verified.month !== 
input.month || + verified.day !== input.day || + verified.hour !== input.hour || + verified.minute !== input.minute + ) { + return null; + } + + return candidate; +} + +function nextClockTimeInTimeZone(input: { + now: Date; + hour: number; + minute: number; + timeZoneHint: string; +}): Date | null { + const timeZone = normalizeResetTimeZone(input.timeZoneHint); + if (!timeZone) return null; + + const nowParts = readTimeZoneParts(input.now, timeZone); + let retryAt = dateFromTimeZoneWallClock({ + year: nowParts.year, + month: nowParts.month, + day: nowParts.day, + hour: input.hour, + minute: input.minute, + timeZone, + }); + if (!retryAt) return null; + + if (retryAt.getTime() <= input.now.getTime()) { + const nextDay = new Date(Date.UTC(nowParts.year, nowParts.month - 1, nowParts.day + 1, 0, 0, 0, 0)); + retryAt = dateFromTimeZoneWallClock({ + year: nextDay.getUTCFullYear(), + month: nextDay.getUTCMonth() + 1, + day: nextDay.getUTCDate(), + hour: input.hour, + minute: input.minute, + timeZone, + }); + } + + return retryAt; +} + +function parseLocalClockTime(clockText: string, now: Date): Date | null { + const normalized = clockText.trim(); + const match = normalized.match(/^(\d{1,2})(?::(\d{2}))?\s*([ap])\.?\s*m\.?(?:\s*\(([^)]+)\)|\s+([A-Z]{2,5}))?$/i); + if (!match) return null; + + const hour12 = Number.parseInt(match[1] ?? "", 10); + const minute = Number.parseInt(match[2] ?? "0", 10); + if (!Number.isInteger(hour12) || hour12 < 1 || hour12 > 12) return null; + if (!Number.isInteger(minute) || minute < 0 || minute > 59) return null; + + let hour24 = hour12 % 12; + if ((match[3] ?? "").toLowerCase() === "p") hour24 += 12; + + const timeZoneHint = match[4] ?? 
match[5]; + if (timeZoneHint) { + const explicitRetryAt = nextClockTimeInTimeZone({ + now, + hour: hour24, + minute, + timeZoneHint, + }); + if (explicitRetryAt) return explicitRetryAt; + } + + const retryAt = new Date(now); + retryAt.setHours(hour24, minute, 0, 0); + if (retryAt.getTime() <= now.getTime()) { + retryAt.setDate(retryAt.getDate() + 1); + } + return retryAt; +} + +export function extractCodexRetryNotBefore(input: { + stdout?: string | null; + stderr?: string | null; + errorMessage?: string | null; +}, now = new Date()): Date | null { + const haystack = buildCodexErrorHaystack(input); + const usageLimitMatch = haystack.match(CODEX_USAGE_LIMIT_RE); + if (!usageLimitMatch) return null; + return parseLocalClockTime(usageLimitMatch[1] ?? "", now); +} + +export function isCodexTransientUpstreamError(input: { + stdout?: string | null; + stderr?: string | null; + errorMessage?: string | null; +}): boolean { + const haystack = buildCodexErrorHaystack(input); + + if (extractCodexRetryNotBefore(input) != null) return true; if (!CODEX_TRANSIENT_UPSTREAM_RE.test(haystack)) return false; // Keep automatic retries scoped to the observed remote-compaction/high-demand - // failure shape; broader 429s may be caused by user or account limits. + // failure shape, plus explicit usage-limit windows that tell us when retrying + // becomes safe again. 
return CODEX_REMOTE_COMPACTION_RE.test(haystack) || /high\s+demand|temporary\s+errors/i.test(haystack); } diff --git a/server/src/__tests__/adapter-model-refresh-routes.test.ts b/server/src/__tests__/adapter-model-refresh-routes.test.ts new file mode 100644 index 0000000000..5be7a5a0fe --- /dev/null +++ b/server/src/__tests__/adapter-model-refresh-routes.test.ts @@ -0,0 +1,185 @@ +import express from "express"; +import request from "supertest"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { ServerAdapterModule } from "../adapters/index.js"; + +const mockAccessService = vi.hoisted(() => ({ + canUser: vi.fn(), + hasPermission: vi.fn(), + ensureMembership: vi.fn(), + setPrincipalPermission: vi.fn(), +})); + +const mockCompanySkillService = vi.hoisted(() => ({ + listRuntimeSkillEntries: vi.fn(), + resolveRequestedSkillKeys: vi.fn(), +})); + +const mockSecretService = vi.hoisted(() => ({ + normalizeAdapterConfigForPersistence: vi.fn(async (_companyId: string, config: Record) => config), + resolveAdapterConfigForRuntime: vi.fn(async (_companyId: string, config: Record) => ({ config })), +})); + +const mockAgentInstructionsService = vi.hoisted(() => ({ + materializeManagedBundle: vi.fn(), + getBundle: vi.fn(), + readFile: vi.fn(), + updateBundle: vi.fn(), + writeFile: vi.fn(), + deleteFile: vi.fn(), + exportFiles: vi.fn(), + ensureManagedBundle: vi.fn(), +})); + +const mockBudgetService = vi.hoisted(() => ({ + upsertPolicy: vi.fn(), +})); + +const mockHeartbeatService = vi.hoisted(() => ({ + cancelActiveForAgent: vi.fn(), +})); + +const mockIssueApprovalService = vi.hoisted(() => ({ + linkManyForApproval: vi.fn(), +})); + +const mockApprovalService = vi.hoisted(() => ({ + create: vi.fn(), + getById: vi.fn(), +})); + +const mockInstanceSettingsService = vi.hoisted(() => ({ + getGeneral: vi.fn(async () => ({ censorUsernameInLogs: false })), +})); + +const mockLogActivity = vi.hoisted(() => vi.fn()); + +function 
registerModuleMocks() { + vi.doMock("../services/index.js", () => ({ + agentService: () => ({}), + agentInstructionsService: () => mockAgentInstructionsService, + accessService: () => mockAccessService, + approvalService: () => mockApprovalService, + companySkillService: () => mockCompanySkillService, + budgetService: () => mockBudgetService, + heartbeatService: () => mockHeartbeatService, + issueApprovalService: () => mockIssueApprovalService, + issueService: () => ({}), + logActivity: mockLogActivity, + secretService: () => mockSecretService, + syncInstructionsBundleConfigFromFilePath: vi.fn((_agent, config) => config), + workspaceOperationService: () => ({}), + })); + + vi.doMock("../services/instance-settings.js", () => ({ + instanceSettingsService: () => mockInstanceSettingsService, + })); +} + +const refreshableAdapterType = "refreshable_adapter_route_test"; + +async function createApp() { + const [{ agentRoutes }, { errorHandler }] = await Promise.all([ + vi.importActual("../routes/agents.js"), + vi.importActual("../middleware/index.js"), + ]); + const app = express(); + app.use(express.json()); + app.use((req, _res, next) => { + (req as any).actor = { + type: "board", + userId: "local-board", + companyIds: ["company-1"], + source: "local_implicit", + isInstanceAdmin: false, + }; + next(); + }); + app.use("/api", agentRoutes({} as any)); + app.use(errorHandler); + return app; +} + +async function requestApp( + app: express.Express, + buildRequest: (baseUrl: string) => request.Test, +) { + const { createServer } = await vi.importActual("node:http"); + const server = createServer(app); + try { + await new Promise((resolve) => { + server.listen(0, "127.0.0.1", resolve); + }); + const address = server.address(); + if (!address || typeof address === "string") { + throw new Error("Expected HTTP server to listen on a TCP port"); + } + return await buildRequest(`http://127.0.0.1:${address.port}`); + } finally { + if (server.listening) { + await new Promise((resolve, 
reject) => { + server.close((error) => { + if (error) reject(error); + else resolve(); + }); + }); + } + } +} + +async function unregisterTestAdapter(type: string) { + const { unregisterServerAdapter } = await import("../adapters/index.js"); + unregisterServerAdapter(type); +} + +describe("adapter model refresh route", () => { + beforeEach(async () => { + vi.resetModules(); + vi.doUnmock("../routes/agents.js"); + vi.doUnmock("../routes/authz.js"); + vi.doUnmock("../middleware/index.js"); + registerModuleMocks(); + vi.clearAllMocks(); + mockCompanySkillService.listRuntimeSkillEntries.mockResolvedValue([]); + mockCompanySkillService.resolveRequestedSkillKeys.mockResolvedValue([]); + mockAccessService.canUser.mockResolvedValue(true); + mockAccessService.hasPermission.mockResolvedValue(true); + mockAccessService.ensureMembership.mockResolvedValue(undefined); + mockAccessService.setPrincipalPermission.mockResolvedValue(undefined); + mockLogActivity.mockResolvedValue(undefined); + await unregisterTestAdapter(refreshableAdapterType); + }); + + afterEach(async () => { + await unregisterTestAdapter(refreshableAdapterType); + }); + + it("uses refreshModels when refresh=1 is requested", async () => { + const listModels = vi.fn(async () => [{ id: "stale-model", label: "stale-model" }]); + const refreshModels = vi.fn(async () => [{ id: "fresh-model", label: "fresh-model" }]); + const { registerServerAdapter } = await import("../adapters/index.js"); + const adapter: ServerAdapterModule = { + type: refreshableAdapterType, + execute: async () => ({ exitCode: 0, signal: null, timedOut: false }), + testEnvironment: async () => ({ + adapterType: refreshableAdapterType, + status: "pass", + checks: [], + testedAt: new Date(0).toISOString(), + }), + listModels, + refreshModels, + }; + registerServerAdapter(adapter); + + const app = await createApp(); + const res = await requestApp(app, (baseUrl) => + 
request(baseUrl).get(`/api/companies/company-1/adapters/${refreshableAdapterType}/models?refresh=1`), + ); + + expect(res.status, JSON.stringify(res.body)).toBe(200); + expect(res.body).toEqual([{ id: "fresh-model", label: "fresh-model" }]); + expect(refreshModels).toHaveBeenCalledTimes(1); + expect(listModels).not.toHaveBeenCalled(); + }); +}); diff --git a/server/src/__tests__/adapter-models.test.ts b/server/src/__tests__/adapter-models.test.ts index a6c5eb3584..2be936d98a 100644 --- a/server/src/__tests__/adapter-models.test.ts +++ b/server/src/__tests__/adapter-models.test.ts @@ -3,7 +3,7 @@ import { models as codexFallbackModels } from "@paperclipai/adapter-codex-local" import { models as cursorFallbackModels } from "@paperclipai/adapter-cursor-local"; import { models as opencodeFallbackModels } from "@paperclipai/adapter-opencode-local"; import { resetOpenCodeModelsCacheForTests } from "@paperclipai/adapter-opencode-local/server"; -import { listAdapterModels } from "../adapters/index.js"; +import { listAdapterModels, refreshAdapterModels } from "../adapters/index.js"; import { resetCodexModelsCacheForTests } from "../adapters/codex-models.js"; import { resetCursorModelsCacheForTests, setCursorModelsRunnerForTests } from "../adapters/cursor-models.js"; @@ -52,6 +52,30 @@ describe("adapter model listing", () => { expect(first.some((model) => model.id === "codex-mini-latest")).toBe(true); }); + it("refreshes cached codex models on demand", async () => { + process.env.OPENAI_API_KEY = "sk-test"; + const fetchSpy = vi.spyOn(globalThis, "fetch") + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: [{ id: "gpt-5" }], + }), + } as Response) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + data: [{ id: "gpt-5.5" }], + }), + } as Response); + + const initial = await listAdapterModels("codex_local"); + const refreshed = await refreshAdapterModels("codex_local"); + + expect(fetchSpy).toHaveBeenCalledTimes(2); + expect(initial.some((model) 
=> model.id === "gpt-5")).toBe(true); + expect(refreshed.some((model) => model.id === "gpt-5.5")).toBe(true); + }); + it("falls back to static codex models when OpenAI model discovery fails", async () => { process.env.OPENAI_API_KEY = "sk-test"; vi.spyOn(globalThis, "fetch").mockResolvedValue({ diff --git a/server/src/__tests__/claude-local-execute.test.ts b/server/src/__tests__/claude-local-execute.test.ts index 233615cfe3..96f5854b44 100644 --- a/server/src/__tests__/claude-local-execute.test.ts +++ b/server/src/__tests__/claude-local-execute.test.ts @@ -1,9 +1,23 @@ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; import { execute } from "@paperclipai/adapter-claude-local/server"; +async function writeFailingClaudeCommand( + commandPath: string, + options: { resultEvent: Record; exitCode?: number }, +): Promise { + const payload = JSON.stringify(options.resultEvent); + const exit = options.exitCode ?? 
1; + const script = `#!/usr/bin/env node +console.log(${JSON.stringify(payload)}); +process.exit(${exit}); +`; + await fs.writeFile(commandPath, script, "utf8"); + await fs.chmod(commandPath, 0o755); +} + async function writeFakeClaudeCommand(commandPath: string): Promise { const script = `#!/usr/bin/env node const fs = require("node:fs"); @@ -398,8 +412,10 @@ describe("claude execute", () => { const previousHome = process.env.HOME; const previousPaperclipHome = process.env.PAPERCLIP_HOME; + const previousPaperclipInstanceId = process.env.PAPERCLIP_INSTANCE_ID; process.env.HOME = root; process.env.PAPERCLIP_HOME = paperclipHome; + process.env.PAPERCLIP_INSTANCE_ID = "default"; try { const first = await execute({ @@ -534,6 +550,8 @@ describe("claude execute", () => { else process.env.HOME = previousHome; if (previousPaperclipHome === undefined) delete process.env.PAPERCLIP_HOME; else process.env.PAPERCLIP_HOME = previousPaperclipHome; + if (previousPaperclipInstanceId === undefined) delete process.env.PAPERCLIP_INSTANCE_ID; + else process.env.PAPERCLIP_INSTANCE_ID = previousPaperclipInstanceId; await fs.rm(root, { recursive: true, force: true }); } }); @@ -553,8 +571,10 @@ describe("claude execute", () => { const previousHome = process.env.HOME; const previousPaperclipHome = process.env.PAPERCLIP_HOME; + const previousPaperclipInstanceId = process.env.PAPERCLIP_INSTANCE_ID; process.env.HOME = root; process.env.PAPERCLIP_HOME = paperclipHome; + process.env.PAPERCLIP_INSTANCE_ID = "default"; try { const first = await execute({ @@ -635,7 +655,184 @@ describe("claude execute", () => { else process.env.HOME = previousHome; if (previousPaperclipHome === undefined) delete process.env.PAPERCLIP_HOME; else process.env.PAPERCLIP_HOME = previousPaperclipHome; + if (previousPaperclipInstanceId === undefined) delete process.env.PAPERCLIP_INSTANCE_ID; + else process.env.PAPERCLIP_INSTANCE_ID = previousPaperclipInstanceId; await fs.rm(root, { recursive: true, force: true }); } }, 
15_000); + + it("classifies Claude 'out of extra usage' failures as transient upstream errors", async () => { + const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-claude-execute-transient-")); + const workspace = path.join(root, "workspace"); + const commandPath = path.join(root, "claude"); + await fs.mkdir(workspace, { recursive: true }); + await writeFailingClaudeCommand(commandPath, { + resultEvent: { + type: "result", + subtype: "error", + session_id: "claude-session-extra", + is_error: true, + result: "You're out of extra usage · resets 4pm (America/Chicago)", + errors: [{ type: "rate_limit_error", message: "You're out of extra usage" }], + }, + }); + + const previousHome = process.env.HOME; + process.env.HOME = root; + vi.useFakeTimers(); + vi.setSystemTime(new Date(2026, 3, 22, 10, 15, 0)); + + try { + const result = await execute({ + runId: "run-claude-transient", + agent: { + id: "agent-1", + companyId: "company-1", + name: "Claude Coder", + adapterType: "claude_local", + adapterConfig: {}, + }, + runtime: { + sessionId: null, + sessionParams: null, + sessionDisplayId: null, + taskKey: null, + }, + config: { + command: commandPath, + cwd: workspace, + promptTemplate: "Follow the paperclip heartbeat.", + }, + context: {}, + authToken: "run-jwt-token", + onLog: async () => {}, + }); + + expect(result.exitCode).toBe(1); + expect(result.errorCode).toBe("claude_transient_upstream"); + expect(result.errorFamily).toBe("transient_upstream"); + expect(result.retryNotBefore).toBe("2026-04-22T21:00:00.000Z"); + expect(result.resultJson?.retryNotBefore).toBe("2026-04-22T21:00:00.000Z"); + expect(result.errorMessage ?? 
"").toContain("extra usage"); + expect(new Date(String(result.resultJson?.transientRetryNotBefore)).getTime()).toBe( + new Date("2026-04-22T21:00:00.000Z").getTime(), + ); + } finally { + vi.useRealTimers(); + if (previousHome === undefined) delete process.env.HOME; + else process.env.HOME = previousHome; + await fs.rm(root, { recursive: true, force: true }); + } + }); + + it("classifies rate-limit / overloaded failures without reset metadata as transient", async () => { + const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-claude-execute-rate-limit-")); + const workspace = path.join(root, "workspace"); + const commandPath = path.join(root, "claude"); + await fs.mkdir(workspace, { recursive: true }); + await writeFailingClaudeCommand(commandPath, { + resultEvent: { + type: "result", + subtype: "error", + session_id: "claude-session-overloaded", + is_error: true, + result: "Overloaded", + errors: [{ type: "overloaded_error", message: "Overloaded_error: API is overloaded." }], + }, + }); + + const previousHome = process.env.HOME; + process.env.HOME = root; + + try { + const result = await execute({ + runId: "run-claude-overloaded", + agent: { + id: "agent-1", + companyId: "company-1", + name: "Claude Coder", + adapterType: "claude_local", + adapterConfig: {}, + }, + runtime: { + sessionId: null, + sessionParams: null, + sessionDisplayId: null, + taskKey: null, + }, + config: { + command: commandPath, + cwd: workspace, + promptTemplate: "Follow the paperclip heartbeat.", + }, + context: {}, + authToken: "run-jwt-token", + onLog: async () => {}, + }); + + expect(result.exitCode).toBe(1); + expect(result.errorCode).toBe("claude_transient_upstream"); + expect(result.errorFamily).toBe("transient_upstream"); + expect(result.retryNotBefore ?? null).toBeNull(); + expect(result.resultJson?.retryNotBefore ?? null).toBeNull(); + expect(result.resultJson?.transientRetryNotBefore ?? 
null).toBeNull(); + } finally { + if (previousHome === undefined) delete process.env.HOME; + else process.env.HOME = previousHome; + await fs.rm(root, { recursive: true, force: true }); + } + }); + + it("does not reclassify deterministic Claude failures (auth, max turns) as transient", async () => { + const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-claude-execute-max-turns-")); + const workspace = path.join(root, "workspace"); + const commandPath = path.join(root, "claude"); + await fs.mkdir(workspace, { recursive: true }); + await writeFailingClaudeCommand(commandPath, { + resultEvent: { + type: "result", + subtype: "error_max_turns", + session_id: "claude-session-max-turns", + is_error: true, + result: "Maximum turns reached.", + }, + }); + + const previousHome = process.env.HOME; + process.env.HOME = root; + + try { + const result = await execute({ + runId: "run-claude-max-turns", + agent: { + id: "agent-1", + companyId: "company-1", + name: "Claude Coder", + adapterType: "claude_local", + adapterConfig: {}, + }, + runtime: { + sessionId: null, + sessionParams: null, + sessionDisplayId: null, + taskKey: null, + }, + config: { + command: commandPath, + cwd: workspace, + promptTemplate: "Follow the paperclip heartbeat.", + }, + context: {}, + authToken: "run-jwt-token", + onLog: async () => {}, + }); + + expect(result.exitCode).toBe(1); + expect(result.errorCode).not.toBe("claude_transient_upstream"); + } finally { + if (previousHome === undefined) delete process.env.HOME; + else process.env.HOME = previousHome; + await fs.rm(root, { recursive: true, force: true }); + } + }); }); diff --git a/server/src/__tests__/cleanup-removal-service.test.ts b/server/src/__tests__/cleanup-removal-service.test.ts index f74f3798f1..25dc11d679 100644 --- a/server/src/__tests__/cleanup-removal-service.test.ts +++ b/server/src/__tests__/cleanup-removal-service.test.ts @@ -7,8 +7,11 @@ import { companies, companySkills, createDb, + documents, + documentRevisions, 
heartbeatRuns, issueComments, + issueDocuments, issueExecutionDecisions, issueReadStates, issues, @@ -43,6 +46,8 @@ describeEmbeddedPostgres("cleanup removal services", () => { await db.delete(issueReadStates); await db.delete(issueComments); await db.delete(issueExecutionDecisions); + await db.delete(documentRevisions); + await db.delete(documents); await db.delete(companySkills); await db.delete(heartbeatRuns); await db.delete(issues); @@ -148,6 +153,8 @@ describeEmbeddedPostgres("cleanup removal services", () => { it("removes issue read states and activity rows before deleting the company", async () => { const { companyId, issueId, runId } = await seedFixture(); + const documentId = randomUUID(); + const revisionId = randomUUID(); await db.insert(issueReadStates).values({ id: randomUUID(), @@ -177,11 +184,47 @@ describeEmbeddedPostgres("cleanup removal services", () => { details: {}, }); + await db.insert(documents).values({ + id: documentId, + companyId, + title: "Run summary", + latestBody: "body", + latestRevisionId: revisionId, + latestRevisionNumber: 1, + createdByAgentId: null, + createdByUserId: "user-1", + updatedByAgentId: null, + updatedByUserId: "user-1", + }); + + await db.insert(issueDocuments).values({ + id: randomUUID(), + companyId, + issueId, + documentId, + key: "summary", + }); + + await db.insert(documentRevisions).values({ + id: revisionId, + companyId, + documentId, + revisionNumber: 1, + title: "Run summary", + format: "markdown", + body: "body", + createdByAgentId: null, + createdByUserId: "user-1", + createdByRunId: runId, + }); + const removed = await companyService(db).remove(companyId); expect(removed?.id).toBe(companyId); await expect(db.select().from(companies).where(eq(companies.id, companyId))).resolves.toHaveLength(0); await expect(db.select().from(issues).where(eq(issues.id, issueId))).resolves.toHaveLength(0); + await expect(db.select().from(documents).where(eq(documents.id, documentId))).resolves.toHaveLength(0); + await 
expect(db.select().from(documentRevisions).where(eq(documentRevisions.id, revisionId))).resolves.toHaveLength(0); await expect(db.select().from(issueReadStates).where(eq(issueReadStates.companyId, companyId))).resolves.toHaveLength(0); await expect(db.select().from(activityLog).where(eq(activityLog.companyId, companyId))).resolves.toHaveLength(0); }); diff --git a/server/src/__tests__/codex-local-execute.test.ts b/server/src/__tests__/codex-local-execute.test.ts index 54b790ad12..5a5bc9f493 100644 --- a/server/src/__tests__/codex-local-execute.test.ts +++ b/server/src/__tests__/codex-local-execute.test.ts @@ -1,4 +1,4 @@ -import { describe, expect, it } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import fs from "node:fs/promises"; import os from "node:os"; import path from "node:path"; @@ -419,6 +419,7 @@ describe("codex execute", () => { expect(result.exitCode).toBe(1); expect(result.errorCode).toBe("codex_transient_upstream"); + expect(result.errorFamily).toBe("transient_upstream"); expect(result.errorMessage).toContain("high demand"); } finally { if (previousHome === undefined) delete process.env.HOME; @@ -427,6 +428,68 @@ describe("codex execute", () => { } }); + it("persists retry-not-before metadata for codex usage-limit failures", async () => { + const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-codex-execute-usage-limit-")); + const workspace = path.join(root, "workspace"); + const commandPath = path.join(root, "codex"); + await fs.mkdir(workspace, { recursive: true }); + await writeFailingCodexCommand( + commandPath, + "You've hit your usage limit for GPT-5.3-Codex-Spark. 
Switch to another model now, or try again at 11:31 PM.", + ); + + const previousHome = process.env.HOME; + process.env.HOME = root; + vi.useFakeTimers(); + vi.setSystemTime(new Date(2026, 3, 22, 22, 29, 0)); + + try { + const result = await execute({ + runId: "run-usage-limit", + agent: { + id: "agent-1", + companyId: "company-1", + name: "Codex Coder", + adapterType: "codex_local", + adapterConfig: {}, + }, + runtime: { + sessionId: "codex-session-usage-limit", + sessionParams: { + sessionId: "codex-session-usage-limit", + cwd: workspace, + }, + sessionDisplayId: "codex-session-usage-limit", + taskKey: null, + }, + config: { + command: commandPath, + cwd: workspace, + model: "gpt-5.3-codex-spark", + promptTemplate: "Follow the paperclip heartbeat.", + }, + context: {}, + authToken: "run-jwt-token", + onLog: async () => {}, + }); + + expect(result.exitCode).toBe(1); + expect(result.errorCode).toBe("codex_transient_upstream"); + expect(result.errorFamily).toBe("transient_upstream"); + const expectedRetryNotBefore = new Date(2026, 3, 22, 23, 31, 0, 0).toISOString(); + expect(result.retryNotBefore).toBe(expectedRetryNotBefore); + expect(result.resultJson?.retryNotBefore).toBe(expectedRetryNotBefore); + expect(new Date(String(result.resultJson?.transientRetryNotBefore)).getTime()).toBe( + new Date(2026, 3, 22, 23, 31, 0, 0).getTime(), + ); + } finally { + vi.useRealTimers(); + if (previousHome === undefined) delete process.env.HOME; + else process.env.HOME = previousHome; + await fs.rm(root, { recursive: true, force: true }); + } + }); + it("uses safer invocation settings and a fresh-session handoff for codex transient fallback retries", async () => { const root = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-codex-execute-fallback-")); const workspace = path.join(root, "workspace"); diff --git a/server/src/__tests__/heartbeat-process-recovery.test.ts b/server/src/__tests__/heartbeat-process-recovery.test.ts index fe0a24c6ab..d54239851a 100644 --- 
a/server/src/__tests__/heartbeat-process-recovery.test.ts +++ b/server/src/__tests__/heartbeat-process-recovery.test.ts @@ -765,11 +765,15 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { exitCode: 1, signal: null, timedOut: false, - errorCode: "codex_transient_upstream", + errorCode: "adapter_failed", + errorFamily: "transient_upstream", errorMessage: "Error running remote compact task: We're currently experiencing high demand, which may cause temporary errors.", provider: "openai", model: "gpt-5.4", + resultJson: { + errorFamily: "transient_upstream", + }, }); const { agentId, runId, issueId } = await seedQueuedIssueRunFixture(); @@ -790,7 +794,8 @@ describeEmbeddedPostgres("heartbeat orphaned process recovery", () => { const failedRun = runs?.find((row) => row.id === runId); const retryRun = runs?.find((row) => row.id !== runId); expect(failedRun?.status).toBe("failed"); - expect(failedRun?.errorCode).toBe("codex_transient_upstream"); + expect(failedRun?.errorCode).toBe("adapter_failed"); + expect((failedRun?.resultJson as Record | null)?.errorFamily).toBe("transient_upstream"); expect(retryRun?.status).toBe("scheduled_retry"); expect(retryRun?.scheduledRetryReason).toBe("transient_failure"); expect((retryRun?.contextSnapshot as Record | null)?.codexTransientFallbackMode).toBe("same_session"); diff --git a/server/src/__tests__/heartbeat-retry-scheduling.test.ts b/server/src/__tests__/heartbeat-retry-scheduling.test.ts index 09ab2ec7a7..162ea539ed 100644 --- a/server/src/__tests__/heartbeat-retry-scheduling.test.ts +++ b/server/src/__tests__/heartbeat-retry-scheduling.test.ts @@ -56,8 +56,15 @@ describeEmbeddedPostgres("heartbeat bounded retry scheduling", () => { agentId: string; now: Date; errorCode: string; + errorFamily?: "transient_upstream" | null; + retryNotBefore?: string | null; scheduledRetryAttempt?: number; + resultJson?: Record | null; + adapterType?: "codex_local" | "claude_local"; + agentName?: string; }) { + const 
adapterType = input.adapterType ?? "codex_local"; + const agentName = input.agentName ?? (adapterType === "claude_local" ? "ClaudeCoder" : "CodexCoder"); await db.insert(companies).values({ id: input.companyId, name: "Paperclip", @@ -68,10 +75,10 @@ describeEmbeddedPostgres("heartbeat bounded retry scheduling", () => { await db.insert(agents).values({ id: input.agentId, companyId: input.companyId, - name: "CodexCoder", + name: agentName, role: "engineer", status: "active", - adapterType: "codex_local", + adapterType, adapterConfig: {}, runtimeConfig: { heartbeat: { @@ -93,6 +100,15 @@ describeEmbeddedPostgres("heartbeat bounded retry scheduling", () => { finishedAt: input.now, scheduledRetryAttempt: input.scheduledRetryAttempt ?? 0, scheduledRetryReason: input.scheduledRetryAttempt ? "transient_failure" : null, + resultJson: input.resultJson ?? { + ...(input.errorFamily ? { errorFamily: input.errorFamily } : {}), + ...(input.retryNotBefore + ? { + retryNotBefore: input.retryNotBefore, + transientRetryNotBefore: input.retryNotBefore, + } + : {}), + }, contextSnapshot: { issueId: randomUUID(), wakeReason: "issue_assigned", @@ -299,7 +315,8 @@ describeEmbeddedPostgres("heartbeat bounded retry scheduling", () => { companyId, agentId, now, - errorCode: "codex_transient_upstream", + errorCode: "adapter_failed", + errorFamily: "transient_upstream", scheduledRetryAttempt: index, }); @@ -335,4 +352,110 @@ describeEmbeddedPostgres("heartbeat bounded retry scheduling", () => { await db.delete(companies); } }); + + it("honors codex retry-not-before timestamps when they exceed the default bounded backoff", async () => { + const companyId = randomUUID(); + const agentId = randomUUID(); + const runId = randomUUID(); + const now = new Date(2026, 3, 22, 22, 29, 0); + const retryNotBefore = new Date(2026, 3, 22, 23, 31, 0); + + await seedRetryFixture({ + runId, + companyId, + agentId, + now, + errorCode: "adapter_failed", + errorFamily: "transient_upstream", + retryNotBefore: 
retryNotBefore.toISOString(), + }); + + const scheduled = await heartbeat.scheduleBoundedRetry(runId, { + now, + random: () => 0.5, + }); + + expect(scheduled.outcome).toBe("scheduled"); + if (scheduled.outcome !== "scheduled") return; + expect(scheduled.dueAt.getTime()).toBe(retryNotBefore.getTime()); + + const retryRun = await db + .select({ + contextSnapshot: heartbeatRuns.contextSnapshot, + scheduledRetryAt: heartbeatRuns.scheduledRetryAt, + wakeupRequestId: heartbeatRuns.wakeupRequestId, + }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, scheduled.run.id)) + .then((rows) => rows[0] ?? null); + + expect(retryRun?.scheduledRetryAt?.getTime()).toBe(retryNotBefore.getTime()); + expect((retryRun?.contextSnapshot as Record | null)?.transientRetryNotBefore).toBe( + retryNotBefore.toISOString(), + ); + + const wakeupRequest = await db + .select({ payload: agentWakeupRequests.payload }) + .from(agentWakeupRequests) + .where(eq(agentWakeupRequests.id, retryRun?.wakeupRequestId ?? "")) + .then((rows) => rows[0] ?? 
null); + + expect((wakeupRequest?.payload as Record | null)?.transientRetryNotBefore).toBe( + retryNotBefore.toISOString(), + ); + }); + + it("schedules bounded retries for claude_transient_upstream and honors its retry-not-before hint", async () => { + const companyId = randomUUID(); + const agentId = randomUUID(); + const runId = randomUUID(); + const now = new Date(2026, 3, 22, 10, 0, 0); + const retryNotBefore = new Date(2026, 3, 22, 16, 0, 0); + + await seedRetryFixture({ + runId, + companyId, + agentId, + now, + errorCode: "adapter_failed", + errorFamily: "transient_upstream", + adapterType: "claude_local", + retryNotBefore: retryNotBefore.toISOString(), + }); + + const scheduled = await heartbeat.scheduleBoundedRetry(runId, { + now, + random: () => 0.5, + }); + + expect(scheduled.outcome).toBe("scheduled"); + if (scheduled.outcome !== "scheduled") return; + expect(scheduled.dueAt.getTime()).toBe(retryNotBefore.getTime()); + + const retryRun = await db + .select({ + contextSnapshot: heartbeatRuns.contextSnapshot, + scheduledRetryAt: heartbeatRuns.scheduledRetryAt, + wakeupRequestId: heartbeatRuns.wakeupRequestId, + }) + .from(heartbeatRuns) + .where(eq(heartbeatRuns.id, scheduled.run.id)) + .then((rows) => rows[0] ?? null); + + expect(retryRun?.scheduledRetryAt?.getTime()).toBe(retryNotBefore.getTime()); + const contextSnapshot = (retryRun?.contextSnapshot as Record | null) ?? {}; + expect(contextSnapshot.transientRetryNotBefore).toBe(retryNotBefore.toISOString()); + // Claude does not participate in the Codex fallback-mode ladder. + expect(contextSnapshot.codexTransientFallbackMode ?? null).toBeNull(); + + const wakeupRequest = await db + .select({ payload: agentWakeupRequests.payload }) + .from(agentWakeupRequests) + .where(eq(agentWakeupRequests.id, retryRun?.wakeupRequestId ?? "")) + .then((rows) => rows[0] ?? 
null); + + expect((wakeupRequest?.payload as Record | null)?.transientRetryNotBefore).toBe( + retryNotBefore.toISOString(), + ); + }); }); diff --git a/server/src/adapters/codex-models.ts b/server/src/adapters/codex-models.ts index 418bb6c76b..872779144f 100644 --- a/server/src/adapters/codex-models.ts +++ b/server/src/adapters/codex-models.ts @@ -70,14 +70,15 @@ async function fetchOpenAiModels(apiKey: string): Promise { } } -export async function listCodexModels(): Promise { +async function loadCodexModels(options?: { forceRefresh?: boolean }): Promise { + const forceRefresh = options?.forceRefresh === true; const apiKey = resolveOpenAiApiKey(); const fallback = dedupeModels(codexFallbackModels); if (!apiKey) return fallback; const now = Date.now(); const keyFingerprint = fingerprint(apiKey); - if (cached && cached.keyFingerprint === keyFingerprint && cached.expiresAt > now) { + if (!forceRefresh && cached && cached.keyFingerprint === keyFingerprint && cached.expiresAt > now) { return cached.models; } @@ -99,6 +100,14 @@ export async function listCodexModels(): Promise { return fallback; } +export async function listCodexModels(): Promise { + return loadCodexModels(); +} + +export async function refreshCodexModels(): Promise { + return loadCodexModels({ forceRefresh: true }); +} + export function resetCodexModelsCacheForTests() { cached = null; } diff --git a/server/src/adapters/index.ts b/server/src/adapters/index.ts index 49530dc772..9b27b32563 100644 --- a/server/src/adapters/index.ts +++ b/server/src/adapters/index.ts @@ -1,6 +1,7 @@ export { getServerAdapter, listAdapterModels, + refreshAdapterModels, listServerAdapters, findServerAdapter, findActiveServerAdapter, diff --git a/server/src/adapters/registry.ts b/server/src/adapters/registry.ts index 8268f0629a..20be46e11c 100644 --- a/server/src/adapters/registry.ts +++ b/server/src/adapters/registry.ts @@ -55,7 +55,7 @@ import { agentConfigurationDoc as openclawGatewayAgentConfigurationDoc, models as 
openclawGatewayModels, } from "@paperclipai/adapter-openclaw-gateway"; -import { listCodexModels } from "./codex-models.js"; +import { listCodexModels, refreshCodexModels } from "./codex-models.js"; import { listCursorModels } from "./cursor-models.js"; import { execute as piExecute, @@ -145,6 +145,7 @@ const codexLocalAdapter: ServerAdapterModule = { sessionManagement: getAdapterSessionManagement("codex_local") ?? undefined, models: codexModels, listModels: listCodexModels, + refreshModels: refreshCodexModels, supportsLocalAgentJwt: true, supportsInstructionsBundle: true, instructionsPathKey: "instructionsFilePath", @@ -459,6 +460,20 @@ export async function listAdapterModels(type: string): Promise<{ id: string; lab return adapter.models ?? []; } +export async function refreshAdapterModels(type: string): Promise<{ id: string; label: string }[]> { + const adapter = findActiveServerAdapter(type); + if (!adapter) return []; + if (adapter.refreshModels) { + const refreshed = await adapter.refreshModels(); + if (refreshed.length > 0) return refreshed; + } + if (adapter.listModels) { + const discovered = await adapter.listModels(); + if (discovered.length > 0) return discovered; + } + return adapter.models ?? 
[]; +} + export function listServerAdapters(): ServerAdapterModule[] { return Array.from(adaptersByType.values()); } diff --git a/server/src/routes/agents.ts b/server/src/routes/agents.ts index 758182c81f..1076654094 100644 --- a/server/src/routes/agents.ts +++ b/server/src/routes/agents.ts @@ -59,6 +59,7 @@ import { findActiveServerAdapter, findServerAdapter, listAdapterModels, + refreshAdapterModels, requireServerAdapter, } from "../adapters/index.js"; import { redactEventPayload } from "../redaction.js"; @@ -875,7 +876,12 @@ export function agentRoutes(db: Db) { const companyId = req.params.companyId as string; assertCompanyAccess(req, companyId); const type = assertKnownAdapterType(req.params.type as string); - const models = await listAdapterModels(type); + const refresh = typeof req.query.refresh === "string" + ? ["1", "true", "yes"].includes(req.query.refresh.toLowerCase()) + : false; + const models = refresh + ? await refreshAdapterModels(type) + : await listAdapterModels(type); res.json(models); }); diff --git a/server/src/services/companies.ts b/server/src/services/companies.ts index 89678572ca..9220081ef0 100644 --- a/server/src/services/companies.ts +++ b/server/src/services/companies.ts @@ -27,6 +27,7 @@ import { principalPermissionGrants, companyMemberships, companySkills, + documents, } from "@paperclipai/db"; import { notFound, unprocessable } from "../errors.js"; @@ -279,6 +280,7 @@ export function companyService(db: Db) { await tx.delete(companyMemberships).where(eq(companyMemberships.companyId, id)); await tx.delete(companySkills).where(eq(companySkills.companyId, id)); await tx.delete(issueReadStates).where(eq(issueReadStates.companyId, id)); + await tx.delete(documents).where(eq(documents.companyId, id)); await tx.delete(issues).where(eq(issues.companyId, id)); await tx.delete(companyLogos).where(eq(companyLogos.companyId, id)); await tx.delete(assets).where(eq(assets.companyId, id)); diff --git a/server/src/services/heartbeat.ts 
b/server/src/services/heartbeat.ts index 8dbc3b74b8..7fa908e4da 100644 --- a/server/src/services/heartbeat.ts +++ b/server/src/services/heartbeat.ts @@ -179,6 +179,61 @@ function resolveCodexTransientFallbackMode(attempt: number): CodexTransientFallb if (attempt === 3) return "fresh_session"; return "fresh_session_safer_invocation"; } + +function readHeartbeatRunErrorFamily( + run: Pick, +) { + const resultJson = parseObject(run.resultJson); + const persistedFamily = readNonEmptyString(resultJson.errorFamily); + if (persistedFamily) return persistedFamily; + + if (run.errorCode === "codex_transient_upstream" || run.errorCode === "claude_transient_upstream") { + return "transient_upstream"; + } + return null; +} + +function readTransientRetryNotBeforeFromRun(run: Pick) { + const resultJson = parseObject(run.resultJson); + const value = resultJson.retryNotBefore ?? resultJson.transientRetryNotBefore; + if (!(typeof value === "string" || typeof value === "number" || value instanceof Date)) { + return null; + } + const parsed = new Date(value); + return Number.isNaN(parsed.getTime()) ? null : parsed; +} + +function readTransientRecoveryContractFromRun( + run: Pick, +) { + return readHeartbeatRunErrorFamily(run) === "transient_upstream" + ? { + errorFamily: "transient_upstream" as const, + retryNotBefore: readTransientRetryNotBeforeFromRun(run), + } + : null; +} + +function mergeAdapterRecoveryMetadata(input: { + resultJson: Record | null | undefined; + errorFamily?: string | null; + retryNotBefore?: string | null; +}) { + const errorFamily = readNonEmptyString(input.errorFamily); + const retryNotBefore = readNonEmptyString(input.retryNotBefore); + if (!input.resultJson && !errorFamily && !retryNotBefore) return input.resultJson ?? null; + + return { + ...(input.resultJson ?? {}), + ...(errorFamily ? { errorFamily } : {}), + ...(retryNotBefore + ? 
{ + retryNotBefore, + transientRetryNotBefore: retryNotBefore, + } + : {}), + }; +} const RUNNING_ISSUE_WAKE_REASONS_REQUIRING_FOLLOWUP = new Set(["approval_approved"]); const SESSIONED_LOCAL_ADAPTERS = new Set([ "claude_local", @@ -3267,13 +3322,18 @@ export function heartbeatService(db: Db) { const retryReason = opts?.retryReason ?? BOUNDED_TRANSIENT_HEARTBEAT_RETRY_REASON; const wakeReason = opts?.wakeReason ?? BOUNDED_TRANSIENT_HEARTBEAT_RETRY_WAKE_REASON; const nextAttempt = (run.scheduledRetryAttempt ?? 0) + 1; - const schedule = computeBoundedTransientHeartbeatRetrySchedule(nextAttempt, now, opts?.random); + const baseSchedule = computeBoundedTransientHeartbeatRetrySchedule(nextAttempt, now, opts?.random); + const transientRecovery = + retryReason === BOUNDED_TRANSIENT_HEARTBEAT_RETRY_REASON + ? readTransientRecoveryContractFromRun(run) + : null; const codexTransientFallbackMode = - agent.adapterType === "codex_local" && retryReason === BOUNDED_TRANSIENT_HEARTBEAT_RETRY_REASON && run.errorCode === "codex_transient_upstream" + agent.adapterType === "codex_local" && transientRecovery ? resolveCodexTransientFallbackMode(nextAttempt) : null; + const transientRetryNotBefore = transientRecovery?.retryNotBefore ?? null; - if (!schedule) { + if (!baseSchedule) { await appendRunEvent(run, await nextRunEventSeq(run.id), { eventType: "lifecycle", stream: "system", @@ -3291,6 +3351,14 @@ export function heartbeatService(db: Db) { maxAttempts: BOUNDED_TRANSIENT_HEARTBEAT_RETRY_MAX_ATTEMPTS, }; } + const schedule = + transientRetryNotBefore && transientRetryNotBefore.getTime() > baseSchedule.dueAt.getTime() + ? 
{ + ...baseSchedule, + dueAt: transientRetryNotBefore, + delayMs: Math.max(0, transientRetryNotBefore.getTime() - now.getTime()), + } + : baseSchedule; const contextSnapshot = parseObject(run.contextSnapshot); const issueId = readNonEmptyString(contextSnapshot.issueId); @@ -3301,8 +3369,10 @@ export function heartbeatService(db: Db) { retryOfRunId: run.id, wakeReason, retryReason, + ...(transientRecovery ? { errorFamily: transientRecovery.errorFamily } : {}), scheduledRetryAttempt: schedule.attempt, scheduledRetryAt: schedule.dueAt.toISOString(), + ...(transientRetryNotBefore ? { transientRetryNotBefore: transientRetryNotBefore.toISOString() } : {}), ...(codexTransientFallbackMode ? { codexTransientFallbackMode } : {}), }; @@ -3319,8 +3389,10 @@ export function heartbeatService(db: Db) { ...(issueId ? { issueId } : {}), retryOfRunId: run.id, retryReason, + ...(transientRecovery ? { errorFamily: transientRecovery.errorFamily } : {}), scheduledRetryAttempt: schedule.attempt, scheduledRetryAt: schedule.dueAt.toISOString(), + ...(transientRetryNotBefore ? { transientRetryNotBefore: transientRetryNotBefore.toISOString() } : {}), ...(codexTransientFallbackMode ? { codexTransientFallbackMode } : {}), }, status: "queued", @@ -3383,10 +3455,12 @@ export function heartbeatService(db: Db) { payload: { retryRunId: retryRun.id, retryReason, + ...(transientRecovery ? { errorFamily: transientRecovery.errorFamily } : {}), scheduledRetryAttempt: schedule.attempt, scheduledRetryAt: schedule.dueAt.toISOString(), baseDelayMs: schedule.baseDelayMs, delayMs: schedule.delayMs, + ...(transientRetryNotBefore ? { transientRetryNotBefore: transientRetryNotBefore.toISOString() } : {}), ...(codexTransientFallbackMode ? { codexTransientFallbackMode } : {}), }, }); @@ -5872,7 +5946,11 @@ export function heartbeatService(db: Db) { const persistedResultJson = mergeHeartbeatRunResultJson( mergeRunStopMetadataForAgent(agent, outcome, { - resultJson: adapterResult.resultJson ?? 
null, + resultJson: mergeAdapterRecoveryMetadata({ + resultJson: adapterResult.resultJson ?? null, + errorFamily: adapterResult.errorFamily ?? null, + retryNotBefore: adapterResult.retryNotBefore ?? null, + }), errorCode: runErrorCode, errorMessage: runErrorMessage, }), @@ -5933,7 +6011,7 @@ export function heartbeatService(db: Db) { ); } } - if (outcome === "failed" && livenessRun.errorCode === "codex_transient_upstream") { + if (outcome === "failed" && readTransientRecoveryContractFromRun(livenessRun)) { await scheduleBoundedRetryForRun(livenessRun, agent); } await finalizeIssueCommentPolicy(livenessRun, agent); @@ -6267,8 +6345,16 @@ export function heartbeatService(db: Db) { }; } const deferredCommentIds = extractWakeCommentIds(deferredContextSeed); + const deferredWakeReason = readNonEmptyString(deferredContextSeed.wakeReason); + // Only human/comment-reopen interactions should revive completed issues; + // system follow-ups such as retry or cleanup wakes must not reopen closed work. const shouldReopenDeferredCommentWake = - deferredCommentIds.length > 0 && (issue.status === "done" || issue.status === "cancelled"); + deferredCommentIds.length > 0 && + (issue.status === "done" || issue.status === "cancelled") && + ( + deferred.requestedByActorType === "user" || + deferredWakeReason === "issue_reopened_via_comment" + ); let reopenedActivity: LogActivityInput | null = null; if (shouldReopenDeferredCommentWake) { diff --git a/ui/src/api/agents.ts b/ui/src/api/agents.ts index 9f7c8ba87f..0091be1d27 100644 --- a/ui/src/api/agents.ts +++ b/ui/src/api/agents.ts @@ -164,9 +164,9 @@ export const agentsApi = { api.get(agentPath(id, companyId, "/task-sessions")), resetSession: (id: string, taskKey?: string | null, companyId?: string) => api.post(agentPath(id, companyId, "/runtime-state/reset-session"), { taskKey: taskKey ?? 
null }), - adapterModels: (companyId: string, type: string) => + adapterModels: (companyId: string, type: string, options?: { refresh?: boolean }) => api.get( - `/companies/${encodeURIComponent(companyId)}/adapters/${encodeURIComponent(type)}/models`, + `/companies/${encodeURIComponent(companyId)}/adapters/${encodeURIComponent(type)}/models${options?.refresh ? "?refresh=1" : ""}`, ), detectModel: (companyId: string, type: string) => api.get( diff --git a/ui/src/components/AgentConfigForm.tsx b/ui/src/components/AgentConfigForm.tsx index c786182063..a63a3b4ad5 100644 --- a/ui/src/components/AgentConfigForm.tsx +++ b/ui/src/components/AgentConfigForm.tsx @@ -302,16 +302,19 @@ export function AgentConfigForm(props: AgentConfigFormProps) { ); // Fetch adapter models for the effective adapter type + const modelQueryKey = selectedCompanyId + ? queryKeys.agents.adapterModels(selectedCompanyId, adapterType) + : ["agents", "none", "adapter-models", adapterType]; const { data: fetchedModels, error: fetchedModelsError, } = useQuery({ - queryKey: selectedCompanyId - ? queryKeys.agents.adapterModels(selectedCompanyId, adapterType) - : ["agents", "none", "adapter-models", adapterType], + queryKey: modelQueryKey, queryFn: () => agentsApi.adapterModels(selectedCompanyId!, adapterType), enabled: Boolean(selectedCompanyId), }); + const [refreshModelsError, setRefreshModelsError] = useState(null); + const [refreshingModels, setRefreshingModels] = useState(false); const models = fetchedModels ?? externalModels ?? []; const adapterCommandField = adapterType === "hermes_local" ? "hermesCommand" : "command"; @@ -401,6 +404,20 @@ export function AgentConfigForm(props: AgentConfigFormProps) { ? val!.model : eff("adapterConfig", "model", String(config.model ?? 
"")); + async function handleRefreshModels() { + if (!selectedCompanyId) return; + setRefreshingModels(true); + setRefreshModelsError(null); + try { + const refreshed = await agentsApi.adapterModels(selectedCompanyId, adapterType, { refresh: true }); + queryClient.setQueryData(modelQueryKey, refreshed); + } catch (error) { + setRefreshModelsError(error instanceof Error ? error.message : "Failed to refresh adapter models."); + } finally { + setRefreshingModels(false); + } + } + const thinkingEffortKey = adapterType === "codex_local" ? "modelReasoningEffort" @@ -792,14 +809,17 @@ export function AgentConfigForm(props: AgentConfigFormProps) { const result = await refetchDetectedModel(); return result.data?.model ?? null; }} + onRefreshModels={adapterType === "codex_local" ? handleRefreshModels : undefined} + refreshingModels={refreshingModels} detectModelLabel="Detect model" emptyDetectHint="No model detected. Select or enter one manually." /> - {fetchedModelsError && ( + {(refreshModelsError || fetchedModelsError) && (

- {fetchedModelsError instanceof Error - ? fetchedModelsError.message - : "Failed to load adapter models."} + {refreshModelsError + ?? (fetchedModelsError instanceof Error + ? fetchedModelsError.message + : "Failed to load adapter models.")}

)} @@ -1134,6 +1154,8 @@ function ModelDropdown({ detectedModel, detectedModelCandidates, onDetectModel, + onRefreshModels, + refreshingModels, detectModelLabel, emptyDetectHint, }: { @@ -1149,6 +1171,8 @@ function ModelDropdown({ detectedModel?: string | null; detectedModelCandidates?: string[]; onDetectModel?: () => Promise; + onRefreshModels?: () => Promise; + refreshingModels?: boolean; detectModelLabel?: string; emptyDetectHint?: string; }) { @@ -1280,6 +1304,24 @@ function ModelDropdown({ {detectingModel ? "Detecting..." : detectedModel ? (detectModelLabel?.replace(/^Detect\b/, "Re-detect") ?? "Re-detect from config") : (detectModelLabel ?? "Detect from config")} )} + {onRefreshModels && !modelSearch.trim() && ( + + )} {value && (!models.some((m) => m.id === value) || promotedModelIds.has(value)) && (