[codex] Improve agent runtime recovery and governance (#4086)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - The heartbeat runtime, agent import path, and agent configuration
defaults determine whether work is dispatched safely and predictably.
> - Several accumulated fixes all touched agent execution recovery, wake
routing, import behavior, and runtime concurrency defaults.
> - Those changes need to land together so the heartbeat service and
agent creation defaults stay internally consistent.
> - This pull request groups the runtime/governance changes from the
split branch into one standalone branch.
> - The benefit is safer recovery for stranded runs, bounded high-volume
reads, imported-agent approval correctness, skill-template support, and
a clearer default concurrency policy.

## What Changed

- Fixed stranded continuation recovery so successful automatic retries
are requeued instead of incorrectly blocking the issue.
- Bounded high-volume issue/log reads across issue, heartbeat, agent,
project, and workspace paths.
- Fixed imported-agent approval and instruction-path permission
handling.
- Quarantined seeded worktree execution state during worktree
provisioning.
- Queued approval follow-up wakes and hardened SQL_ASCII heartbeat
output handling.
- Added reusable agent instruction templates for hiring flows.
- Set the default max concurrent agent runs to five and updated related
UI/tests/docs.

## Verification

- `pnpm install --frozen-lockfile`
- `pnpm exec vitest run server/src/__tests__/company-portability.test.ts
server/src/__tests__/heartbeat-process-recovery.test.ts
server/src/__tests__/heartbeat-comment-wake-batching.test.ts
server/src/__tests__/heartbeat-list.test.ts
server/src/__tests__/issues-service.test.ts
server/src/__tests__/agent-permissions-routes.test.ts
packages/adapter-utils/src/server-utils.test.ts
ui/src/lib/new-agent-runtime-config.test.ts`
- Split integration check: merged this branch first, followed by the
other [PAP-1614](/PAP/issues/PAP-1614) branches, with no merge
conflicts.
- Confirmed this branch does not include `pnpm-lock.yaml`.

## Risks

- Medium risk: touches heartbeat recovery, queueing, and issue list
bounds in central runtime paths.
- Imported-agent and concurrency default behavior changes may affect
existing automation that assumes one-at-a-time default runs.
- No database migrations are included.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5.4 tool-enabled coding model, agentic
code-editing/runtime with local shell and GitHub CLI access; exact
context window and reasoning mode are not exposed by the Paperclip
harness.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta
2026-04-20 06:19:48 -05:00
committed by GitHub
parent 057fee4836
commit 16b2b84d84
38 changed files with 1569 additions and 240 deletions

View File

@@ -3,12 +3,15 @@ import os from "node:os";
import path from "node:path";
import { execFileSync } from "node:child_process";
import { randomUUID } from "node:crypto";
import { eq } from "drizzle-orm";
import { afterEach, describe, expect, it, vi } from "vitest";
import {
agents,
authUsers,
companies,
createDb,
issueComments,
issues,
projects,
routines,
routineTriggers,
@@ -17,6 +20,7 @@ import {
copyGitHooksToWorktreeGitDir,
copySeededSecretsKey,
pauseSeededScheduledRoutines,
quarantineSeededWorktreeExecutionState,
readSourceAttachmentBody,
rebindWorkspaceCwd,
resolveSourceConfigPath,
@@ -282,6 +286,138 @@ describe("worktree helpers", () => {
expect(full.nullifyColumns).toEqual({});
});
itEmbeddedPostgres("quarantines copied live execution state in seeded worktree databases", async () => {
const tempDb = await startEmbeddedPostgresTestDatabase("paperclip-worktree-quarantine-");
const db = createDb(tempDb.connectionString);
const companyId = randomUUID();
const agentId = randomUUID();
const idleAgentId = randomUUID();
const inProgressIssueId = randomUUID();
const todoIssueId = randomUUID();
const reviewIssueId = randomUUID();
const userIssueId = randomUUID();
try {
await db.insert(companies).values({
id: companyId,
name: "Paperclip",
issuePrefix: "WTQ",
requireBoardApprovalForNewAgents: false,
});
await db.insert(agents).values([
{
id: agentId,
companyId,
name: "CodexCoder",
role: "engineer",
status: "running",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: {
heartbeat: { enabled: true, intervalSec: 60 },
wakeOnDemand: true,
},
permissions: {},
},
{
id: idleAgentId,
companyId,
name: "Reviewer",
role: "reviewer",
status: "idle",
adapterType: "codex_local",
adapterConfig: {},
runtimeConfig: { heartbeat: { enabled: false, intervalSec: 300 } },
permissions: {},
},
]);
await db.insert(issues).values([
{
id: inProgressIssueId,
companyId,
title: "Copied in-flight issue",
status: "in_progress",
priority: "medium",
assigneeAgentId: agentId,
issueNumber: 1,
identifier: "WTQ-1",
executionAgentNameKey: "codexcoder",
executionLockedAt: new Date("2026-04-18T00:00:00.000Z"),
},
{
id: todoIssueId,
companyId,
title: "Copied assigned todo issue",
status: "todo",
priority: "medium",
assigneeAgentId: agentId,
issueNumber: 2,
identifier: "WTQ-2",
},
{
id: reviewIssueId,
companyId,
title: "Copied assigned review issue",
status: "in_review",
priority: "medium",
assigneeAgentId: idleAgentId,
issueNumber: 3,
identifier: "WTQ-3",
},
{
id: userIssueId,
companyId,
title: "Copied user issue",
status: "todo",
priority: "medium",
assigneeUserId: "user-1",
issueNumber: 4,
identifier: "WTQ-4",
},
]);
await expect(quarantineSeededWorktreeExecutionState(tempDb.connectionString)).resolves.toEqual({
disabledTimerHeartbeats: 1,
resetRunningAgents: 1,
quarantinedInProgressIssues: 1,
unassignedTodoIssues: 1,
unassignedReviewIssues: 1,
});
const [quarantinedAgent] = await db.select().from(agents).where(eq(agents.id, agentId));
expect(quarantinedAgent?.status).toBe("idle");
expect(quarantinedAgent?.runtimeConfig).toMatchObject({
heartbeat: { enabled: false, intervalSec: 60 },
wakeOnDemand: true,
});
const [inProgressIssue] = await db.select().from(issues).where(eq(issues.id, inProgressIssueId));
expect(inProgressIssue?.status).toBe("blocked");
expect(inProgressIssue?.assigneeAgentId).toBeNull();
expect(inProgressIssue?.executionAgentNameKey).toBeNull();
expect(inProgressIssue?.executionLockedAt).toBeNull();
const [todoIssue] = await db.select().from(issues).where(eq(issues.id, todoIssueId));
expect(todoIssue?.status).toBe("todo");
expect(todoIssue?.assigneeAgentId).toBeNull();
const [reviewIssue] = await db.select().from(issues).where(eq(issues.id, reviewIssueId));
expect(reviewIssue?.status).toBe("in_review");
expect(reviewIssue?.assigneeAgentId).toBeNull();
const [userIssue] = await db.select().from(issues).where(eq(issues.id, userIssueId));
expect(userIssue?.status).toBe("todo");
expect(userIssue?.assigneeUserId).toBe("user-1");
const comments = await db.select().from(issueComments).where(eq(issueComments.issueId, inProgressIssueId));
expect(comments).toHaveLength(1);
expect(comments[0]?.body).toContain("Quarantined during worktree seed");
} finally {
await db.$client?.end?.({ timeout: 5 }).catch(() => undefined);
await tempDb.cleanup();
}
}, 20_000);
it("copies the source local_encrypted secrets key into the seeded worktree instance", () => {
const tempRoot = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-worktree-secrets-"));
const originalInlineMasterKey = process.env.PAPERCLIP_SECRETS_MASTER_KEY;

View File

@@ -93,6 +93,7 @@ type WorktreeInitOptions = {
dbPort?: number;
seed?: boolean;
seedMode?: string;
preserveLiveWork?: boolean;
force?: boolean;
};
@@ -126,6 +127,7 @@ type WorktreeReseedOptions = {
fromDataDir?: string;
fromInstance?: string;
seedMode?: string;
preserveLiveWork?: boolean;
yes?: boolean;
allowLiveTarget?: boolean;
};
@@ -137,6 +139,7 @@ type WorktreeRepairOptions = {
fromDataDir?: string;
fromInstance?: string;
seedMode?: string;
preserveLiveWork?: boolean;
noSeed?: boolean;
allowLiveTarget?: boolean;
};
@@ -179,6 +182,8 @@ type CopiedGitHooksResult = {
type SeedWorktreeDatabaseResult = {
backupSummary: string;
pausedScheduledRoutines: number;
executionQuarantine: SeededWorktreeExecutionQuarantineSummary;
reboundWorkspaces: Array<{
name: string;
fromCwd: string;
@@ -186,6 +191,14 @@ type SeedWorktreeDatabaseResult = {
}>;
};
export type SeededWorktreeExecutionQuarantineSummary = {
disabledTimerHeartbeats: number;
resetRunningAgents: number;
quarantinedInProgressIssues: number;
unassignedTodoIssues: number;
unassignedReviewIssues: number;
};
function nonEmpty(value: string | null | undefined): string | null {
return typeof value === "string" && value.trim().length > 0 ? value.trim() : null;
}
@@ -198,6 +211,18 @@ function isCurrentSourceConfigPath(sourceConfigPath: string): boolean {
return path.resolve(currentConfigPath) === path.resolve(sourceConfigPath);
}
function formatSeededWorktreeExecutionQuarantineSummary(
summary: SeededWorktreeExecutionQuarantineSummary,
): string {
return [
`disabled timer heartbeats: ${summary.disabledTimerHeartbeats}`,
`reset running agents: ${summary.resetRunningAgents}`,
`quarantined in-progress issues: ${summary.quarantinedInProgressIssues}`,
`unassigned todo issues: ${summary.unassignedTodoIssues}`,
`unassigned review issues: ${summary.unassignedReviewIssues}`,
].join(", ");
}
const WORKTREE_NAME_PREFIX = "paperclip-";
function resolveWorktreeMakeName(name: string): string {
@@ -1119,6 +1144,133 @@ export async function pauseSeededScheduledRoutines(connectionString: string): Pr
}
}
const EMPTY_SEEDED_WORKTREE_EXECUTION_QUARANTINE_SUMMARY: SeededWorktreeExecutionQuarantineSummary = {
disabledTimerHeartbeats: 0,
resetRunningAgents: 0,
quarantinedInProgressIssues: 0,
unassignedTodoIssues: 0,
unassignedReviewIssues: 0,
};
function isRecord(value: unknown): value is Record<string, unknown> {
return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}
function isEnabledValue(value: unknown): boolean {
return value === true || value === "true" || value === 1 || value === "1";
}
function normalizeWorktreeRuntimeConfig(runtimeConfig: unknown): {
runtimeConfig: Record<string, unknown>;
disabledTimerHeartbeat: boolean;
changed: boolean;
} {
const nextRuntimeConfig = isRecord(runtimeConfig) ? { ...runtimeConfig } : {};
const heartbeat = isRecord(nextRuntimeConfig.heartbeat) ? { ...nextRuntimeConfig.heartbeat } : null;
if (!heartbeat) {
return { runtimeConfig: nextRuntimeConfig, disabledTimerHeartbeat: false, changed: false };
}
const disabledTimerHeartbeat = isEnabledValue(heartbeat.enabled);
if (heartbeat.enabled !== false) {
heartbeat.enabled = false;
nextRuntimeConfig.heartbeat = heartbeat;
return { runtimeConfig: nextRuntimeConfig, disabledTimerHeartbeat, changed: true };
}
return { runtimeConfig: nextRuntimeConfig, disabledTimerHeartbeat: false, changed: false };
}
export async function quarantineSeededWorktreeExecutionState(
connectionString: string,
): Promise<SeededWorktreeExecutionQuarantineSummary> {
const db = createDb(connectionString);
const summary = { ...EMPTY_SEEDED_WORKTREE_EXECUTION_QUARANTINE_SUMMARY };
try {
await db.transaction(async (tx) => {
const seededAgents = await tx
.select({
id: agents.id,
status: agents.status,
runtimeConfig: agents.runtimeConfig,
})
.from(agents);
for (const agent of seededAgents) {
const normalized = normalizeWorktreeRuntimeConfig(agent.runtimeConfig);
const nextStatus = agent.status === "running" ? "idle" : agent.status;
if (normalized.disabledTimerHeartbeat) {
summary.disabledTimerHeartbeats += 1;
}
if (agent.status === "running") {
summary.resetRunningAgents += 1;
}
if (normalized.changed || nextStatus !== agent.status) {
await tx
.update(agents)
.set({
runtimeConfig: normalized.runtimeConfig,
status: nextStatus,
updatedAt: new Date(),
})
.where(eq(agents.id, agent.id));
}
}
const affectedIssues = await tx
.select({
id: issues.id,
companyId: issues.companyId,
status: issues.status,
})
.from(issues)
.where(
and(
sql`${issues.assigneeAgentId} is not null`,
sql`${issues.assigneeUserId} is null`,
inArray(issues.status, ["todo", "in_progress", "in_review"]),
),
);
for (const issue of affectedIssues) {
const nextStatus = issue.status === "in_progress" ? "blocked" : issue.status;
await tx
.update(issues)
.set({
status: nextStatus,
assigneeAgentId: null,
checkoutRunId: null,
executionRunId: null,
executionAgentNameKey: null,
executionLockedAt: null,
executionWorkspaceId: null,
updatedAt: new Date(),
})
.where(eq(issues.id, issue.id));
if (issue.status === "in_progress") {
summary.quarantinedInProgressIssues += 1;
await tx.insert(issueComments).values({
companyId: issue.companyId,
issueId: issue.id,
body:
"Quarantined during worktree seed so copied in-flight work does not auto-run in this isolated instance. " +
"Reassign or unblock here only if you intentionally want the worktree instance to own this task.",
});
} else if (issue.status === "todo") {
summary.unassignedTodoIssues += 1;
} else if (issue.status === "in_review") {
summary.unassignedReviewIssues += 1;
}
}
});
return summary;
} finally {
await db.$client?.end?.({ timeout: 5 }).catch(() => undefined);
}
}
async function seedWorktreeDatabase(input: {
sourceConfigPath: string;
sourceConfig: PaperclipConfig;
@@ -1126,6 +1278,7 @@ async function seedWorktreeDatabase(input: {
targetPaths: WorktreeLocalPaths;
instanceId: string;
seedMode: WorktreeSeedMode;
preserveLiveWork?: boolean;
}): Promise<SeedWorktreeDatabaseResult> {
const seedPlan = resolveWorktreeSeedPlan(input.seedMode);
const sourceEnvFile = resolvePaperclipEnvFile(input.sourceConfigPath);
@@ -1176,7 +1329,10 @@ async function seedWorktreeDatabase(input: {
backupFile: backup.backupFile,
});
await applyPendingMigrations(targetConnectionString);
await pauseSeededScheduledRoutines(targetConnectionString);
const executionQuarantine = input.preserveLiveWork
? { ...EMPTY_SEEDED_WORKTREE_EXECUTION_QUARANTINE_SUMMARY }
: await quarantineSeededWorktreeExecutionState(targetConnectionString);
const pausedScheduledRoutines = await pauseSeededScheduledRoutines(targetConnectionString);
const reboundWorkspaces = await rebindSeededProjectWorkspaces({
targetConnectionString,
currentCwd: input.targetPaths.cwd,
@@ -1184,6 +1340,8 @@ async function seedWorktreeDatabase(input: {
return {
backupSummary: formatDatabaseBackupResult(backup),
pausedScheduledRoutines,
executionQuarantine,
reboundWorkspaces,
};
} finally {
@@ -1262,6 +1420,8 @@ async function runWorktreeInit(opts: WorktreeInitOptions): Promise<void> {
const copiedGitHooks = copyGitHooksToWorktreeGitDir(cwd);
let seedSummary: string | null = null;
let seedExecutionQuarantineSummary: SeededWorktreeExecutionQuarantineSummary | null = null;
let pausedScheduledRoutineCount: number | null = null;
let reboundWorkspaceSummary: SeedWorktreeDatabaseResult["reboundWorkspaces"] = [];
if (opts.seed !== false) {
if (!sourceConfig) {
@@ -1279,8 +1439,11 @@ async function runWorktreeInit(opts: WorktreeInitOptions): Promise<void> {
targetPaths: paths,
instanceId,
seedMode,
preserveLiveWork: opts.preserveLiveWork,
});
seedSummary = seeded.backupSummary;
seedExecutionQuarantineSummary = seeded.executionQuarantine;
pausedScheduledRoutineCount = seeded.pausedScheduledRoutines;
reboundWorkspaceSummary = seeded.reboundWorkspaces;
spinner.stop(`Seeded isolated worktree database (${seedMode}).`);
} catch (error) {
@@ -1303,6 +1466,16 @@ async function runWorktreeInit(opts: WorktreeInitOptions): Promise<void> {
if (seedSummary) {
p.log.message(pc.dim(`Seed mode: ${seedMode}`));
p.log.message(pc.dim(`Seed snapshot: ${seedSummary}`));
if (opts.preserveLiveWork) {
p.log.warning("Preserved copied live work; this worktree instance may auto-run source-instance assignments.");
} else if (seedExecutionQuarantineSummary) {
p.log.message(
pc.dim(`Seed execution quarantine: ${formatSeededWorktreeExecutionQuarantineSummary(seedExecutionQuarantineSummary)}`),
);
}
if (pausedScheduledRoutineCount != null) {
p.log.message(pc.dim(`Paused scheduled routines: ${pausedScheduledRoutineCount}`));
}
for (const rebound of reboundWorkspaceSummary) {
p.log.message(
pc.dim(`Rebound workspace ${rebound.name}: ${rebound.fromCwd} -> ${rebound.toCwd}`),
@@ -2947,11 +3120,20 @@ async function runWorktreeReseed(opts: WorktreeReseedOptions): Promise<void> {
targetPaths,
instanceId: targetPaths.instanceId,
seedMode,
preserveLiveWork: opts.preserveLiveWork,
});
spinner.stop(`Reseeded ${targetEndpoint.label} (${seedMode}).`);
p.log.message(pc.dim(`Source: ${source.configPath}`));
p.log.message(pc.dim(`Target: ${targetEndpoint.configPath}`));
p.log.message(pc.dim(`Seed snapshot: ${seeded.backupSummary}`));
if (opts.preserveLiveWork) {
p.log.warning("Preserved copied live work; this worktree instance may auto-run source-instance assignments.");
} else {
p.log.message(
pc.dim(`Seed execution quarantine: ${formatSeededWorktreeExecutionQuarantineSummary(seeded.executionQuarantine)}`),
);
}
p.log.message(pc.dim(`Paused scheduled routines: ${seeded.pausedScheduledRoutines}`));
for (const rebound of seeded.reboundWorkspaces) {
p.log.message(
pc.dim(`Rebound workspace ${rebound.name}: ${rebound.fromCwd} -> ${rebound.toCwd}`),
@@ -3015,6 +3197,7 @@ export async function worktreeRepairCommand(opts: WorktreeRepairOptions): Promis
fromConfig: source.configPath,
to: target.rootPath,
seedMode,
preserveLiveWork: opts.preserveLiveWork,
yes: true,
allowLiveTarget: opts.allowLiveTarget,
});
@@ -3047,6 +3230,7 @@ export async function worktreeRepairCommand(opts: WorktreeRepairOptions): Promis
fromInstance: opts.fromInstance,
seed: opts.noSeed ? false : true,
seedMode,
preserveLiveWork: opts.preserveLiveWork,
force: true,
});
} finally {
@@ -3070,6 +3254,7 @@ export function registerWorktreeCommands(program: Command): void {
.option("--server-port <port>", "Preferred server port", (value) => Number(value))
.option("--db-port <port>", "Preferred embedded Postgres port", (value) => Number(value))
.option("--seed-mode <mode>", "Seed profile: minimal or full (default: minimal)", "minimal")
.option("--preserve-live-work", "Do not quarantine copied agent timers or assigned open issues in the seeded worktree", false)
.option("--no-seed", "Skip database seeding from the source instance")
.option("--force", "Replace existing repo-local config and isolated instance data", false)
.action(worktreeMakeCommand);
@@ -3086,6 +3271,7 @@ export function registerWorktreeCommands(program: Command): void {
.option("--server-port <port>", "Preferred server port", (value) => Number(value))
.option("--db-port <port>", "Preferred embedded Postgres port", (value) => Number(value))
.option("--seed-mode <mode>", "Seed profile: minimal or full (default: minimal)", "minimal")
.option("--preserve-live-work", "Do not quarantine copied agent timers or assigned open issues in the seeded worktree", false)
.option("--no-seed", "Skip database seeding from the source instance")
.option("--force", "Replace existing repo-local config and isolated instance data", false)
.action(worktreeInitCommand);
@@ -3125,6 +3311,7 @@ export function registerWorktreeCommands(program: Command): void {
.option("--from-data-dir <path>", "Source PAPERCLIP_HOME used when deriving the source config")
.option("--from-instance <id>", "Source instance id when deriving the source config")
.option("--seed-mode <mode>", "Seed profile: minimal or full (default: full)", "full")
.option("--preserve-live-work", "Do not quarantine copied agent timers or assigned open issues in the seeded worktree", false)
.option("--yes", "Skip the destructive confirmation prompt", false)
.option("--allow-live-target", "Override the guard that requires the target worktree DB to be stopped first", false)
.action(worktreeReseedCommand);
@@ -3138,6 +3325,7 @@ export function registerWorktreeCommands(program: Command): void {
.option("--from-data-dir <path>", "Source PAPERCLIP_HOME used when deriving the source config")
.option("--from-instance <id>", "Source instance id when deriving the source config (default: default)")
.option("--seed-mode <mode>", "Seed profile: minimal or full (default: minimal)", "minimal")
.option("--preserve-live-work", "Do not quarantine copied agent timers or assigned open issues in the seeded worktree", false)
.option("--no-seed", "Repair metadata only and skip reseeding when bootstrapping a missing worktree config", false)
.option("--allow-live-target", "Override the guard that requires the target worktree DB to be stopped first", false)
.action(worktreeRepairCommand);