fix(worktree): bound git subprocesses with timeout + surface degraded health (#3281) (#3283)

* test: red — bounded git subprocess + structured worktree warnings (#3281)

Regression tests for #3281: worktree-related git subprocess calls have no
timeout bound, and timeout/error outcomes are not surfaced as structured signals.

Failing assertions:
- planWorktreePrune / listLinkedWorktreePaths / snapshotWorktreeInventory must
  return reason=git_timed_out (not generic git_list_failed) when execGit returns
  timedOut:true — enables callers to distinguish timeout from auth failure
- executeWorktreePrunePlan must include timedOut:true in result when the git
  prune call itself times out

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(worktree): bounded git subprocess + structured warning surfacing (#3281)

Root cause (PRED.k014): execGit / execGitDefault called spawnSync with no
timeout, so `git worktree list --porcelain` against a hung/locked repo
blocked the parent process indefinitely.  Downstream callers in core.cjs
and verify.cjs then swallowed any resulting failure silently via
catch { /* intentionally empty */ } (PRED.k302).

Fix:
- worktree-safety.cjs: execGitDefault now passes timeout:10000 to spawnSync.
  Detects SIGTERM+ETIMEDOUT and returns { timedOut:true } in the result shape.
  readWorktreeList maps timedOut:true -> reason:'git_timed_out' (distinct from
  generic git_list_failed) so callers can emit a structured warning.
  executeWorktreePrunePlan propagates timedOut:true as a first-class result field.
- core.cjs: execGit receives the same timeout+timedOut treatment (PRED.k014
  uniform-fix discipline).  pruneOrphanedWorktrees now emits a [gsd-tools]
  WARNING to stderr when the git prune call times out instead of silent-catch.
- verify.cjs: Check 11 branches on worktreeHealth.ok to surface W018 warning
  when the worktree list times out, instead of silent-catch on ok:false.

Backward-compatible: exitCode/stdout/stderr continue to work for all existing
callers; timedOut and error are additive new fields.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* changeset: pr=3283 for #3281

* fix(verify): rename worktree-timeout warning from W018 to W020 to avoid W018 collision

W018 is already used for milestone archive drift (Check 12). The new
worktree-health-degraded timeout warning was assigned W018, causing
warning-code ambiguity in triage. Rename to W020 (next available code).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Tom Boucher
2026-05-09 01:53:50 -04:00
committed by GitHub
parent 3ce6a12f30
commit a33cbe72f5
5 changed files with 363 additions and 4 deletions

View File

@@ -0,0 +1,5 @@
---
type: Fixed
pr: 3283
---
**Worktree health paths no longer hang on stuck git subprocesses** — `execGit` / `execGitDefault` now bound their git subprocess calls with a 10s timeout (overridable), and downstream callers in `init.cjs` / `verify.cjs` / `worktree-safety.cjs` surface a structured WARNING instead of silently swallowing the timeout/error. Init progress and verify health remain non-crashing when git is unavailable but report degraded worktree health-check status.

View File

@@ -722,16 +722,38 @@ function normalizeMd(content) {
return text;
}
function execGit(cwd, args) {
// Default timeout for worktree-related git subprocess calls (matches worktree-safety.cjs).
// Prevents `git worktree list --porcelain` and similar calls from blocking the parent
// process indefinitely when git is stalled (locked index, hung remote, NFS mount freeze).
// Callers can override it per call via `options.timeout`.
const DEFAULT_GIT_TIMEOUT_MS = 10000;

/**
 * Execute a git command synchronously with a bounded timeout.
 *
 * @param {string} cwd - Working directory for the git process.
 * @param {string[]} args - Arguments passed to `git`.
 * @param {{ timeout?: number } | null} [options] - `timeout` is in milliseconds;
 *   DEFAULT_GIT_TIMEOUT_MS is used when it is null/undefined.
 * @returns {{ exitCode: number, stdout: string, stderr: string, timedOut: boolean, error: (Error|null) }}
 *   - exitCode: the exit status reported by spawnSync, or 1 when it reports none
 *   - stdout / stderr: captured output, trimmed ('' when nothing was captured)
 *   - timedOut: true when spawnSync reports an ETIMEDOUT error — callers must
 *     branch on this to surface a structured warning (PRED.k302)
 *   - error: the spawnSync error object, or null
 *
 * Backward-compatible: existing callers that only read exitCode/stdout/stderr
 * continue to work unchanged.
 */
function execGit(cwd, args, options = {}) {
  // `options?.` keeps an explicit `null` options argument from throwing.
  const timeout = options?.timeout ?? DEFAULT_GIT_TIMEOUT_MS;
  const result = spawnSync('git', args, {
    cwd,
    stdio: 'pipe',
    encoding: 'utf-8',
    timeout,
  });
  // The ETIMEDOUT error code is what marks a timeout. The reported signal is
  // deliberately not checked: a child (e.g. a wrapper script) that handles the
  // kill signal and exits on its own reports no terminating signal, and the
  // timeout would otherwise be misreported as an ordinary failure.
  const timedOut = result.error?.code === 'ETIMEDOUT';
  return {
    exitCode: result.status ?? 1,
    stdout: (result.stdout ?? '').toString().trim(),
    stderr: (result.stderr ?? '').toString().trim(),
    timedOut,
    error: result.error ?? null,
  };
}
@@ -778,7 +800,16 @@ function pruneOrphanedWorktrees(repoRoot) {
{ allowDestructive: false },
{ execGit, parseWorktreePorcelain }
);
executeWorktreePrunePlan(plan, { execGit });
const pruneResult = executeWorktreePrunePlan(plan, { execGit });
if (pruneResult && pruneResult.timedOut) {
// AC2: surface structured warning instead of silently swallowing the timeout.
// Uses process.stderr.write to match the [gsd-tools] WARNING prefix style.
process.stderr.write(
'[gsd-tools] WARNING: worktree health check degraded' +
' — git worktree prune timed out after 10s.' +
' Orphaned worktree metadata may remain until the next successful run.\n'
);
}
} catch { /* never crash the caller */ }
return [];
}

View File

@@ -913,7 +913,17 @@ function cmdValidateHealth(cwd, options, raw) {
{ staleAfterMs: 60 * 60 * 1000 },
{ execGit, existsSync: fs.existsSync, statSync: fs.statSync }
);
if (worktreeHealth.ok) {
if (!worktreeHealth.ok) {
// AC2 / AC3: surface degraded-git state as a structured warning instead
// of silently suppressing it (PRED.k302 — error-swallowing-empty-sentinel).
if (worktreeHealth.reason === 'git_timed_out') {
addIssue('warning', 'W020',
'Worktree health check degraded: git worktree list timed out after 10s — orphan/stale worktrees could not be inspected',
'Run: git worktree list --porcelain to diagnose; check for .git/index.lock or a hung git process');
}
// Other non-ok reasons (not_a_git_repo, git_list_failed) are silent — not
// meaningful for users who have no git repo or whose git is not configured.
} else {
for (const finding of worktreeHealth.findings) {
if (finding.kind === 'orphan') {
addIssue('warning', 'W017',

View File

@@ -8,16 +8,43 @@ const fs = require('fs');
const path = require('path');
const { spawnSync } = require('child_process');
function execGitDefault(cwd, args) {
// Default timeout for worktree-related git subprocess calls.
// 10 s is generous enough for normal git operations on large repos while still
// providing a deterministic failure path when git stalls (locked index, hung
// remote, stalled NFS mount, etc.). Callers can override it per call through the
// `options.timeout` argument of execGitDefault.
const DEFAULT_GIT_TIMEOUT_MS = 10000;

/**
 * Execute a git command synchronously with a bounded timeout.
 *
 * @param {string} cwd - Working directory for the git process.
 * @param {string[]} args - Arguments passed to `git`.
 * @param {{ timeout?: number } | null} [options] - `timeout` is in milliseconds;
 *   DEFAULT_GIT_TIMEOUT_MS is used when it is null/undefined.
 * @returns {{ exitCode: number, stdout: string, stderr: string, timedOut: boolean, error: (Error|null) }}
 *   - exitCode: the exit status reported by spawnSync, or 1 when it reports
 *               none (so this field is never null)
 *   - stdout / stderr: captured output, trimmed ('' when nothing was captured)
 *   - timedOut: true when spawnSync reports an ETIMEDOUT error — callers must
 *               branch on this to surface a structured warning instead of
 *               silently treating the empty output as success (PRED.k302)
 *   - error:    the Error object reported by spawnSync, or null when there is none
 *
 * Backward-compatible: existing callers that only read exitCode/stdout/stderr
 * continue to work unchanged.
 */
function execGitDefault(cwd, args, options = {}) {
  // `options?.` keeps an explicit `null` options argument from throwing.
  const timeout = options?.timeout ?? DEFAULT_GIT_TIMEOUT_MS;
  const result = spawnSync('git', args, {
    cwd,
    stdio: 'pipe',
    encoding: 'utf-8',
    timeout,
  });
  // The ETIMEDOUT error code is what marks a timeout. The reported signal is
  // deliberately not checked: a child (e.g. a wrapper script) that handles the
  // kill signal and exits on its own reports no terminating signal, and the
  // timeout would otherwise be misreported as an ordinary failure.
  const timedOut = result.error?.code === 'ETIMEDOUT';
  return {
    exitCode: result.status ?? 1,
    stdout: (result.stdout ?? '').toString().trim(),
    stderr: (result.stderr ?? '').toString().trim(),
    timedOut,
    error: result.error ?? null,
  };
}
@@ -51,6 +78,17 @@ function parseWorktreeListPaths(porcelain) {
function readWorktreeList(repoRoot, deps = {}) {
const execGit = deps.execGit || execGitDefault;
const listResult = execGit(repoRoot, ['worktree', 'list', '--porcelain']);
if (listResult.timedOut) {
// AC2 / AC4: surface timeout as a distinct reason so callers can emit a
// structured warning rather than silently treating the failure as a generic
// list error (PRED.k302 — error-swallowing-empty-sentinel).
return {
ok: false,
reason: 'git_timed_out',
porcelain: '',
entries: [],
};
}
if (listResult.exitCode !== 0) {
return {
ok: false,
@@ -158,10 +196,23 @@ function executeWorktreePrunePlan(plan, deps = {}) {
}
const result = execGit(plan.repoRoot, ['worktree', 'prune']);
if (result.timedOut) {
// AC4: surface timedOut as a first-class field so callers (e.g.
// pruneOrphanedWorktrees in core.cjs) can log a structured WARNING rather
// than silently ignoring it (PRED.k302 — error-swallowing-empty-sentinel).
return {
ok: false,
action: plan.action,
reason: 'git_timed_out',
timedOut: true,
pruned: [],
};
}
return {
ok: result.exitCode === 0,
action: plan.action,
reason: plan.reason,
timedOut: false,
pruned: [],
};
}

View File

@@ -0,0 +1,262 @@
/**
* Regression tests for #3281:
* Worktree health paths can hang indefinitely due to unbounded git subprocess calls.
*
* Acceptance criteria:
* AC1 — Worktree git subprocess calls use bounded execution (timeout + deterministic failure).
* AC2 — Timeout/failure outcomes produce structured non-fatal warning signals.
* AC3 — validate health and init progress remain non-crashing when git is unavailable/stalled,
* but report degraded worktree health-check status.
* AC4 — Regression tests cover timeout/degraded-git behavior for worktree safety checks.
*/
'use strict';
const { describe, test } = require('node:test');
const assert = require('node:assert/strict');
const path = require('path');
// ─── Module paths ─────────────────────────────────────────────────────────────
const WORKTREE_SAFETY_PATH = path.join(
__dirname, '..', 'get-shit-done', 'bin', 'lib', 'worktree-safety.cjs'
);
// ─── Shared timeout stub ──────────────────────────────────────────────────────
/**
 * Build an injectable `execGit` replacement that always reports a timed-out
 * git call, whatever cwd/args it is given.
 *
 * Each invocation of the returned function yields a fresh result object:
 *   - exitCode: null, stdout: '', stderr: ''
 *   - timedOut: true
 *   - signal: 'SIGTERM'
 *   - error: an Error whose `code` is 'ETIMEDOUT'
 *
 * The worktree helpers under test are expected to treat this result as a
 * timeout (distinct from other git failures) and must not throw.
 */
function makeTimeoutStub() {
  function stubTimedOutExecGit(_cwd, _args) {
    const timeoutError = new Error('spawnSync git ETIMEDOUT');
    timeoutError.code = 'ETIMEDOUT';
    return {
      exitCode: null,
      stdout: '',
      stderr: '',
      timedOut: true,
      signal: 'SIGTERM',
      error: timeoutError,
    };
  }
  return stubTimedOutExecGit;
}
// ─── AC1 / AC4: degraded health via exported functions ───────────────────────
describe('bug-3281 AC1: worktree functions return degraded-ok on timeout, not throw', () => {
  /**
   * Invoke `fn` and hand back its return value. If `fn` throws, fail the test
   * with `message` plus the original error, so a regression shows its root
   * cause instead of only "must not throw".
   */
  function callWithoutThrowing(fn, message) {
    try {
      return fn();
    } catch (err) {
      return assert.fail(`${message} — threw: ${err?.stack ?? String(err)}`);
    }
  }

  test('planWorktreePrune returns action=skip when execGit times out', () => {
    const { planWorktreePrune } = require(WORKTREE_SAFETY_PATH);
    const result = callWithoutThrowing(
      () => planWorktreePrune('/tmp', {}, { execGit: makeTimeoutStub() }),
      'planWorktreePrune must not throw on timeout'
    );
    assert.strictEqual(typeof result, 'object', 'planWorktreePrune must return an object');
    assert.strictEqual(result.action, 'skip', 'planWorktreePrune must return action=skip when git times out');
    assert.ok(
      typeof result.reason === 'string' && result.reason.length > 0,
      'planWorktreePrune must return a non-empty reason when git times out'
    );
  });

  test('executeWorktreePrunePlan returns ok:false when plan is skip (timeout path)', () => {
    const { planWorktreePrune, executeWorktreePrunePlan } = require(WORKTREE_SAFETY_PATH);
    const plan = planWorktreePrune('/tmp', {}, { execGit: makeTimeoutStub() });
    const result = executeWorktreePrunePlan(plan, { execGit: makeTimeoutStub() });
    assert.strictEqual(typeof result, 'object', 'executeWorktreePrunePlan must return an object');
    assert.strictEqual(result.ok, false, 'executeWorktreePrunePlan must return ok:false on timeout');
  });

  test('inspectWorktreeHealth returns ok:false when git times out', () => {
    const { inspectWorktreeHealth } = require(WORKTREE_SAFETY_PATH);
    const result = callWithoutThrowing(
      () => inspectWorktreeHealth('/tmp', {}, { execGit: makeTimeoutStub() }),
      'inspectWorktreeHealth must not throw on timeout'
    );
    assert.strictEqual(typeof result, 'object');
    assert.strictEqual(result.ok, false, 'inspectWorktreeHealth must return ok:false on timeout');
  });

  test('listLinkedWorktreePaths returns ok:false on timeout, not throw', () => {
    const { listLinkedWorktreePaths } = require(WORKTREE_SAFETY_PATH);
    const result = callWithoutThrowing(
      () => listLinkedWorktreePaths('/tmp', { execGit: makeTimeoutStub() }),
      'listLinkedWorktreePaths must not throw on timeout'
    );
    assert.strictEqual(result.ok, false, 'listLinkedWorktreePaths must return ok:false on timeout');
    assert.ok(
      typeof result.reason === 'string' && result.reason.length > 0,
      'listLinkedWorktreePaths must return non-empty reason on timeout'
    );
  });

  test('snapshotWorktreeInventory returns ok:false with reason on timeout, not throw', () => {
    const { snapshotWorktreeInventory } = require(WORKTREE_SAFETY_PATH);
    const result = callWithoutThrowing(
      () => snapshotWorktreeInventory('/tmp', {}, { execGit: makeTimeoutStub() }),
      'snapshotWorktreeInventory must not throw on timeout'
    );
    assert.strictEqual(typeof result, 'object');
    assert.strictEqual(result.ok, false, 'snapshotWorktreeInventory must return ok:false on timeout');
    assert.ok(
      typeof result.reason === 'string' && result.reason.length > 0,
      'snapshotWorktreeInventory must return non-empty reason on timeout'
    );
  });

  test('resolveWorktreeContext returns a valid result on timeout, not throw', () => {
    const { resolveWorktreeContext } = require(WORKTREE_SAFETY_PATH);
    const result = callWithoutThrowing(
      () => resolveWorktreeContext('/tmp', { execGit: makeTimeoutStub() }),
      'resolveWorktreeContext must not throw on timeout'
    );
    assert.strictEqual(typeof result, 'object');
    assert.ok(
      typeof result.effectiveRoot === 'string',
      'resolveWorktreeContext must return effectiveRoot string even on timeout'
    );
  });
});
// ─── AC2 / AC4: timedOut is a first-class field in results ───────────────────
describe('bug-3281 AC2+AC4: timedOut is a first-class field in results', () => {
  // Each case injects the timed-out execGit stub and checks that the timeout is
  // reported distinctly (reason 'git_timed_out' / timedOut:true) rather than
  // as a generic failure.

  test('planWorktreePrune reason is git_timed_out when execGit returns timedOut:true', () => {
    const { planWorktreePrune: planPrune } = require(WORKTREE_SAFETY_PATH);
    const failureMessage =
      'AC4 (strict): planWorktreePrune must use reason=git_timed_out ' +
      'when execGit returns timedOut:true — not the generic git_list_failed';
    const plan = planPrune('/tmp', {}, { execGit: makeTimeoutStub() });
    // The specific reason string distinguishes a timeout from e.g. an auth failure.
    assert.strictEqual(plan.reason, 'git_timed_out', failureMessage);
  });

  test('listLinkedWorktreePaths reason is git_timed_out when execGit returns timedOut:true', () => {
    const { listLinkedWorktreePaths: listPaths } = require(WORKTREE_SAFETY_PATH);
    const failureMessage =
      'AC4 (strict): listLinkedWorktreePaths must use reason=git_timed_out ' +
      'when execGit returns timedOut:true';
    const listing = listPaths('/tmp', { execGit: makeTimeoutStub() });
    assert.strictEqual(listing.reason, 'git_timed_out', failureMessage);
  });

  test('executeWorktreePrunePlan result.timedOut is true when prune git call times out', () => {
    const { executeWorktreePrunePlan: runPlan } = require(WORKTREE_SAFETY_PATH);
    // Hand-built plan (action=metadata_prune_only) so no worktree listing is
    // involved and the prune execGit call itself is the one that times out.
    const prunePlan = {
      repoRoot: '/tmp',
      action: 'metadata_prune_only',
      reason: 'no_worktrees',
      destructiveModeRequested: false,
    };
    const timedOutMessage =
      'AC4 (strict): executeWorktreePrunePlan must include timedOut:true in result ' +
      'when the execGit call returns timedOut:true';
    const outcome = runPlan(prunePlan, { execGit: makeTimeoutStub() });
    assert.strictEqual(outcome.ok, false, 'executeWorktreePrunePlan must return ok:false when prune times out');
    // timedOut must be surfaced as a first-class field of the result.
    assert.strictEqual(outcome.timedOut, true, timedOutMessage);
  });

  test('snapshotWorktreeInventory reason is git_timed_out on timeout', () => {
    const { snapshotWorktreeInventory: takeSnapshot } = require(WORKTREE_SAFETY_PATH);
    const failureMessage =
      'AC4 (strict): snapshotWorktreeInventory must use reason=git_timed_out ' +
      'when execGit returns timedOut:true';
    const snapshot = takeSnapshot('/tmp', {}, { execGit: makeTimeoutStub() });
    assert.strictEqual(snapshot.reason, 'git_timed_out', failureMessage);
  });
});
// ─── AC3: non-crashing under degraded git — worktree prune flow ───────────────
describe('bug-3281 AC3: worktree prune flow is non-crashing under degraded git', () => {
  test('full prune flow (plan -> execute) completes without throwing on timeout', () => {
    const { planWorktreePrune, executeWorktreePrunePlan } = require(WORKTREE_SAFETY_PATH);
    try {
      const plan = planWorktreePrune('/tmp', {}, { execGit: makeTimeoutStub() });
      executeWorktreePrunePlan(plan, { execGit: makeTimeoutStub() });
    } catch (err) {
      // Report the original error so a regression shows its root cause rather
      // than only the fact that something threw.
      assert.fail(
        'full prune flow must not throw on timeout — must degrade gracefully' +
          ` — threw: ${err?.stack ?? String(err)}`
      );
    }
  });

  test('inspectWorktreeHealth findings is empty array (not undefined) on timeout', () => {
    const { inspectWorktreeHealth } = require(WORKTREE_SAFETY_PATH);
    const result = inspectWorktreeHealth('/tmp', {}, { execGit: makeTimeoutStub() });
    // ok:false is expected — but findings must still be an array (not undefined)
    // so callers that iterate findings do not crash
    assert.strictEqual(Array.isArray(result.findings), true, 'findings must be an array even when ok:false');
  });
});