mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
Merge branch 'pr-1491' into integration/validation-batch
# Conflicts: # plugin/scripts/mcp-server.cjs # plugin/scripts/worker-service.cjs # src/shared/hook-constants.ts
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -453,6 +453,19 @@ export async function aggressiveStartupCleanup(): Promise<void> {
|
||||
const pidsToKill: number[] = [];
|
||||
const allPatterns = [...AGGRESSIVE_CLEANUP_PATTERNS, ...AGE_GATED_CLEANUP_PATTERNS];
|
||||
|
||||
// Protect parent process (the hook that spawned us) from being killed.
|
||||
// Without this, a new daemon kills its own parent hook process (#1426).
|
||||
//
|
||||
// Note: readPidFile() is not used here because start() writes the new PID
|
||||
// before initializeBackground() calls this function, so readPidFile() would
|
||||
// just return process.pid (already protected). If a pre-existing worker needs
|
||||
// protection, ensureWorkerStarted() handles that by returning early when a
|
||||
// healthy worker is detected — we never reach this code in that case.
|
||||
const protectedPids = new Set<number>([currentPid]);
|
||||
if (process.ppid && process.ppid > 0) {
|
||||
protectedPids.add(process.ppid);
|
||||
}
|
||||
|
||||
try {
|
||||
if (isWindows) {
|
||||
// Use WQL -Filter for server-side filtering (no $_ pipeline syntax).
|
||||
@@ -475,7 +488,7 @@ export async function aggressiveStartupCleanup(): Promise<void> {
|
||||
|
||||
for (const proc of processList) {
|
||||
const pid = proc.ProcessId;
|
||||
if (!Number.isInteger(pid) || pid <= 0 || pid === currentPid) continue;
|
||||
if (!Number.isInteger(pid) || pid <= 0 || protectedPids.has(pid)) continue;
|
||||
|
||||
const commandLine = proc.CommandLine || '';
|
||||
const isAggressive = AGGRESSIVE_CLEANUP_PATTERNS.some(p => commandLine.includes(p));
|
||||
@@ -518,7 +531,7 @@ export async function aggressiveStartupCleanup(): Promise<void> {
|
||||
const etime = match[2];
|
||||
const command = match[3];
|
||||
|
||||
if (!Number.isInteger(pid) || pid <= 0 || pid === currentPid) continue;
|
||||
if (!Number.isInteger(pid) || pid <= 0 || protectedPids.has(pid)) continue;
|
||||
|
||||
const isAggressive = AGGRESSIVE_CLEANUP_PATTERNS.some(p => command.includes(p));
|
||||
|
||||
|
||||
@@ -80,7 +80,6 @@ import {
|
||||
cleanStalePidFile,
|
||||
isProcessAlive,
|
||||
spawnDaemon,
|
||||
isPidFileRecent,
|
||||
touchPidFile
|
||||
} from './infrastructure/ProcessManager.js';
|
||||
import {
|
||||
@@ -88,8 +87,7 @@ import {
|
||||
waitForHealth,
|
||||
waitForReadiness,
|
||||
waitForPortFree,
|
||||
httpShutdown,
|
||||
checkVersionMatch
|
||||
httpShutdown
|
||||
} from './infrastructure/HealthMonitor.js';
|
||||
import { performGracefulShutdown } from './infrastructure/GracefulShutdown.js';
|
||||
|
||||
@@ -1011,43 +1009,25 @@ async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if worker is already running and healthy
|
||||
// Check if worker is already running and healthy.
|
||||
// NOTE: Version mismatch auto-restart intentionally removed (#1435).
|
||||
// The marketplace bundle ships with __DEFAULT_PACKAGE_VERSION__ unbaked, causing
|
||||
// BUILT_IN_VERSION to fall back to "development". This creates a 100% reproducible
|
||||
// mismatch on every hook call, killing a healthy worker and often failing to restart
|
||||
// (cold start exceeds POST_SPAWN_WAIT). A working-but-old worker is strictly better
|
||||
// than a dead worker. Users must manually restart after genuine plugin updates.
|
||||
// See also: #566, #665, #667, #669, #689, #1124, #1145 (same pattern across 8+ releases).
|
||||
if (await waitForHealth(port, 1000)) {
|
||||
const versionCheck = await checkVersionMatch(port);
|
||||
if (!versionCheck.matches) {
|
||||
// Guard: If PID file was written recently, another session is likely already
|
||||
// restarting the worker. Poll health instead of starting a concurrent restart.
|
||||
// This prevents the "100 sessions all restart simultaneously" storm (#1145).
|
||||
const RESTART_COORDINATION_THRESHOLD_MS = 15000;
|
||||
if (isPidFileRecent(RESTART_COORDINATION_THRESHOLD_MS)) {
|
||||
logger.info('SYSTEM', 'Version mismatch detected but PID file is recent — another restart likely in progress, polling health', {
|
||||
pluginVersion: versionCheck.pluginVersion,
|
||||
workerVersion: versionCheck.workerVersion
|
||||
});
|
||||
const healthy = await waitForHealth(port, RESTART_COORDINATION_THRESHOLD_MS);
|
||||
if (healthy) {
|
||||
logger.info('SYSTEM', 'Worker became healthy after waiting for concurrent restart');
|
||||
return true;
|
||||
}
|
||||
logger.warn('SYSTEM', 'Worker did not become healthy after waiting — proceeding with own restart');
|
||||
}
|
||||
|
||||
logger.info('SYSTEM', 'Worker version mismatch detected - auto-restarting', {
|
||||
pluginVersion: versionCheck.pluginVersion,
|
||||
workerVersion: versionCheck.workerVersion
|
||||
});
|
||||
|
||||
await httpShutdown(port);
|
||||
const freed = await waitForPortFree(port, getPlatformTimeout(HOOK_TIMEOUTS.PORT_IN_USE_WAIT));
|
||||
if (!freed) {
|
||||
logger.error('SYSTEM', 'Port did not free up after shutdown for version mismatch restart', { port });
|
||||
return false;
|
||||
}
|
||||
removePidFile();
|
||||
} else {
|
||||
logger.info('SYSTEM', 'Worker already running and healthy');
|
||||
return true;
|
||||
// Health passed — worker is listening. Also wait for readiness in case
|
||||
// another hook just spawned it and background init is still running.
|
||||
// This mirrors the fresh-spawn path (line ~1025) so concurrent hooks
|
||||
// don't race past a cold-starting worker's initialization guard.
|
||||
const ready = await waitForReadiness(port, getPlatformTimeout(HOOK_TIMEOUTS.READINESS_WAIT));
|
||||
if (!ready) {
|
||||
logger.warn('SYSTEM', 'Worker is alive but readiness timed out — proceeding anyway');
|
||||
}
|
||||
logger.info('SYSTEM', 'Worker already running and healthy');
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if port is in use by something else
|
||||
@@ -1096,8 +1076,7 @@ async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
}
|
||||
|
||||
clearWorkerSpawnAttempted();
|
||||
// Touch PID file to signal other sessions that a restart just completed.
|
||||
// Other sessions checking isPidFileRecent() will see this and skip their own restart.
|
||||
// Touch PID file to signal other sessions that a spawn just completed.
|
||||
touchPidFile();
|
||||
logger.info('SYSTEM', 'Worker started successfully');
|
||||
return true;
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
export const HOOK_TIMEOUTS = {
|
||||
DEFAULT: 300000, // Standard HTTP timeout (5 min for slow systems)
|
||||
HEALTH_CHECK: 3000, // Worker health check (3s — healthy worker responds in <100ms)
|
||||
POST_SPAWN_WAIT: 5000, // Wait for daemon to start after spawn (starts in <1s on Linux)
|
||||
READINESS_WAIT: 30000, // Wait for DB + search init after spawn (typically <5s)
|
||||
PORT_IN_USE_WAIT: 3000, // Wait when port occupied but health failing
|
||||
DEFAULT: 300000, // Standard HTTP timeout (5 min for slow systems)
|
||||
HEALTH_CHECK: 3000, // Worker health check (3s — healthy worker responds in <100ms)
|
||||
POST_SPAWN_WAIT: 15000, // Wait for daemon to start after spawn (starts in <1s on Linux, 6-8s on macOS with Chroma)
|
||||
READINESS_WAIT: 30000, // Wait for DB + search init after spawn (typically <5s)
|
||||
PORT_IN_USE_WAIT: 3000, // Wait when port occupied but health failing
|
||||
WORKER_STARTUP_WAIT: 1000,
|
||||
PRE_RESTART_SETTLE_DELAY: 2000, // Give files time to sync before restart
|
||||
POWERSHELL_COMMAND: 10000, // PowerShell process enumeration (10s - typically completes in <1s)
|
||||
WINDOWS_MULTIPLIER: 1.5, // Platform-specific adjustment for hook-side operations
|
||||
PRE_RESTART_SETTLE_DELAY: 2000, // Give files time to sync before restart
|
||||
POWERSHELL_COMMAND: 10000, // PowerShell process enumeration (10s - typically completes in <1s)
|
||||
WINDOWS_MULTIPLIER: 1.5 // Platform-specific adjustment for hook-side operations
|
||||
} as const;
|
||||
|
||||
/**
|
||||
@@ -16,21 +16,19 @@ export const HOOK_TIMEOUTS = {
|
||||
* Exit code behavior per Claude Code docs:
|
||||
* - 0: Success. For SessionStart/UserPromptSubmit, stdout added to context.
|
||||
* - 2: Blocking error. For SessionStart, stderr shown to user only.
|
||||
* - Other non-zero: treated as hook error (stderr shown in verbose mode only).
|
||||
*
|
||||
* IMPORTANT: Only exit codes 0 and 2 are recognized by Claude Code.
|
||||
* Any other non-zero exit code (including 1, 3, etc.) is treated as a
|
||||
* hook failure and triggers the "hook error" UI message on every session start.
|
||||
* - Other non-zero: stderr shown in verbose mode only.
|
||||
*/
|
||||
export const HOOK_EXIT_CODES = {
|
||||
SUCCESS: 0,
|
||||
FAILURE: 1,
|
||||
/** Blocking error - for SessionStart, shows stderr to user only */
|
||||
BLOCKING_ERROR: 2,
|
||||
/** Show stderr to user only, don't inject into context. Used by user-message handler (Cursor). */
|
||||
USER_MESSAGE_ONLY: 3,
|
||||
} as const;
|
||||
|
||||
export function getTimeout(baseTimeout: number): number {
|
||||
return process.platform === "win32"
|
||||
return process.platform === 'win32'
|
||||
? Math.round(baseTimeout * HOOK_TIMEOUTS.WINDOWS_MULTIPLIER)
|
||||
: baseTimeout;
|
||||
}
|
||||
|
||||
@@ -32,8 +32,8 @@ describe('hook-constants', () => {
|
||||
expect(HOOK_TIMEOUTS.HEALTH_CHECK).toBe(3000);
|
||||
});
|
||||
|
||||
it('should define POST_SPAWN_WAIT as 5s', () => {
|
||||
expect(HOOK_TIMEOUTS.POST_SPAWN_WAIT).toBe(5000);
|
||||
it('should define POST_SPAWN_WAIT as 15s', () => {
|
||||
expect(HOOK_TIMEOUTS.POST_SPAWN_WAIT).toBe(15000);
|
||||
});
|
||||
|
||||
it('should define PORT_IN_USE_WAIT as 3s', () => {
|
||||
|
||||
Reference in New Issue
Block a user