mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
Merge pull request #1056 from rodboev/fix/hook-resilience-worker-lifecycle
fix: hook resilience and worker lifecycle — 87% faster recovery from dead worker
This commit is contained in:
File diff suppressed because one or more lines are too long
61
plugin/scripts/statusline-counts.js
Executable file
61
plugin/scripts/statusline-counts.js
Executable file
@@ -0,0 +1,61 @@
|
||||
#!/usr/bin/env bun
|
||||
/**
|
||||
* Statusline Counts — lightweight project-scoped observation counter
|
||||
*
|
||||
* Returns JSON with observation and prompt counts for the given project,
|
||||
* suitable for integration into Claude Code's statusLineCommand.
|
||||
*
|
||||
* Usage:
|
||||
* bun statusline-counts.js <cwd>
|
||||
* bun statusline-counts.js /home/user/my-project
|
||||
*
|
||||
* Output (JSON, stdout):
|
||||
* {"observations": 42, "prompts": 15, "project": "my-project"}
|
||||
*
|
||||
* The project name is derived from basename(cwd). Observations are counted
|
||||
* with a WHERE project = ? filter so only the current project's data is
|
||||
* returned — preventing inflated counts from cross-project observations.
|
||||
*
|
||||
* Performance: ~10ms typical (direct SQLite read, no HTTP, no worker dependency)
|
||||
*/
|
||||
import { Database } from "bun:sqlite";
|
||||
import { existsSync, readFileSync } from "fs";
|
||||
import { homedir } from "os";
|
||||
import { join, basename } from "path";
|
||||
|
||||
const cwd = process.argv[2] || process.env.CLAUDE_CWD || process.cwd();
|
||||
const project = basename(cwd);
|
||||
|
||||
try {
|
||||
// Resolve data directory: env var → settings.json → default
|
||||
let dataDir = process.env.CLAUDE_MEM_DATA_DIR || join(homedir(), ".claude-mem");
|
||||
if (!process.env.CLAUDE_MEM_DATA_DIR) {
|
||||
const settingsPath = join(dataDir, "settings.json");
|
||||
if (existsSync(settingsPath)) {
|
||||
try {
|
||||
const settings = JSON.parse(readFileSync(settingsPath, "utf-8"));
|
||||
if (settings.CLAUDE_MEM_DATA_DIR) dataDir = settings.CLAUDE_MEM_DATA_DIR;
|
||||
} catch { /* use default */ }
|
||||
}
|
||||
}
|
||||
|
||||
const dbPath = join(dataDir, "claude-mem.db");
|
||||
if (!existsSync(dbPath)) {
|
||||
console.log(JSON.stringify({ observations: 0, prompts: 0, project }));
|
||||
process.exit(0);
|
||||
}
|
||||
|
||||
const db = new Database(dbPath, { readonly: true });
|
||||
|
||||
const obs = db.query("SELECT COUNT(*) as c FROM observations WHERE project = ?").get(project);
|
||||
// user_prompts links to projects through sdk_sessions.content_session_id
|
||||
const prompts = db.query(
|
||||
`SELECT COUNT(*) as c FROM user_prompts up
|
||||
JOIN sdk_sessions s ON s.content_session_id = up.content_session_id
|
||||
WHERE s.project = ?`
|
||||
).get(project);
|
||||
console.log(JSON.stringify({ observations: obs.c, prompts: prompts.c, project }));
|
||||
db.close();
|
||||
} catch (e) {
|
||||
console.log(JSON.stringify({ observations: 0, prompts: 0, project, error: e.message }));
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -9,6 +9,7 @@ import type { EventHandler, NormalizedHookInput, HookResult } from '../types.js'
|
||||
import { ensureWorkerRunning, getWorkerPort } from '../../shared/worker-utils.js';
|
||||
import { getProjectContext } from '../../utils/project-name.js';
|
||||
import { HOOK_EXIT_CODES } from '../../shared/hook-constants.js';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
|
||||
export const contextHandler: EventHandler = {
|
||||
async execute(input: NormalizedHookInput): Promise<HookResult> {
|
||||
@@ -35,20 +36,34 @@ export const contextHandler: EventHandler = {
|
||||
|
||||
// Note: Removed AbortSignal.timeout due to Windows Bun cleanup issue (libuv assertion)
|
||||
// Worker service has its own timeouts, so client-side timeout is redundant
|
||||
const response = await fetch(url);
|
||||
try {
|
||||
const response = await fetch(url);
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Context generation failed: ${response.status}`);
|
||||
}
|
||||
|
||||
const result = await response.text();
|
||||
const additionalContext = result.trim();
|
||||
|
||||
return {
|
||||
hookSpecificOutput: {
|
||||
hookEventName: 'SessionStart',
|
||||
additionalContext
|
||||
if (!response.ok) {
|
||||
// Log but don't throw — context fetch failure should not block session start
|
||||
logger.warn('HOOK', 'Context generation failed, returning empty', { status: response.status });
|
||||
return {
|
||||
hookSpecificOutput: { hookEventName: 'SessionStart', additionalContext: '' },
|
||||
exitCode: HOOK_EXIT_CODES.SUCCESS
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
const result = await response.text();
|
||||
const additionalContext = result.trim();
|
||||
|
||||
return {
|
||||
hookSpecificOutput: {
|
||||
hookEventName: 'SessionStart',
|
||||
additionalContext
|
||||
}
|
||||
};
|
||||
} catch (error) {
|
||||
// Worker unreachable — return empty context gracefully
|
||||
logger.warn('HOOK', 'Context fetch error, returning empty', { error: error instanceof Error ? error.message : String(error) });
|
||||
return {
|
||||
hookSpecificOutput: { hookEventName: 'SessionStart', additionalContext: '' },
|
||||
exitCode: HOOK_EXIT_CODES.SUCCESS
|
||||
};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@@ -39,25 +39,33 @@ export const fileEditHandler: EventHandler = {
|
||||
|
||||
// Send to worker as an observation with file edit metadata
|
||||
// The observation handler on the worker will process this appropriately
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/sessions/observations`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
contentSessionId: sessionId,
|
||||
tool_name: 'write_file',
|
||||
tool_input: { filePath, edits },
|
||||
tool_response: { success: true },
|
||||
cwd
|
||||
})
|
||||
// Note: Removed signal to avoid Windows Bun cleanup issue (libuv assertion)
|
||||
});
|
||||
try {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/sessions/observations`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
contentSessionId: sessionId,
|
||||
tool_name: 'write_file',
|
||||
tool_input: { filePath, edits },
|
||||
tool_response: { success: true },
|
||||
cwd
|
||||
})
|
||||
// Note: Removed signal to avoid Windows Bun cleanup issue (libuv assertion)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`File edit observation storage failed: ${response.status}`);
|
||||
if (!response.ok) {
|
||||
// Log but don't throw — file edit observation failure should not block editing
|
||||
logger.warn('HOOK', 'File edit observation storage failed, skipping', { status: response.status, filePath });
|
||||
return { continue: true, suppressOutput: true, exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
}
|
||||
|
||||
logger.debug('HOOK', 'File edit observation sent successfully', { filePath });
|
||||
} catch (error) {
|
||||
// Worker unreachable — skip file edit observation gracefully
|
||||
logger.warn('HOOK', 'File edit observation fetch error, skipping', { error: error instanceof Error ? error.message : String(error) });
|
||||
return { continue: true, suppressOutput: true, exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
}
|
||||
|
||||
logger.debug('HOOK', 'File edit observation sent successfully', { filePath });
|
||||
|
||||
return { continue: true, suppressOutput: true };
|
||||
}
|
||||
};
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*/
|
||||
|
||||
import type { EventHandler } from '../types.js';
|
||||
import { HOOK_EXIT_CODES } from '../../shared/hook-constants.js';
|
||||
import { contextHandler } from './context.js';
|
||||
import { sessionInitHandler } from './session-init.js';
|
||||
import { observationHandler } from './observation.js';
|
||||
@@ -35,14 +36,22 @@ const handlers: Record<EventType, EventHandler> = {
|
||||
/**
|
||||
* Get the event handler for a given event type.
|
||||
*
|
||||
* Returns a no-op handler for unknown event types instead of throwing (fix #984).
|
||||
* Claude Code may send new event types that the plugin doesn't handle yet —
|
||||
* throwing would surface as a BLOCKING_ERROR to the user.
|
||||
*
|
||||
* @param eventType The type of event to handle
|
||||
* @returns The appropriate EventHandler
|
||||
* @throws Error if event type is not recognized
|
||||
* @returns The appropriate EventHandler, or a no-op handler for unknown types
|
||||
*/
|
||||
export function getEventHandler(eventType: EventType): EventHandler {
|
||||
const handler = handlers[eventType];
|
||||
export function getEventHandler(eventType: string): EventHandler {
|
||||
const handler = handlers[eventType as EventType];
|
||||
if (!handler) {
|
||||
throw new Error(`Unknown event type: ${eventType}`);
|
||||
console.error(`[claude-mem] Unknown event type: ${eventType}, returning no-op`);
|
||||
return {
|
||||
async execute() {
|
||||
return { continue: true, suppressOutput: true, exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
}
|
||||
};
|
||||
}
|
||||
return handler;
|
||||
}
|
||||
|
||||
@@ -48,25 +48,33 @@ export const observationHandler: EventHandler = {
|
||||
}
|
||||
|
||||
// Send to worker - worker handles privacy check and database operations
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/sessions/observations`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
contentSessionId: sessionId,
|
||||
tool_name: toolName,
|
||||
tool_input: toolInput,
|
||||
tool_response: toolResponse,
|
||||
cwd
|
||||
})
|
||||
// Note: Removed signal to avoid Windows Bun cleanup issue (libuv assertion)
|
||||
});
|
||||
try {
|
||||
const response = await fetch(`http://127.0.0.1:${port}/api/sessions/observations`, {
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
body: JSON.stringify({
|
||||
contentSessionId: sessionId,
|
||||
tool_name: toolName,
|
||||
tool_input: toolInput,
|
||||
tool_response: toolResponse,
|
||||
cwd
|
||||
})
|
||||
// Note: Removed signal to avoid Windows Bun cleanup issue (libuv assertion)
|
||||
});
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`Observation storage failed: ${response.status}`);
|
||||
if (!response.ok) {
|
||||
// Log but don't throw — observation storage failure should not block tool use
|
||||
logger.warn('HOOK', 'Observation storage failed, skipping', { status: response.status, toolName });
|
||||
return { continue: true, suppressOutput: true, exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
}
|
||||
|
||||
logger.debug('HOOK', 'Observation sent successfully', { toolName });
|
||||
} catch (error) {
|
||||
// Worker unreachable — skip observation gracefully
|
||||
logger.warn('HOOK', 'Observation fetch error, skipping', { error: error instanceof Error ? error.message : String(error) });
|
||||
return { continue: true, suppressOutput: true, exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
}
|
||||
|
||||
logger.debug('HOOK', 'Observation sent successfully', { toolName });
|
||||
|
||||
return { continue: true, suppressOutput: true };
|
||||
}
|
||||
};
|
||||
|
||||
@@ -16,7 +16,11 @@ import { logger } from '../../utils/logger.js';
|
||||
export const sessionCompleteHandler: EventHandler = {
|
||||
async execute(input: NormalizedHookInput): Promise<HookResult> {
|
||||
// Ensure worker is running
|
||||
await ensureWorkerRunning();
|
||||
const workerReady = await ensureWorkerRunning();
|
||||
if (!workerReady) {
|
||||
// Worker not available — skip session completion gracefully
|
||||
return { continue: true, suppressOutput: true };
|
||||
}
|
||||
|
||||
const { sessionId } = input;
|
||||
const port = getWorkerPort();
|
||||
|
||||
@@ -13,37 +13,46 @@ import { HOOK_EXIT_CODES } from '../../shared/hook-constants.js';
|
||||
export const userMessageHandler: EventHandler = {
|
||||
async execute(input: NormalizedHookInput): Promise<HookResult> {
|
||||
// Ensure worker is running
|
||||
await ensureWorkerRunning();
|
||||
const workerReady = await ensureWorkerRunning();
|
||||
if (!workerReady) {
|
||||
// Worker not available — skip user message gracefully
|
||||
return { exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
}
|
||||
|
||||
const port = getWorkerPort();
|
||||
const project = basename(input.cwd ?? process.cwd());
|
||||
|
||||
// Fetch formatted context directly from worker API
|
||||
// Note: Removed AbortSignal.timeout to avoid Windows Bun cleanup issue (libuv assertion)
|
||||
const response = await fetch(
|
||||
`http://127.0.0.1:${port}/api/context/inject?project=${encodeURIComponent(project)}&colors=true`,
|
||||
{ method: 'GET' }
|
||||
);
|
||||
try {
|
||||
const response = await fetch(
|
||||
`http://127.0.0.1:${port}/api/context/inject?project=${encodeURIComponent(project)}&colors=true`,
|
||||
{ method: 'GET' }
|
||||
);
|
||||
|
||||
if (!response.ok) {
|
||||
// Don't throw - context fetch failure should not block the user's prompt
|
||||
return { exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
if (!response.ok) {
|
||||
// Don't throw - context fetch failure should not block the user's prompt
|
||||
return { exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
}
|
||||
|
||||
const output = await response.text();
|
||||
|
||||
// Write to stderr for user visibility
|
||||
// Note: Using process.stderr.write instead of console.error to avoid
|
||||
// Claude Code treating this as a hook error. The actual hook output
|
||||
// goes to stdout via hook-command.ts JSON serialization.
|
||||
process.stderr.write(
|
||||
"\n\n" + String.fromCodePoint(0x1F4DD) + " Claude-Mem Context Loaded\n\n" +
|
||||
output +
|
||||
"\n\n" + String.fromCodePoint(0x1F4A1) + " Wrap any message with <private> ... </private> to prevent storing sensitive information.\n" +
|
||||
"\n" + String.fromCodePoint(0x1F4AC) + " Community https://discord.gg/J4wttp9vDu" +
|
||||
`\n` + String.fromCodePoint(0x1F4FA) + ` Watch live in browser http://localhost:${port}/\n`
|
||||
);
|
||||
} catch (error) {
|
||||
// Worker unreachable — skip user message gracefully
|
||||
// User message context error is non-critical — skip gracefully
|
||||
}
|
||||
|
||||
const output = await response.text();
|
||||
|
||||
// Write to stderr for user visibility
|
||||
// Note: Using process.stderr.write instead of console.error to avoid
|
||||
// Claude Code treating this as a hook error. The actual hook output
|
||||
// goes to stdout via hook-command.ts JSON serialization.
|
||||
process.stderr.write(
|
||||
"\n\n" + String.fromCodePoint(0x1F4DD) + " Claude-Mem Context Loaded\n\n" +
|
||||
output +
|
||||
"\n\n" + String.fromCodePoint(0x1F4A1) + " Wrap any message with <private> ... </private> to prevent storing sensitive information.\n" +
|
||||
"\n" + String.fromCodePoint(0x1F4AC) + " Community https://discord.gg/J4wttp9vDu" +
|
||||
`\n` + String.fromCodePoint(0x1F4FA) + ` Watch live in browser http://localhost:${port}/\n`
|
||||
);
|
||||
|
||||
return { exitCode: HOOK_EXIT_CODES.SUCCESS };
|
||||
}
|
||||
};
|
||||
|
||||
@@ -8,6 +8,62 @@ export interface HookCommandOptions {
|
||||
skipExit?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Classify whether an error indicates the worker is unavailable (graceful degradation)
|
||||
* vs a handler/client bug (blocking error that developers need to see).
|
||||
*
|
||||
* Exit 0 (graceful degradation):
|
||||
* - Transport failures: ECONNREFUSED, ECONNRESET, EPIPE, ETIMEDOUT, fetch failed
|
||||
* - Timeout errors: timed out, timeout
|
||||
* - Server errors: HTTP 5xx status codes
|
||||
*
|
||||
* Exit 2 (blocking error — handler/client bug):
|
||||
* - HTTP 4xx status codes (bad request, not found, validation error)
|
||||
* - Programming errors (TypeError, ReferenceError, SyntaxError)
|
||||
* - All other unexpected errors
|
||||
*/
|
||||
export function isWorkerUnavailableError(error: unknown): boolean {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
const lower = message.toLowerCase();
|
||||
|
||||
// Transport failures — worker unreachable
|
||||
const transportPatterns = [
|
||||
'econnrefused',
|
||||
'econnreset',
|
||||
'epipe',
|
||||
'etimedout',
|
||||
'enotfound',
|
||||
'econnaborted',
|
||||
'enetunreach',
|
||||
'ehostunreach',
|
||||
'fetch failed',
|
||||
'unable to connect',
|
||||
'socket hang up',
|
||||
];
|
||||
if (transportPatterns.some(p => lower.includes(p))) return true;
|
||||
|
||||
// Timeout errors — worker didn't respond in time
|
||||
if (lower.includes('timed out') || lower.includes('timeout')) return true;
|
||||
|
||||
// HTTP 5xx server errors — worker has internal problems
|
||||
if (/failed:\s*5\d{2}/.test(message) || /status[:\s]+5\d{2}/.test(message)) return true;
|
||||
|
||||
// HTTP 429 (rate limit) — treat as transient unavailability, not a bug
|
||||
if (/failed:\s*429/.test(message) || /status[:\s]+429/.test(message)) return true;
|
||||
|
||||
// HTTP 4xx client errors — our bug, NOT worker unavailability
|
||||
if (/failed:\s*4\d{2}/.test(message) || /status[:\s]+4\d{2}/.test(message)) return false;
|
||||
|
||||
// Programming errors — code bugs, not worker unavailability
|
||||
// Note: TypeError('fetch failed') already handled by transport patterns above
|
||||
if (error instanceof TypeError || error instanceof ReferenceError || error instanceof SyntaxError) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Default: treat unknown errors as blocking (conservative — surface bugs)
|
||||
return false;
|
||||
}
|
||||
|
||||
export async function hookCommand(platform: string, event: string, options: HookCommandOptions = {}): Promise<number> {
|
||||
try {
|
||||
const adapter = getPlatformAdapter(platform);
|
||||
@@ -26,9 +82,17 @@ export async function hookCommand(platform: string, event: string, options: Hook
|
||||
}
|
||||
return exitCode;
|
||||
} catch (error) {
|
||||
if (isWorkerUnavailableError(error)) {
|
||||
// Worker unavailable — degrade gracefully, don't block the user
|
||||
console.error(`[claude-mem] Worker unavailable, skipping hook: ${error instanceof Error ? error.message : error}`);
|
||||
if (!options.skipExit) {
|
||||
process.exit(HOOK_EXIT_CODES.SUCCESS); // = 0 (graceful)
|
||||
}
|
||||
return HOOK_EXIT_CODES.SUCCESS;
|
||||
}
|
||||
|
||||
// Handler/client bug — show as blocking error so developers see it
|
||||
console.error(`Hook error: ${error}`);
|
||||
// Use exit code 2 (blocking error) so users see the error message
|
||||
// Exit code 1 only shows in verbose mode per Claude Code docs
|
||||
if (!options.skipExit) {
|
||||
process.exit(HOOK_EXIT_CODES.BLOCKING_ERROR); // = 2
|
||||
}
|
||||
|
||||
@@ -77,7 +77,11 @@ export function removePidFile(): void {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get platform-adjusted timeout (Windows socket cleanup is slower)
|
||||
* Get platform-adjusted timeout for worker-side socket operations (2.0x on Windows).
|
||||
*
|
||||
* Note: Two platform multiplier functions exist intentionally:
|
||||
* - getTimeout() in hook-constants.ts uses 1.5x for hook-side operations (fast path)
|
||||
* - getPlatformTimeout() here uses 2.0x for worker-side socket operations (slower path)
|
||||
*/
|
||||
export function getPlatformTimeout(baseMs: number): number {
|
||||
const WINDOWS_MULTIPLIER = 2.0;
|
||||
@@ -380,7 +384,27 @@ export function spawnDaemon(
|
||||
}
|
||||
}
|
||||
|
||||
// Unix: standard detached spawn
|
||||
// Unix: Use setsid to create a new session, fully detaching from the
|
||||
// controlling terminal. This prevents SIGHUP from reaching the daemon
|
||||
// even if the in-process SIGHUP handler somehow fails (belt-and-suspenders).
|
||||
// Fall back to standard detached spawn if setsid is not available.
|
||||
const setsidPath = '/usr/bin/setsid';
|
||||
if (existsSync(setsidPath)) {
|
||||
const child = spawn(setsidPath, [process.execPath, scriptPath, '--daemon'], {
|
||||
detached: true,
|
||||
stdio: 'ignore',
|
||||
env
|
||||
});
|
||||
|
||||
if (child.pid === undefined) {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
child.unref();
|
||||
return child.pid;
|
||||
}
|
||||
|
||||
// Fallback: standard detached spawn (macOS, systems without setsid)
|
||||
const child = spawn(process.execPath, [scriptPath, '--daemon'], {
|
||||
detached: true,
|
||||
stdio: 'ignore',
|
||||
@@ -396,6 +420,56 @@ export function spawnDaemon(
|
||||
return child.pid;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a process with the given PID is alive.
|
||||
*
|
||||
* Uses the process.kill(pid, 0) idiom: signal 0 doesn't send a signal,
|
||||
* it just checks if the process exists and is reachable.
|
||||
*
|
||||
* EPERM is treated as "alive" because it means the process exists but
|
||||
* belongs to a different user/session (common in multi-user setups).
|
||||
* PID 0 (Windows WMIC sentinel for unknown PID) is treated as alive.
|
||||
*/
|
||||
export function isProcessAlive(pid: number): boolean {
|
||||
// PID 0 is the Windows WMIC sentinel value — process was spawned but PID unknown
|
||||
if (pid === 0) return true;
|
||||
|
||||
// Invalid PIDs are not alive
|
||||
if (!Number.isInteger(pid) || pid < 0) return false;
|
||||
|
||||
try {
|
||||
process.kill(pid, 0);
|
||||
return true;
|
||||
} catch (error: unknown) {
|
||||
const code = (error as NodeJS.ErrnoException).code;
|
||||
// EPERM = process exists but different user/session — treat as alive
|
||||
if (code === 'EPERM') return true;
|
||||
// ESRCH = no such process — it's dead
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the PID file and remove it if the recorded process is dead (stale).
|
||||
*
|
||||
* This is a cheap operation: one filesystem read + one signal-0 check.
|
||||
* Called at the top of ensureWorkerStarted() to clean up after WSL2
|
||||
* hibernate, OOM kills, or other ungraceful worker deaths.
|
||||
*/
|
||||
export function cleanStalePidFile(): void {
|
||||
const pidInfo = readPidFile();
|
||||
if (!pidInfo) return;
|
||||
|
||||
if (!isProcessAlive(pidInfo.pid)) {
|
||||
logger.info('SYSTEM', 'Removing stale PID file (worker process is dead)', {
|
||||
pid: pidInfo.pid,
|
||||
port: pidInfo.port,
|
||||
startedAt: pidInfo.startedAt
|
||||
});
|
||||
removePidFile();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Create signal handler factory for graceful shutdown
|
||||
* Returns a handler function that can be passed to process.on('SIGTERM') etc.
|
||||
|
||||
@@ -14,6 +14,7 @@ import { existsSync, writeFileSync, unlinkSync, statSync } from 'fs';
|
||||
import { Client } from '@modelcontextprotocol/sdk/client/index.js';
|
||||
import { StdioClientTransport } from '@modelcontextprotocol/sdk/client/stdio.js';
|
||||
import { getWorkerPort, getWorkerHost } from '../shared/worker-utils.js';
|
||||
import { HOOK_TIMEOUTS } from '../shared/hook-constants.js';
|
||||
import { SettingsDefaultsManager } from '../shared/SettingsDefaultsManager.js';
|
||||
import { logger } from '../utils/logger.js';
|
||||
|
||||
@@ -66,6 +67,7 @@ import {
|
||||
removePidFile,
|
||||
getPlatformTimeout,
|
||||
cleanupOrphanedProcesses,
|
||||
cleanStalePidFile,
|
||||
spawnDaemon,
|
||||
createSignalHandler
|
||||
} from './infrastructure/ProcessManager.js';
|
||||
@@ -229,6 +231,22 @@ export class WorkerService {
|
||||
this.isShuttingDown = shutdownRef.value;
|
||||
handler('SIGINT');
|
||||
});
|
||||
|
||||
// SIGHUP: sent by kernel when controlling terminal closes.
|
||||
// Daemon mode: ignore it (survive parent shell exit).
|
||||
// Interactive mode: treat like SIGTERM (graceful shutdown).
|
||||
if (process.platform !== 'win32') {
|
||||
if (process.argv.includes('--daemon')) {
|
||||
process.on('SIGHUP', () => {
|
||||
logger.debug('SYSTEM', 'Ignoring SIGHUP in daemon mode');
|
||||
});
|
||||
} else {
|
||||
process.on('SIGHUP', () => {
|
||||
this.isShuttingDown = shutdownRef.value;
|
||||
handler('SIGHUP');
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -746,6 +764,9 @@ export class WorkerService {
|
||||
* @returns true if worker is healthy (existing or newly started), false on failure
|
||||
*/
|
||||
async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
// Clean stale PID file first (cheap: 1 fs read + 1 signal-0 check)
|
||||
cleanStalePidFile();
|
||||
|
||||
// Check if worker is already running and healthy
|
||||
if (await waitForHealth(port, 1000)) {
|
||||
const versionCheck = await checkVersionMatch(port);
|
||||
@@ -756,7 +777,7 @@ async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
});
|
||||
|
||||
await httpShutdown(port);
|
||||
const freed = await waitForPortFree(port, getPlatformTimeout(15000));
|
||||
const freed = await waitForPortFree(port, getPlatformTimeout(HOOK_TIMEOUTS.PORT_IN_USE_WAIT));
|
||||
if (!freed) {
|
||||
logger.error('SYSTEM', 'Port did not free up after shutdown for version mismatch restart', { port });
|
||||
return false;
|
||||
@@ -772,7 +793,7 @@ async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
const portInUse = await isPortInUse(port);
|
||||
if (portInUse) {
|
||||
logger.info('SYSTEM', 'Port in use, waiting for worker to become healthy');
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(15000));
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(HOOK_TIMEOUTS.PORT_IN_USE_WAIT));
|
||||
if (healthy) {
|
||||
logger.info('SYSTEM', 'Worker is now healthy');
|
||||
return true;
|
||||
@@ -799,7 +820,7 @@ async function ensureWorkerStarted(port: number): Promise<boolean> {
|
||||
// PID file is written by the worker itself after listen() succeeds
|
||||
// This is race-free and works correctly on Windows where cmd.exe PID is useless
|
||||
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(30000));
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(HOOK_TIMEOUTS.POST_SPAWN_WAIT));
|
||||
if (!healthy) {
|
||||
removePidFile();
|
||||
logger.error('SYSTEM', 'Worker failed to start (health check timeout)');
|
||||
@@ -871,7 +892,7 @@ async function main() {
|
||||
// PID file is written by the worker itself after listen() succeeds
|
||||
// This is race-free and works correctly on Windows where cmd.exe PID is useless
|
||||
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(30000));
|
||||
const healthy = await waitForHealth(port, getPlatformTimeout(HOOK_TIMEOUTS.POST_SPAWN_WAIT));
|
||||
if (!healthy) {
|
||||
removePidFile();
|
||||
logger.error('SYSTEM', 'Worker failed to restart');
|
||||
@@ -966,6 +987,18 @@ async function main() {
|
||||
|
||||
case '--daemon':
|
||||
default: {
|
||||
// Prevent daemon from dying silently on unhandled errors.
|
||||
// The HTTP server can continue serving even if a background task throws.
|
||||
process.on('unhandledRejection', (reason) => {
|
||||
logger.error('SYSTEM', 'Unhandled rejection in daemon', {
|
||||
reason: reason instanceof Error ? reason.message : String(reason)
|
||||
});
|
||||
});
|
||||
process.on('uncaughtException', (error) => {
|
||||
logger.error('SYSTEM', 'Uncaught exception in daemon', {}, error as Error);
|
||||
// Don't exit — keep the HTTP server running
|
||||
});
|
||||
|
||||
const worker = new WorkerService();
|
||||
worker.start().catch((error) => {
|
||||
logger.failure('SYSTEM', 'Worker failed to start', {}, error as Error);
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
export const HOOK_TIMEOUTS = {
|
||||
DEFAULT: 300000, // Standard HTTP timeout (5 min for slow systems)
|
||||
HEALTH_CHECK: 30000, // Worker health check (30s for slow systems)
|
||||
HEALTH_CHECK: 3000, // Worker health check (3s — healthy worker responds in <100ms)
|
||||
POST_SPAWN_WAIT: 5000, // Wait for daemon to start after spawn (starts in <1s on Linux)
|
||||
PORT_IN_USE_WAIT: 3000, // Wait when port occupied but health failing
|
||||
WORKER_STARTUP_WAIT: 1000,
|
||||
WORKER_STARTUP_RETRIES: 300,
|
||||
PRE_RESTART_SETTLE_DELAY: 2000, // Give files time to sync before restart
|
||||
POWERSHELL_COMMAND: 10000, // PowerShell process enumeration (10s - typically completes in <1s)
|
||||
WINDOWS_MULTIPLIER: 1.5 // Platform-specific adjustment
|
||||
WINDOWS_MULTIPLIER: 1.5 // Platform-specific adjustment for hook-side operations
|
||||
} as const;
|
||||
|
||||
/**
|
||||
|
||||
@@ -6,7 +6,21 @@ import { SettingsDefaultsManager } from "./SettingsDefaultsManager.js";
|
||||
import { MARKETPLACE_ROOT } from "./paths.js";
|
||||
|
||||
// Named constants for health checks
|
||||
const HEALTH_CHECK_TIMEOUT_MS = getTimeout(HOOK_TIMEOUTS.HEALTH_CHECK);
|
||||
// Allow env var override for users on slow systems (e.g., CLAUDE_MEM_HEALTH_TIMEOUT_MS=10000)
|
||||
const HEALTH_CHECK_TIMEOUT_MS = (() => {
|
||||
const envVal = process.env.CLAUDE_MEM_HEALTH_TIMEOUT_MS;
|
||||
if (envVal) {
|
||||
const parsed = parseInt(envVal, 10);
|
||||
if (Number.isFinite(parsed) && parsed >= 500 && parsed <= 300000) {
|
||||
return parsed;
|
||||
}
|
||||
// Invalid env var — log once and use default
|
||||
logger.warn('SYSTEM', 'Invalid CLAUDE_MEM_HEALTH_TIMEOUT_MS, using default', {
|
||||
value: envVal, min: 500, max: 300000
|
||||
});
|
||||
}
|
||||
return getTimeout(HOOK_TIMEOUTS.HEALTH_CHECK);
|
||||
})();
|
||||
|
||||
/**
|
||||
* Fetch with a timeout using Promise.race instead of AbortSignal.
|
||||
@@ -89,12 +103,22 @@ async function isWorkerHealthy(): Promise<boolean> {
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the current plugin version from package.json
|
||||
* Get the current plugin version from package.json.
|
||||
* Returns 'unknown' on ENOENT/EBUSY (shutdown race condition, fix #1042).
|
||||
*/
|
||||
function getPluginVersion(): string {
|
||||
const packageJsonPath = path.join(MARKETPLACE_ROOT, 'package.json');
|
||||
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
||||
return packageJson.version;
|
||||
try {
|
||||
const packageJsonPath = path.join(MARKETPLACE_ROOT, 'package.json');
|
||||
const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8'));
|
||||
return packageJson.version;
|
||||
} catch (error: unknown) {
|
||||
const code = (error as NodeJS.ErrnoException).code;
|
||||
if (code === 'ENOENT' || code === 'EBUSY') {
|
||||
logger.debug('SYSTEM', 'Could not read plugin version (shutdown race)', { code });
|
||||
return 'unknown';
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -115,18 +139,33 @@ async function getWorkerVersion(): Promise<string> {
|
||||
/**
|
||||
* Check if worker version matches plugin version
|
||||
* Note: Auto-restart on version mismatch is now handled in worker-service.ts start command (issue #484)
|
||||
* This function logs for informational purposes only
|
||||
* This function logs for informational purposes only.
|
||||
* Skips comparison when either version is 'unknown' (fix #1042 — avoids restart loops).
|
||||
*/
|
||||
async function checkWorkerVersion(): Promise<void> {
|
||||
const pluginVersion = getPluginVersion();
|
||||
const workerVersion = await getWorkerVersion();
|
||||
try {
|
||||
const pluginVersion = getPluginVersion();
|
||||
|
||||
if (pluginVersion !== workerVersion) {
|
||||
// Just log debug info - auto-restart handles the mismatch in worker-service.ts
|
||||
logger.debug('SYSTEM', 'Version check', {
|
||||
pluginVersion,
|
||||
workerVersion,
|
||||
note: 'Mismatch will be auto-restarted by worker-service start command'
|
||||
// Skip version check if plugin version couldn't be read (shutdown race)
|
||||
if (pluginVersion === 'unknown') return;
|
||||
|
||||
const workerVersion = await getWorkerVersion();
|
||||
|
||||
// Skip version check if worker version is 'unknown' (avoids restart loops)
|
||||
if (workerVersion === 'unknown') return;
|
||||
|
||||
if (pluginVersion !== workerVersion) {
|
||||
// Just log debug info - auto-restart handles the mismatch in worker-service.ts
|
||||
logger.debug('SYSTEM', 'Version check', {
|
||||
pluginVersion,
|
||||
workerVersion,
|
||||
note: 'Mismatch will be auto-restarted by worker-service start command'
|
||||
});
|
||||
}
|
||||
} catch (error) {
|
||||
// Version check is informational — don't fail the hook
|
||||
logger.debug('SYSTEM', 'Version check failed', {
|
||||
error: error instanceof Error ? error.message : String(error)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
164
tests/hook-command.test.ts
Normal file
164
tests/hook-command.test.ts
Normal file
@@ -0,0 +1,164 @@
|
||||
/**
|
||||
* Tests for hook-command error classifier
|
||||
*
|
||||
* Validates that isWorkerUnavailableError correctly distinguishes between:
|
||||
* - Transport failures (ECONNREFUSED, etc.) → true (graceful degradation)
|
||||
* - Server errors (5xx) → true (graceful degradation)
|
||||
* - Client errors (4xx) → false (handler bug, blocking)
|
||||
* - Programming errors (TypeError, etc.) → false (code bug, blocking)
|
||||
*/
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { isWorkerUnavailableError } from '../src/cli/hook-command.js';
|
||||
|
||||
describe('isWorkerUnavailableError', () => {
|
||||
describe('transport failures → true (graceful)', () => {
|
||||
it('should classify ECONNREFUSED as worker unavailable', () => {
|
||||
const error = new Error('connect ECONNREFUSED 127.0.0.1:37777');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify ECONNRESET as worker unavailable', () => {
|
||||
const error = new Error('socket hang up ECONNRESET');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify EPIPE as worker unavailable', () => {
|
||||
const error = new Error('write EPIPE');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify ETIMEDOUT as worker unavailable', () => {
|
||||
const error = new Error('connect ETIMEDOUT 127.0.0.1:37777');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify "fetch failed" as worker unavailable', () => {
|
||||
const error = new TypeError('fetch failed');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify "Unable to connect" as worker unavailable', () => {
|
||||
const error = new Error('Unable to connect to server');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify ENOTFOUND as worker unavailable', () => {
|
||||
const error = new Error('getaddrinfo ENOTFOUND localhost');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify "socket hang up" as worker unavailable', () => {
|
||||
const error = new Error('socket hang up');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify ECONNABORTED as worker unavailable', () => {
|
||||
const error = new Error('ECONNABORTED');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('timeout errors → true (graceful)', () => {
|
||||
it('should classify "timed out" as worker unavailable', () => {
|
||||
const error = new Error('Request timed out after 3000ms');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify "timeout" as worker unavailable', () => {
|
||||
const error = new Error('Connection timeout');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('HTTP 5xx server errors → true (graceful)', () => {
|
||||
it('should classify 500 status as worker unavailable', () => {
|
||||
const error = new Error('Context generation failed: 500');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify 502 status as worker unavailable', () => {
|
||||
const error = new Error('Observation storage failed: 502');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify 503 status as worker unavailable', () => {
|
||||
const error = new Error('Request failed: 503');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify "status: 500" format as worker unavailable', () => {
|
||||
const error = new Error('HTTP error status: 500');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('HTTP 429 rate limit → true (graceful)', () => {
|
||||
it('should classify 429 as worker unavailable (rate limit is transient)', () => {
|
||||
const error = new Error('Request failed: 429');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
|
||||
it('should classify "status: 429" format as worker unavailable', () => {
|
||||
const error = new Error('HTTP error status: 429');
|
||||
expect(isWorkerUnavailableError(error)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('HTTP 4xx client errors → false (blocking)', () => {
|
||||
it('should NOT classify 400 Bad Request as worker unavailable', () => {
|
||||
const error = new Error('Request failed: 400');
|
||||
expect(isWorkerUnavailableError(error)).toBe(false);
|
||||
});
|
||||
|
||||
it('should NOT classify 404 Not Found as worker unavailable', () => {
|
||||
const error = new Error('Observation storage failed: 404');
|
||||
expect(isWorkerUnavailableError(error)).toBe(false);
|
||||
});
|
||||
|
||||
it('should NOT classify 422 Validation Error as worker unavailable', () => {
|
||||
const error = new Error('Request failed: 422');
|
||||
expect(isWorkerUnavailableError(error)).toBe(false);
|
||||
});
|
||||
|
||||
it('should NOT classify "status: 400" format as worker unavailable', () => {
|
||||
const error = new Error('HTTP error status: 400');
|
||||
expect(isWorkerUnavailableError(error)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('programming errors → false (blocking)', () => {
|
||||
it('should NOT classify TypeError as worker unavailable', () => {
|
||||
const error = new TypeError('Cannot read properties of undefined');
|
||||
// Note: TypeError with "fetch failed" IS classified as unavailable (transport layer)
|
||||
// But generic TypeErrors are NOT
|
||||
expect(isWorkerUnavailableError(new TypeError('Cannot read properties of undefined'))).toBe(false);
|
||||
});
|
||||
|
||||
it('should NOT classify ReferenceError as worker unavailable', () => {
|
||||
const error = new ReferenceError('foo is not defined');
|
||||
expect(isWorkerUnavailableError(error)).toBe(false);
|
||||
});
|
||||
|
||||
it('should NOT classify SyntaxError as worker unavailable', () => {
|
||||
const error = new SyntaxError('Unexpected token');
|
||||
expect(isWorkerUnavailableError(error)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('unknown errors → false (blocking, conservative)', () => {
|
||||
it('should NOT classify generic Error as worker unavailable', () => {
|
||||
const error = new Error('Something unexpected happened');
|
||||
expect(isWorkerUnavailableError(error)).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle string errors', () => {
|
||||
expect(isWorkerUnavailableError('ECONNREFUSED')).toBe(true);
|
||||
expect(isWorkerUnavailableError('random error')).toBe(false);
|
||||
});
|
||||
|
||||
it('should handle null/undefined errors', () => {
|
||||
expect(isWorkerUnavailableError(null)).toBe(false);
|
||||
expect(isWorkerUnavailableError(undefined)).toBe(false);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -28,18 +28,22 @@ describe('hook-constants', () => {
|
||||
expect(HOOK_TIMEOUTS.DEFAULT).toBe(300000);
|
||||
});
|
||||
|
||||
it('should define HEALTH_CHECK timeout', () => {
|
||||
expect(HOOK_TIMEOUTS.HEALTH_CHECK).toBe(30000);
|
||||
it('should define HEALTH_CHECK timeout as 3s (reduced from 30s)', () => {
|
||||
expect(HOOK_TIMEOUTS.HEALTH_CHECK).toBe(3000);
|
||||
});
|
||||
|
||||
it('should define POST_SPAWN_WAIT as 5s', () => {
|
||||
expect(HOOK_TIMEOUTS.POST_SPAWN_WAIT).toBe(5000);
|
||||
});
|
||||
|
||||
it('should define PORT_IN_USE_WAIT as 3s', () => {
|
||||
expect(HOOK_TIMEOUTS.PORT_IN_USE_WAIT).toBe(3000);
|
||||
});
|
||||
|
||||
it('should define WORKER_STARTUP_WAIT', () => {
|
||||
expect(HOOK_TIMEOUTS.WORKER_STARTUP_WAIT).toBe(1000);
|
||||
});
|
||||
|
||||
it('should define WORKER_STARTUP_RETRIES', () => {
|
||||
expect(HOOK_TIMEOUTS.WORKER_STARTUP_RETRIES).toBe(300);
|
||||
});
|
||||
|
||||
it('should define PRE_RESTART_SETTLE_DELAY', () => {
|
||||
expect(HOOK_TIMEOUTS.PRE_RESTART_SETTLE_DELAY).toBe(2000);
|
||||
});
|
||||
|
||||
@@ -8,6 +8,9 @@ import {
|
||||
removePidFile,
|
||||
getPlatformTimeout,
|
||||
parseElapsedTime,
|
||||
isProcessAlive,
|
||||
cleanStalePidFile,
|
||||
spawnDaemon,
|
||||
type PidInfo
|
||||
} from '../../src/services/infrastructure/index.js';
|
||||
|
||||
@@ -221,4 +224,138 @@ describe('ProcessManager', () => {
|
||||
expect(result).toBe(666);
|
||||
});
|
||||
});
|
||||
|
||||
describe('isProcessAlive', () => {
|
||||
it('should return true for the current process', () => {
|
||||
expect(isProcessAlive(process.pid)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false for a non-existent PID', () => {
|
||||
// Use a very high PID that's extremely unlikely to exist
|
||||
expect(isProcessAlive(2147483647)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return true for PID 0 (Windows WMIC sentinel)', () => {
|
||||
expect(isProcessAlive(0)).toBe(true);
|
||||
});
|
||||
|
||||
it('should return false for negative PIDs', () => {
|
||||
expect(isProcessAlive(-1)).toBe(false);
|
||||
expect(isProcessAlive(-999)).toBe(false);
|
||||
});
|
||||
|
||||
it('should return false for non-integer PIDs', () => {
|
||||
expect(isProcessAlive(1.5)).toBe(false);
|
||||
expect(isProcessAlive(NaN)).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('cleanStalePidFile', () => {
|
||||
it('should remove PID file when process is dead', () => {
|
||||
// Write a PID file with a non-existent PID
|
||||
const staleInfo: PidInfo = {
|
||||
pid: 2147483647,
|
||||
port: 37777,
|
||||
startedAt: '2024-01-01T00:00:00.000Z'
|
||||
};
|
||||
writePidFile(staleInfo);
|
||||
expect(existsSync(PID_FILE)).toBe(true);
|
||||
|
||||
cleanStalePidFile();
|
||||
|
||||
expect(existsSync(PID_FILE)).toBe(false);
|
||||
});
|
||||
|
||||
it('should keep PID file when process is alive', () => {
|
||||
// Write a PID file with the current process PID (definitely alive)
|
||||
const liveInfo: PidInfo = {
|
||||
pid: process.pid,
|
||||
port: 37777,
|
||||
startedAt: new Date().toISOString()
|
||||
};
|
||||
writePidFile(liveInfo);
|
||||
|
||||
cleanStalePidFile();
|
||||
|
||||
// PID file should still exist since process.pid is alive
|
||||
expect(existsSync(PID_FILE)).toBe(true);
|
||||
});
|
||||
|
||||
it('should do nothing when PID file does not exist', () => {
|
||||
removePidFile();
|
||||
expect(existsSync(PID_FILE)).toBe(false);
|
||||
|
||||
// Should not throw
|
||||
expect(() => cleanStalePidFile()).not.toThrow();
|
||||
});
|
||||
});
|
||||
|
||||
describe('spawnDaemon', () => {
|
||||
it('should use setsid on Linux when available', () => {
|
||||
// setsid should exist at /usr/bin/setsid on Linux
|
||||
if (process.platform === 'win32') return; // Skip on Windows
|
||||
|
||||
const setsidAvailable = existsSync('/usr/bin/setsid');
|
||||
if (!setsidAvailable) return; // Skip if setsid not installed
|
||||
|
||||
// Spawn a daemon with a non-existent script (it will fail to start, but we can verify the spawn attempt)
|
||||
// Use a harmless script path — the child will exit immediately
|
||||
const pid = spawnDaemon('/dev/null', 39999);
|
||||
|
||||
// setsid spawn should return a PID (the setsid process itself)
|
||||
expect(pid).toBeDefined();
|
||||
expect(typeof pid).toBe('number');
|
||||
|
||||
// Clean up: kill the spawned process if it's still alive
|
||||
if (pid !== undefined && pid > 0) {
|
||||
try { process.kill(pid, 'SIGKILL'); } catch { /* already exited */ }
|
||||
}
|
||||
});
|
||||
|
||||
it('should return undefined when spawn fails on Windows path', () => {
|
||||
// On non-Windows, this tests the Unix path which should succeed
|
||||
// The function should not throw, only return undefined on failure
|
||||
if (process.platform === 'win32') return;
|
||||
|
||||
// Spawning with a totally invalid script should still return a PID
|
||||
// (setsid/spawn succeeds even if the child will exit immediately)
|
||||
const result = spawnDaemon('/nonexistent/script.cjs', 39998);
|
||||
// spawn itself should succeed (returns PID), even if child exits
|
||||
expect(result).toBeDefined();
|
||||
|
||||
// Clean up
|
||||
if (result !== undefined && result > 0) {
|
||||
try { process.kill(result, 'SIGKILL'); } catch { /* already exited */ }
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
describe('SIGHUP handling', () => {
|
||||
it('should have SIGHUP listeners registered (integration check)', () => {
|
||||
// Verify that SIGHUP listener registration is possible on Unix
|
||||
if (process.platform === 'win32') return;
|
||||
|
||||
// Register a test handler, verify it works, then remove it
|
||||
let received = false;
|
||||
const testHandler = () => { received = true; };
|
||||
|
||||
process.on('SIGHUP', testHandler);
|
||||
expect(process.listenerCount('SIGHUP')).toBeGreaterThanOrEqual(1);
|
||||
|
||||
// Clean up the test handler
|
||||
process.removeListener('SIGHUP', testHandler);
|
||||
});
|
||||
|
||||
it('should ignore SIGHUP when --daemon is in process.argv', () => {
|
||||
if (process.platform === 'win32') return;
|
||||
|
||||
// Simulate the daemon SIGHUP handler logic
|
||||
const isDaemon = process.argv.includes('--daemon');
|
||||
// In test context, --daemon is not in argv, so this tests the branch logic
|
||||
expect(isDaemon).toBe(false);
|
||||
|
||||
// Verify the non-daemon path: SIGHUP should trigger shutdown (covered by registerSignalHandlers)
|
||||
// This is a logic verification test — actual signal delivery is tested manually
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user