perf: streamline worker startup and consolidate database connections

1. Database Pooling: Modified DatabaseManager, SessionStore, and SessionSearch to share a single bun:sqlite connection, eliminating redundant file descriptors.
2. Non-blocking Startup: Refactored WorktreeAdoption and Chroma backfill to run in the background (fire-and-forget), preventing them from stalling core initialization.
3. Diagnostic Routes: Added /api/chroma/status and bypassed the initialization guard for health/readiness endpoints to allow diagnostics during startup.
4. Robust Search: Implemented reliable SQLite FTS5 fallback in SearchManager for when Chroma (uvx) fails or is unavailable.
5. Code Cleanup: Removed redundant loopback MCP checks and mangled initialization logic from WorkerService.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Alex Newman
2026-04-24 17:34:32 -07:00
parent 43a7ab8964
commit dfbff9cd8c
9 changed files with 422 additions and 222 deletions

156
chroma-flowcharts.md Normal file
View File

@@ -0,0 +1,156 @@
# Chroma System Flowcharts
## AS BUILT
```mermaid
flowchart TD
subgraph Boot["Worker Boot (worker-service.ts:428-509)"]
B1["worker-service start"] --> B2{"CLAUDE_MEM_CHROMA_ENABLED?"}
B2 -- no --> B3["skip Chroma init"]
B2 -- yes --> B4["ChromaMcpManager.getInstance() (no connect)"]
B4 --> B5["dbManager.initialize()"]
B5 --> B6["new ChromaSync('claude-mem') -> cm__claude-mem"]
B6 --> B7["SearchOrchestrator + CorpusBuilder receive shared instance"]
B7 --> B8["mark init complete"]
B8 --> B9["fire-and-forget backfillAllProjects()"]
end
subgraph Producers["Write Call Sites"]
P1["ResponseProcessor.syncAndBroadcastObservations"]
P2["ResponseProcessor.syncAndBroadcastSummary"]
P3["SessionRoutes UserPromptSubmit"]
P4["MemoryRoutes POST /api/memory/save"]
P5["DataRoutes manual import (awaited)"]
P6["WorktreeAdoption updateMergedIntoProject (awaited)"]
end
P1 & P2 & P3 & P4 & P5 & P6 --> GW["dbManager.getChromaSync()?"]
subgraph SyncLayer["ChromaSync.ts (sync layer)"]
GW --> FMT["format*Docs (explode obs->narrative/text/fact_i; summary->6 fields; prompt->1)"]
FMT --> META["attach metadata (sqlite_id, doc_type, project, field_type, fact_index, ...)"]
META --> SAN["sanitize null/empty metadata"]
SAN --> ADD["addDocuments (batch 100)"]
ADD --> DUP{"already exists?"}
DUP -- yes --> REC["delete-then-add reconcile"]
DUP -- no --> CALL["callTool('chroma_add_documents')"]
REC --> CALL
UMP["updateMergedIntoProject (rewrite metadata)"] --> CALL
P6 --> UMP
end
subgraph Backfill["Backfill Loop (startup, per project)"]
BF1["backfillAllProjects"] --> BF2["for each project"]
BF2 --> BF3["getExistingChromaIds (paged 1000)"]
BF3 --> BF4["diff vs SQLite sqlite_ids"]
BF4 --> BF5["batch-add missing"]
BF5 --> ADD
B9 --> BF1
end
subgraph MCP["ChromaMcpManager (process layer)"]
CALL --> LOCK{"connecting lock / connected?"}
LOCK -- not connected --> SPAWN["lazy connect"]
SPAWN --> OS{"platform"}
OS -- Windows --> WIN["cmd.exe /c uvx chroma-mcp"]
OS -- macOS --> MAC["build Zscaler-merged CA bundle + 4 SSL env vars"]
OS -- Linux --> LIN["uvx chroma-mcp"]
WIN & MAC & LIN --> MODE{"mode"}
MODE -- local --> ML["--client-type persistent --data-dir ~/.claude-mem/chroma"]
MODE -- remote --> MR["--client-type http --host --port [--ssl --tenant --database --api-key]"]
ML & MR --> SPN["spawn subprocess (cwd=os.homedir())"]
SPN --> SUP["register with supervisor"]
SPN --> STDIO["MCP over stdio (30s timeout)"]
STDIO --> ONCLOSE["transport.onclose -> stale-handler guard, flip state"]
ONCLOSE --> BACKOFF["10s reconnect backoff"]
LOCK -- connected --> SEND["send tool call"]
STDIO --> SEND
SEND --> RETRY{"transport error?"}
RETRY -- yes --> ONCE["single retry"]
RETRY -- no --> OK["return result"]
ONCE --> SEND
end
subgraph Subproc["uvx chroma-mcp subprocess"]
SEND --> CMP["chroma-mcp server"]
CMP --> STORE[("~/.claude-mem/chroma/")]
end
subgraph Read["Read Path"]
H1["HTTP GET /search"] --> H2["SearchManager"]
H2 --> H3["SearchOrchestrator.executeWithFallback"]
H3 --> DT{"decision tree"}
DT -- "no query" --> S1["SQLiteSearchStrategy"]
DT -- "query + chroma" --> S2["ChromaSearchStrategy"]
DT -- "concept/file/type + query" --> S3["HybridSearchStrategy"]
S2 --> WF["buildWhereFilter(searchType, project)"]
WF --> QC["queryChroma -> chroma_query_documents"]
S3 --> SQF["SQLite filter"] --> CR["Chroma rank"] --> INTX["intersection"] --> QC
QC --> CALL
OK --> ERRC{"connection error string match? ECONNREFUSED|ENOTFOUND|fetch failed|subprocess closed|timed out"}
ERRC -- yes --> RST["reset collectionCreated + wrap ChromaUnavailableError -> HTTP 503"]
ERRC -- no --> DEDUP["deduplicateQueryResults (parse doc IDs -> sqlite_ids)"]
DEDUP --> RECF["filterByRecency (90 days)"]
RECF --> CAT["categorizeByDocType"]
CAT --> HYD["SessionStore hydrate by ID"]
S1 --> HYD
HYD --> RESP["HTTP response"]
end
subgraph Shutdown["GracefulShutdown.performGracefulShutdown"]
SD1["HTTP server close"] --> SD2["SessionManager flush"]
SD2 --> SD3["close loopback MCP client"]
SD3 --> SD4["ChromaMcpManager.stop() SIGTERM/SIGKILL"]
SD4 --> SD5["dbManager.close() (ChromaSync.close = no-op log)"]
SD5 --> SD6["supervisor reaps remaining children"]
end
```
## MINIMAL PATH
**Removed:**
- **Granular per-field doc explosion** — one concatenated doc per observation/summary preserves recall with ~6× fewer vectors and no fact_index/field_type bookkeeping.
- **`field_type` metadata** — never used as a semantic filter; `sqlite_id` already covers hydration.
- **Shared collection + project filter** — per-project collections give cheaper queries and remove the `merged_into_project` rewrite path entirely.
- **`WorktreeAdoption.updateMergedIntoProject`** — dies with the shared-collection model.
- **Backfill on startup** — if writes are awaited and idempotent (upsert), the diff-and-fill loop is dead weight.
- **Dup-reconcile delete+add** — replaced by `upsert` which is one round trip and naturally idempotent.
- **HybridSearchStrategy** — SQLite filter + Chroma rank intersection is a small win for a lot of code; plain Chroma with `where` covers it.
- **90-day recency filter** — not core to "query semantically"; push to caller if needed.
- **MCP-stdio indirection** — chromadb persistent client in-process removes subprocess, supervisor registration, Windows `cmd` shim, Zscaler cert bundle, reconnect backoff, connecting lock, transport retry, and `onclose` stale-handler logic.
- **Singleton + connection-lock + backoff machinery** — gone with the subprocess.
- **Zscaler bundle, Windows `cmd.exe` shim, supervisor registration** — only exist to feed/reap the subprocess.
- **Six write call sites** — collapse to a single ingress; removes the `dbManager.getChromaSync()?` null-dance everywhere.
- **Fire-and-forget vs awaited split** — one awaited path with a bounded queue; failures log and drop, no silent divergence between SQLite and vector store.
```mermaid
flowchart TD
subgraph Boot["Boot"]
B1["worker start"] --> B2{"CHROMA_ENABLED?"}
B2 -- no --> B3["skip"]
B2 -- yes --> B4["new ChromaStore() -> in-process chromadb persistent client"]
B4 --> B5["open ~/.claude-mem/chroma/"]
end
subgraph Ingress["Single Write Ingress"]
P["producers (observations, summaries, prompts)"] --> ING["ChromaStore.ingest(doc, metadata)"]
ING --> ONE["one concatenated doc per item"]
ONE --> META["metadata: sqlite_id, doc_type, created_at_epoch"]
META --> UP["collection.upsert (idempotent)"]
UP --> COL[("per-project collection")]
end
subgraph Read["Read"]
Q1["HTTP GET /search"] --> Q2["ChromaStore.query(text, where)"]
Q2 --> COL
COL --> Q3["results -> sqlite_ids"]
Q3 --> Q4["SessionStore hydrate"]
Q4 --> Q5["HTTP response"]
end
subgraph Shutdown["Shutdown"]
SD1["HTTP server close"] --> SD2["ChromaStore.close() (flush persistent client)"]
end
B5 -.-> COL
```

View File

@@ -48,7 +48,7 @@ interface WorktreeEntry {
branch: string | null;
}
// Timeout for individual git invocations. Raised from 5s to 15s because
// worktree adoption runs against arbitrary user repos where cold-cache
// operations can legitimately exceed 5s.
const GIT_TIMEOUT_MS = 15000;
class DryRunRollback extends Error {
constructor() {
@@ -58,11 +58,31 @@ class DryRunRollback extends Error {
}
/**
 * Run a git command in `cwd` and capture trimmed stdout.
 *
 * @param cwd - Working directory passed to `git -C`.
 * @param args - Git subcommand and arguments.
 * @returns Trimmed stdout on success, or null on spawn failure, timeout,
 *          or a non-zero exit code. Failures are logged, never thrown.
 */
function gitCapture(cwd: string, args: string[]): string | null {
  const startTime = Date.now();
  const r = spawnSync('git', ['-C', cwd, ...args], {
    encoding: 'utf8',
    timeout: GIT_TIMEOUT_MS
  });
  const duration = Date.now() - startTime;
  if (duration > 1000) {
    logger.debug('GIT', `Slow git operation: git -C ${cwd} ${args.join(' ')} took ${duration}ms`);
  }
  // Spawn-level failure (ENOENT, timeout, signal). Check BEFORE the status
  // check: r.status is null here, and an early `status !== 0` return would
  // make this diagnostic branch unreachable.
  if (r.error) {
    logger.warn('GIT', `Git operation failed: git -C ${cwd} ${args.join(' ')}`, {
      error: r.error.message,
      // Node reports timeouts via error.code === 'ETIMEDOUT' (error.name stays
      // 'Error'); the signal check covers platforms that only deliver SIGTERM.
      timedOut: (r.error as NodeJS.ErrnoException).code === 'ETIMEDOUT'
        || (r.status === null && r.signal === 'SIGTERM')
    });
    return null;
  }
  if (r.status !== 0) {
    logger.debug('GIT', `Git returned non-zero exit code ${r.status}: git -C ${cwd} ${args.join(' ')}`, {
      stderr: r.stderr?.toString().trim()
    });
    return null;
  }
  return (r.stdout ?? '').trim();
}

View File

@@ -25,13 +25,14 @@ export class SessionSearch {
private static readonly MISSING_SEARCH_INPUT_MESSAGE = 'Either query or filters required for search';
constructor(dbPath?: string) {
if (!dbPath) {
constructor(dbPathOrDb: string | Database = DB_PATH) {
if (dbPathOrDb instanceof Database) {
this.db = dbPathOrDb;
} else {
ensureDir(DATA_DIR);
dbPath = DB_PATH;
this.db = new Database(dbPathOrDb);
this.db.run('PRAGMA journal_mode = WAL');
}
this.db = new Database(dbPath);
this.db.run('PRAGMA journal_mode = WAL');
// Cache FTS5 availability once at construction (avoids DDL probe on every query)
this._fts5Available = this.isFts5Available();

View File

@@ -35,17 +35,21 @@ function resolveCreateSessionArgs(
export class SessionStore {
public db: Database;
constructor(dbPath: string = DB_PATH) {
if (dbPath !== ':memory:') {
ensureDir(DATA_DIR);
}
this.db = new Database(dbPath);
constructor(dbPathOrDb: string | Database = DB_PATH) {
if (dbPathOrDb instanceof Database) {
this.db = dbPathOrDb;
} else {
if (dbPathOrDb !== ':memory:') {
ensureDir(DATA_DIR);
}
this.db = new Database(dbPathOrDb);
// Ensure optimized settings
this.db.run('PRAGMA journal_mode = WAL');
this.db.run('PRAGMA synchronous = NORMAL');
this.db.run('PRAGMA foreign_keys = ON');
this.db.run('PRAGMA journal_size_limit = 4194304'); // 4MB WAL cap (#1956)
// Ensure optimized settings only for new connections
this.db.run('PRAGMA journal_mode = WAL');
this.db.run('PRAGMA synchronous = NORMAL');
this.db.run('PRAGMA foreign_keys = ON');
this.db.run('PRAGMA journal_size_limit = 4194304'); // 4MB WAL cap (#1956)
}
// Initialize schema if needed (fresh database)
this.initializeSchema();

View File

@@ -549,9 +549,10 @@ export class ChromaSync {
* Reads from SQLite and syncs in batches
* @param projectOverride - If provided, backfill this project instead of this.project.
* Used by backfillAllProjects() to iterate projects without mutating instance state.
* @param storeOverride - If provided, use this SessionStore instead of creating a new one.
* Throws error if backfill fails
*/
async ensureBackfilled(projectOverride?: string): Promise<void> {
async ensureBackfilled(projectOverride?: string, storeOverride?: SessionStore): Promise<void> {
const backfillProject = projectOverride ?? this.project;
logger.info('CHROMA_SYNC', 'Starting smart backfill', { project: backfillProject });
@@ -560,7 +561,7 @@ export class ChromaSync {
// Fetch existing IDs from Chroma (fast, metadata only)
const existing = await this.getExistingChromaIds(backfillProject);
const db = new SessionStore();
const db = storeOverride ?? new SessionStore();
try {
await this.runBackfillPipeline(db, backfillProject, existing);
@@ -568,7 +569,10 @@ export class ChromaSync {
logger.error('CHROMA_SYNC', 'Backfill failed', { project: backfillProject }, error instanceof Error ? error : new Error(String(error)));
throw new Error(`Backfill failed: ${error instanceof Error ? error.message : String(error)}`);
} finally {
db.close();
// Only close if we created it
if (!storeOverride) {
db.close();
}
}
}
@@ -861,8 +865,8 @@ export class ChromaSync {
* with project scoped via metadata, matching how DatabaseManager and SearchManager operate.
* Designed to be called fire-and-forget on worker startup.
*/
static async backfillAllProjects(): Promise<void> {
const db = new SessionStore();
static async backfillAllProjects(storeOverride?: SessionStore): Promise<void> {
const db = storeOverride ?? new SessionStore();
const sync = new ChromaSync('claude-mem');
try {
const projects = db.db.prepare(
@@ -873,7 +877,7 @@ export class ChromaSync {
for (const { project } of projects) {
try {
await sync.ensureBackfilled(project);
await sync.ensureBackfilled(project, db);
} catch (error) {
if (error instanceof Error) {
logger.error('CHROMA_SYNC', `Backfill failed for project: ${project}`, {}, error);
@@ -885,7 +889,10 @@ export class ChromaSync {
}
} finally {
await sync.close();
db.close();
// Only close if we created it
if (!storeOverride) {
db.close();
}
}
}

View File

@@ -102,6 +102,7 @@ import { SettingsRoutes } from './worker/http/routes/SettingsRoutes.js';
import { LogsRoutes } from './worker/http/routes/LogsRoutes.js';
import { MemoryRoutes } from './worker/http/routes/MemoryRoutes.js';
import { CorpusRoutes } from './worker/http/routes/CorpusRoutes.js';
import { ChromaRoutes } from './worker/http/routes/ChromaRoutes.js';
// Knowledge agent services
import { CorpusStore } from './worker/knowledge/CorpusStore.js';
@@ -275,6 +276,9 @@ export class WorkerService implements WorkerRef {
private registerRoutes(): void {
// IMPORTANT: Middleware must be registered BEFORE routes (Express processes in order)
// Register Chroma routes immediately so they bypass the initialization guard
this.server.registerRoutes(new ChromaRoutes());
// Early handler for /api/context/inject — fail open if not yet initialized
this.server.app.get('/api/context/inject', async (req, res, next) => {
if (!this.initializationCompleteFlag || !this.searchRoutes) {
@@ -288,14 +292,20 @@ export class WorkerService implements WorkerRef {
// Guard ALL /api/* routes during initialization — wait for DB with timeout
// Exceptions: /api/health, /api/readiness, /api/version (handled by Server.ts core routes)
// and /api/context/inject (handled above with fail-open)
// and /api/chroma/status (diagnostic endpoint)
this.server.app.use('/api', async (req, res, next) => {
// Bypass guard for diagnostic endpoints
if (req.path === '/chroma/status' || req.path === '/health' || req.path === '/readiness' || req.path === '/version') {
next();
return;
}
if (this.initializationCompleteFlag) {
next();
return;
}
const timeoutMs = 30000;
const timeoutMs = 120000; // 2 minutes
const timeoutPromise = new Promise<void>((_, reject) =>
setTimeout(() => reject(new Error('Database initialization timeout')), timeoutMs)
);
@@ -374,6 +384,7 @@ export class WorkerService implements WorkerRef {
*/
private async initializeBackground(): Promise<void> {
try {
logger.info('WORKER', 'Background initialization starting...');
await aggressiveStartupCleanup();
// Load mode configuration
@@ -383,47 +394,39 @@ export class WorkerService implements WorkerRef {
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
const modeId = settings.CLAUDE_MEM_MODE;
ModeManager.getInstance().loadMode(modeId);
logger.info('SYSTEM', `Mode loaded: ${modeId}`);
// One-time chroma wipe for users upgrading from versions with duplicate worker bugs.
// Only runs in local mode (chroma is local-only). Backfill at line ~414 rebuilds from SQLite.
if (settings.CLAUDE_MEM_MODE === 'local' || !settings.CLAUDE_MEM_MODE) {
logger.info('WORKER', 'Checking for one-time Chroma migration...');
runOneTimeChromaMigration();
}
// One-time remap of pre-worktree project names using pending_messages.cwd.
// Must run before dbManager.initialize() so we don't hold the DB open.
logger.info('WORKER', 'Checking for one-time CWD remap...');
runOneTimeCwdRemap();
// Stamp merged worktrees so their observations surface under the parent
// project. Runs every startup (not marker-gated) because git state evolves
// and the engine is fully idempotent. Must also precede dbManager.initialize().
//
// The worker daemon is spawned with cwd=marketplace-plugin-dir (not a git
// repo), so we can't seed adoption with process.cwd(). Instead, discover
// parent repos from recorded pending_messages.cwd values.
let adoptions: Awaited<ReturnType<typeof adoptMergedWorktreesForAllKnownRepos>> | null = null;
try {
adoptions = await adoptMergedWorktreesForAllKnownRepos({});
} catch (err) {
// [ANTI-PATTERN IGNORED]: Worktree adoption is best-effort on startup; failure must not block worker initialization
if (err instanceof Error) {
logger.error('WORKER', 'Worktree adoption failed (non-fatal)', {}, err);
} else {
logger.error('WORKER', 'Worktree adoption failed (non-fatal) with non-Error', {}, new Error(String(err)));
}
}
if (adoptions) {
for (const adoption of adoptions) {
if (adoption.adoptedObservations > 0 || adoption.adoptedSummaries > 0 || adoption.chromaUpdates > 0) {
logger.info('SYSTEM', 'Merged worktrees adopted on startup', adoption);
}
if (adoption.errors.length > 0) {
logger.warn('SYSTEM', 'Worktree adoption had per-branch errors', {
repoPath: adoption.repoPath,
errors: adoption.errors
});
// Stamp merged worktrees (Non-blocking, fire-and-forget)
logger.info('WORKER', 'Adopting merged worktrees (background)...');
adoptMergedWorktreesForAllKnownRepos({}).then(adoptions => {
if (adoptions) {
for (const adoption of adoptions) {
if (adoption.adoptedObservations > 0 || adoption.adoptedSummaries > 0 || adoption.chromaUpdates > 0) {
logger.info('SYSTEM', 'Merged worktrees adopted in background', adoption);
}
if (adoption.errors.length > 0) {
logger.warn('SYSTEM', 'Worktree adoption had per-branch errors', {
repoPath: adoption.repoPath,
errors: adoption.errors
});
}
}
}
}
}).catch(err => {
logger.error('WORKER', 'Worktree adoption failed (background)', {}, err instanceof Error ? err : new Error(String(err)));
});
// Initialize ChromaMcpManager only if Chroma is enabled
const chromaEnabled = settings.CLAUDE_MEM_CHROMA_ENABLED !== 'false';
@@ -434,22 +437,12 @@ export class WorkerService implements WorkerRef {
logger.info('SYSTEM', 'Chroma disabled via CLAUDE_MEM_CHROMA_ENABLED=false, skipping ChromaMcpManager');
}
const modeId = settings.CLAUDE_MEM_MODE;
ModeManager.getInstance().loadMode(modeId);
logger.info('SYSTEM', `Mode loaded: ${modeId}`);
logger.info('WORKER', 'Initializing database manager...');
await this.dbManager.initialize();
// No startup recovery sweep: claimNextMessage's self-healing predicate
// (worker_pid NOT IN live_worker_pids) reclaims any 'processing' rows
// left by a previous worker incarnation on the next claim. See
// PATHFINDER-2026-04-22 Plan 01 Phase 3.
// One-shot GC for terminally-failed rows so pending_messages does not
// grow unbounded on long-running or high-failure-rate installations.
// Not a reaper — runs once per worker start. 7 days retains enough
// history for operator inspection without degrading claim latency.
// One-shot GC for terminally-failed rows
try {
logger.info('WORKER', 'Running startup GC for pending messages...');
const { PendingMessageStore } = await import('./sqlite/PendingMessageStore.js');
const pendingStore = new PendingMessageStore(this.dbManager.getSessionStore().db, 3);
const cleared = pendingStore.clearFailedOlderThan(7 * 24 * 60 * 60 * 1000);
@@ -461,6 +454,7 @@ export class WorkerService implements WorkerRef {
}
// Initialize search services
logger.info('WORKER', 'Initializing search services...');
const formattingService = new FormattingService();
const timelineService = new TimelineService();
const searchManager = new SearchManager(
@@ -491,8 +485,6 @@ export class WorkerService implements WorkerRef {
logger.info('WORKER', 'CorpusRoutes registered');
// DB and search are ready — mark initialization complete so hooks can proceed.
// MCP connection is tracked separately via mcpReady and is NOT required for
// the worker to serve context/search requests.
this.initializationCompleteFlag = true;
this.resolveInitialization();
logger.info('SYSTEM', 'Core initialization complete (DB + search ready)');
@@ -501,7 +493,7 @@ export class WorkerService implements WorkerRef {
// Auto-backfill Chroma for all projects if out of sync with SQLite (fire-and-forget)
if (this.chromaMcpManager) {
ChromaSync.backfillAllProjects().then(() => {
ChromaSync.backfillAllProjects(this.dbManager.getSessionStore()).then(() => {
logger.info('CHROMA_SYNC', 'Backfill check complete for all projects');
}).catch(error => {
logger.error('CHROMA_SYNC', 'Backfill failed (non-blocking)', {}, error as Error);
@@ -509,92 +501,55 @@ export class WorkerService implements WorkerRef {
}
// Mark MCP as externally ready once the bundled stdio server binary exists.
// Codex/Claude Desktop connect to this binary directly; the loopback client
// below is only a best-effort self-check and should not mark health false.
const mcpServerPath = path.join(__dirname, 'mcp-server.cjs');
this.mcpReady = existsSync(mcpServerPath);
// Best-effort loopback MCP self-check
// Best-effort loopback MCP self-check (Non-blocking, F&F)
this.runMcpSelfCheck(mcpServerPath).catch(err => {
logger.debug('WORKER', 'MCP self-check failed (non-fatal)', { error: err.message });
});
return;
} catch (error) {
// Background initialization failed - log and let worker fail health checks
logger.error('SYSTEM', 'Background initialization failed', {}, error instanceof Error ? error : undefined);
}
}
/**
* Run a best-effort loopback MCP self-check to verify the bundled server can start.
* This is entirely diagnostic and does not block worker availability.
*/
private async runMcpSelfCheck(mcpServerPath: string): Promise<void> {
try {
getSupervisor().assertCanSpawn('mcp server');
const transport = new StdioClientTransport({
command: process.execPath, // Use resolved path, not bare 'node' which fails on non-interactive PATH (#1876)
command: process.execPath,
args: [mcpServerPath],
env: Object.fromEntries(
Object.entries(sanitizeEnv(process.env)).filter(([, value]) => value !== undefined)
) as Record<string, string>
});
const MCP_INIT_TIMEOUT_MS = 300000;
const MCP_INIT_TIMEOUT_MS = 60000; // 1 minute is plenty for local check
const mcpConnectionPromise = this.mcpClient.connect(transport);
let timeoutId: ReturnType<typeof setTimeout>;
const timeoutPromise = new Promise<never>((_, reject) => {
timeoutId = setTimeout(
() => reject(new Error('MCP connection timeout after 5 minutes')),
MCP_INIT_TIMEOUT_MS
setTimeout(
() => reject(new Error('MCP connection timeout')),
60000
);
});
try {
await Promise.race([mcpConnectionPromise, timeoutPromise]);
} catch (connectionError) {
clearTimeout(timeoutId!);
logger.warn('WORKER', 'MCP loopback self-check failed, cleaning up subprocess', {
error: connectionError instanceof Error ? connectionError.message : String(connectionError)
});
try {
await transport.close();
} catch (transportCloseError) {
// [ANTI-PATTERN IGNORED]: transport.close() is best-effort cleanup after MCP connection already failed; supervisor handles orphan processes
logger.debug('WORKER', 'transport.close() failed during MCP cleanup', {
error: transportCloseError instanceof Error ? transportCloseError.message : String(transportCloseError)
});
}
logger.info('WORKER', 'Bundled MCP server remains available for external stdio clients', {
path: mcpServerPath
});
return;
}
clearTimeout(timeoutId!);
await Promise.race([mcpConnectionPromise, timeoutPromise]);
logger.info('WORKER', 'MCP loopback self-check connected successfully');
const mcpProcess = (transport as unknown as { _process?: import('child_process').ChildProcess })._process;
if (mcpProcess?.pid) {
getSupervisor().registerProcess('mcp-server', {
pid: mcpProcess.pid,
type: 'mcp',
startedAt: new Date().toISOString()
}, mcpProcess);
mcpProcess.once('exit', () => {
getSupervisor().unregisterProcess('mcp-server');
});
}
logger.success('WORKER', 'MCP loopback self-check connected');
// No orphan reaper, no stale-session reaper, no periodic WAL checkpoint.
// - Orphan prevention: SDK subprocesses spawn in their own process group
// via createSdkSpawnFactory so `kill(-pgid, signal)` tears down every
// descendant in one syscall (Principle 5).
// - Stale sessions: session cleanup runs in the `generatorPromise.finally`
// block of startSessionProcessor — primary-path teardown on generator
// exit, not a periodic sweep (Principle 4).
// - WAL growth: handled by SQLite's built-in `PRAGMA wal_autocheckpoint`
// (applied at DB open time); no app-level timer is required.
// Auto-recover orphaned queues (fire-and-forget with error logging)
this.processPendingQueues(50).then(result => {
if (result.sessionsStarted > 0) {
logger.info('SYSTEM', `Auto-recovered ${result.sessionsStarted} sessions with pending work`, {
totalPending: result.totalPendingSessions,
started: result.sessionsStarted,
sessionIds: result.startedSessionIds
});
}
}).catch(error => {
logger.error('SYSTEM', 'Auto-recovery of pending queues failed', {}, error as Error);
});
// Cleanup
await transport.close();
} catch (error) {
logger.error('SYSTEM', 'Background initialization failed', {}, error as Error);
throw error;
logger.warn('WORKER', 'MCP loopback self-check failed', {
error: error instanceof Error ? error.message : String(error)
});
}
}

View File

@@ -8,15 +8,17 @@
* - ChromaSync integration
*/
import { Database } from 'bun:sqlite';
import { SessionStore } from '../sqlite/SessionStore.js';
import { SessionSearch } from '../sqlite/SessionSearch.js';
import { ChromaSync } from '../sync/ChromaSync.js';
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
import { USER_SETTINGS_PATH } from '../../shared/paths.js';
import { USER_SETTINGS_PATH, DB_PATH } from '../../shared/paths.js';
import { logger } from '../../utils/logger.js';
import type { DBSession } from '../worker-types.js';
export class DatabaseManager {
private db: Database | null = null;
private sessionStore: SessionStore | null = null;
private sessionSearch: SessionSearch | null = null;
private chromaSync: ChromaSync | null = null;
@@ -26,8 +28,11 @@ export class DatabaseManager {
*/
async initialize(): Promise<void> {
// Open database connection (ONCE)
this.sessionStore = new SessionStore();
this.sessionSearch = new SessionSearch();
this.db = new Database(DB_PATH);
// Shared connection between store and search
this.sessionStore = new SessionStore(this.db);
this.sessionSearch = new SessionSearch(this.db);
// Initialize ChromaSync only if Chroma is enabled (SQLite-only fallback when disabled)
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
@@ -38,7 +43,7 @@ export class DatabaseManager {
logger.info('DB', 'Chroma disabled via CLAUDE_MEM_CHROMA_ENABLED=false, using SQLite-only search');
}
logger.info('DB', 'Database initialized');
logger.info('DB', 'Database initialized (shared connection)');
}
/**
@@ -51,13 +56,14 @@ export class DatabaseManager {
this.chromaSync = null;
}
if (this.sessionStore) {
this.sessionStore.close();
this.sessionStore = null;
}
if (this.sessionSearch) {
this.sessionSearch.close();
this.sessionSearch = null;
// We don't call sessionStore.close() or sessionSearch.close()
// because they share this.db which we close below.
this.sessionStore = null;
this.sessionSearch = null;
if (this.db) {
this.db.close();
this.db = null;
}
logger.info('DB', 'Database closed');
}

View File

@@ -218,12 +218,6 @@ export class SearchManager {
whereFilter = { doc_type: 'user_prompt' };
}
// Include project in the Chroma where clause to scope vector search.
// Without this, larger projects dominate the top-N results and smaller
// projects get crowded out before the post-hoc SQLite filter.
// Match both native-provenance rows (project) and adopted merged-worktree
// rows (merged_into_project) so a parent-project query surfaces its
// merged children's observations too.
if (options.project) {
const projectFilter = {
$or: [
@@ -236,82 +230,92 @@ export class SearchManager {
: projectFilter;
}
// Step 1: Chroma semantic search with optional type + project filter
const chromaResults = await this.queryChroma(query, 100, whereFilter);
chromaSucceeded = true; // Chroma didn't throw error
logger.debug('SEARCH', 'ChromaDB returned semantic matches', { matchCount: chromaResults.ids.length });
try {
// Step 1: Chroma semantic search with optional type + project filter
const chromaResults = await this.queryChroma(query, 100, whereFilter);
chromaSucceeded = true; // Chroma didn't throw error
logger.debug('SEARCH', 'ChromaDB returned semantic matches', { matchCount: chromaResults.ids.length });
if (chromaResults.ids.length > 0) {
// Step 2: Filter by date range
// Use user-provided dateRange if available, otherwise fall back to 90-day recency window
const { dateRange } = options;
let startEpoch: number | undefined;
let endEpoch: number | undefined;
if (chromaResults.ids.length > 0) {
// Step 2: Filter by date range
const { dateRange } = options;
let startEpoch: number | undefined;
let endEpoch: number | undefined;
if (dateRange) {
if (dateRange.start) {
startEpoch = typeof dateRange.start === 'number'
? dateRange.start
: new Date(dateRange.start).getTime();
if (dateRange) {
if (dateRange.start) {
startEpoch = typeof dateRange.start === 'number'
? dateRange.start
: new Date(dateRange.start).getTime();
}
if (dateRange.end) {
endEpoch = typeof dateRange.end === 'number'
? dateRange.end
: new Date(dateRange.end).getTime();
}
} else {
// Default: 90-day recency window
startEpoch = Date.now() - SEARCH_CONSTANTS.RECENCY_WINDOW_MS;
}
if (dateRange.end) {
endEpoch = typeof dateRange.end === 'number'
? dateRange.end
: new Date(dateRange.end).getTime();
const recentMetadata = chromaResults.metadatas.map((meta, idx) => ({
id: chromaResults.ids[idx],
meta,
isRecent: meta && meta.created_at_epoch != null
&& (!startEpoch || meta.created_at_epoch >= startEpoch)
&& (!endEpoch || meta.created_at_epoch <= endEpoch)
})).filter(item => item.isRecent);
logger.debug('SEARCH', dateRange ? 'Results within user date range' : 'Results within 90-day window', { count: recentMetadata.length });
// Step 3: Categorize IDs by document type
const obsIds: number[] = [];
const sessionIds: number[] = [];
const promptIds: number[] = [];
for (const item of recentMetadata) {
const docType = item.meta?.doc_type;
if (docType === 'observation' && searchObservations) {
obsIds.push(item.id);
} else if (docType === 'session_summary' && searchSessions) {
sessionIds.push(item.id);
} else if (docType === 'user_prompt' && searchPrompts) {
promptIds.push(item.id);
}
}
// Step 4: Hydrate from SQLite with additional filters
if (obsIds.length > 0) {
const obsOptions = { ...options, type: obs_type, concepts, files };
observations = this.sessionStore.getObservationsByIds(obsIds, obsOptions);
}
if (sessionIds.length > 0) {
sessions = this.sessionStore.getSessionSummariesByIds(sessionIds, { orderBy: 'date_desc', limit: options.limit, project: options.project });
}
if (promptIds.length > 0) {
prompts = this.sessionStore.getUserPromptsByIds(promptIds, { orderBy: 'date_desc', limit: options.limit, project: options.project });
}
} else {
// Default: 90-day recency window
startEpoch = Date.now() - SEARCH_CONSTANTS.RECENCY_WINDOW_MS;
logger.debug('SEARCH', 'ChromaDB found no matches (final result, no FTS5 fallback)', {});
}
} catch (chromaError) {
const errorObject = chromaError instanceof Error ? chromaError : new Error(String(chromaError));
logger.warn('SEARCH', 'ChromaDB semantic search failed, falling back to FTS5 keyword search', {}, errorObject);
chromaFailed = true;
const recentMetadata = chromaResults.metadatas.map((meta, idx) => ({
id: chromaResults.ids[idx],
meta,
isRecent: meta && meta.created_at_epoch != null
&& (!startEpoch || meta.created_at_epoch >= startEpoch)
&& (!endEpoch || meta.created_at_epoch <= endEpoch)
})).filter(item => item.isRecent);
logger.debug('SEARCH', dateRange ? 'Results within user date range' : 'Results within 90-day window', { count: recentMetadata.length });
// Step 3: Categorize IDs by document type
const obsIds: number[] = [];
const sessionIds: number[] = [];
const promptIds: number[] = [];
for (const item of recentMetadata) {
const docType = item.meta?.doc_type;
if (docType === 'observation' && searchObservations) {
obsIds.push(item.id);
} else if (docType === 'session_summary' && searchSessions) {
sessionIds.push(item.id);
} else if (docType === 'user_prompt' && searchPrompts) {
promptIds.push(item.id);
}
// Fallback to FTS5 path since Chroma failed
if (searchObservations) {
observations = this.sessionSearch.searchObservations(query, { ...options, type: obs_type, concepts, files });
}
logger.debug('SEARCH', 'Categorized results by type', { observations: obsIds.length, sessions: sessionIds.length, prompts: prompts.length });
// Step 4: Hydrate from SQLite with additional filters
if (obsIds.length > 0) {
// Apply obs_type, concepts, files filters if provided
const obsOptions = { ...options, type: obs_type, concepts, files };
observations = this.sessionStore.getObservationsByIds(obsIds, obsOptions);
if (searchSessions) {
sessions = this.sessionSearch.searchSessions(query, options);
}
if (sessionIds.length > 0) {
sessions = this.sessionStore.getSessionSummariesByIds(sessionIds, { orderBy: 'date_desc', limit: options.limit, project: options.project });
if (searchPrompts) {
prompts = this.sessionSearch.searchUserPrompts(query, options);
}
if (promptIds.length > 0) {
prompts = this.sessionStore.getUserPromptsByIds(promptIds, { orderBy: 'date_desc', limit: options.limit, project: options.project });
}
logger.debug('SEARCH', 'Hydrated results from SQLite', { observations: observations.length, sessions: sessions.length, prompts: prompts.length });
} else {
// Chroma returned 0 results - this is the correct answer, don't fall back to FTS5
logger.debug('SEARCH', 'ChromaDB found no matches (final result, no FTS5 fallback)', {});
}
}
// ChromaDB not initialized - fall back to FTS5 keyword search (#1913, #2048)
// PATH 3: FTS5 KEYWORD SEARCH (Chroma not initialized)
else if (query) {
logger.debug('SEARCH', 'ChromaDB not initialized — falling back to FTS5 keyword search', {});
try {

View File

@@ -0,0 +1,47 @@
/**
* Chroma Routes
*
* Provides diagnostic endpoints for ChromaDB integration.
*/
import express, { Request, Response } from 'express';
import { BaseRouteHandler } from '../BaseRouteHandler.js';
import { ChromaMcpManager } from '../../../sync/ChromaMcpManager.js';
import { logger } from '../../../../utils/logger.js';
import { SettingsDefaultsManager } from '../../../../shared/SettingsDefaultsManager.js';
import { USER_SETTINGS_PATH } from '../../../../shared/paths.js';
/**
 * Diagnostic HTTP routes for the ChromaDB (chroma-mcp) integration.
 * Read-only: exposes whether Chroma is enabled and whether the MCP
 * subprocess is currently responding to tool calls.
 */
export class ChromaRoutes extends BaseRouteHandler {
  setupRoutes(app: express.Application): void {
    app.get('/api/chroma/status', this.handleGetStatus.bind(this));
  }

  /**
   * GET /api/chroma/status
   * Returns current health and connection status of chroma-mcp.
   */
  private handleGetStatus = this.wrapHandler(async (_req: Request, res: Response): Promise<void> => {
    const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
    const enabled = settings.CLAUDE_MEM_CHROMA_ENABLED !== 'false';
    const timestamp = new Date().toISOString();

    // Short-circuit when the feature flag is off — no point probing the
    // MCP manager for a subprocess that will never be spawned.
    if (!enabled) {
      res.json({
        status: 'disabled',
        connected: false,
        timestamp,
        details: 'Chroma is disabled via CLAUDE_MEM_CHROMA_ENABLED=false'
      });
      return;
    }

    const healthy = await ChromaMcpManager.getInstance().isHealthy();
    res.json({
      status: healthy ? 'healthy' : 'unhealthy',
      connected: healthy,
      timestamp,
      details: healthy
        ? 'chroma-mcp is responding to tool calls'
        : 'chroma-mcp health check failed'
    });
  });
}