mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
fix: GC failed pending_messages rows at startup (Greptile iter 4)
Plan 07 deleted clearFailed/clearFailedOlderThan as "dead code", but with the periodic sweep also removed, nothing reaps status='failed' rows now — they accumulate indefinitely. Since claimNextMessage's self-healing subquery scans this table, unbounded growth degrades claim latency over time. Re-introduces clearFailedOlderThan and calls it once at worker startup (not a reaper — one-shot, idempotent). 7-day retention keeps enough history for operator inspection while bounding the table. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -198,6 +198,22 @@ export class PendingMessageStore {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Purge `status='failed'` rows from `pending_messages` whose failure
 * timestamp is older than the given retention window.
 *
 * Intended to be invoked once at worker startup rather than on a timer:
 * `claimNextMessage`'s self-healing subquery scans this table, so keeping
 * the number of failed rows bounded keeps claim latency predictable on
 * long-running or high-failure-rate installations. Running it again with
 * the same window simply finds nothing further to delete.
 *
 * A row's age is taken from `failed_at_epoch`, falling back to
 * `completed_at_epoch`, and finally to 0 — so a failed row carrying
 * neither timestamp compares as older than any positive cutoff.
 *
 * @param thresholdMs Retention window in milliseconds; failed rows whose
 *   timestamp is earlier than `Date.now() - thresholdMs` are removed.
 * @returns The number of rows deleted.
 */
clearFailedOlderThan(thresholdMs: number): number {
  const purgeFailedRows = this.db.prepare(`
    DELETE FROM pending_messages
    WHERE status = 'failed' AND COALESCE(failed_at_epoch, completed_at_epoch, 0) < ?
  `);

  // Anything stamped strictly before this instant is past retention.
  const expiryEpoch = Date.now() - thresholdMs;
  const { changes } = purgeFailedRows.run(expiryEpoch);
  return changes;
}
|
||||
|
||||
/**
|
||||
* Get all pending messages for session (ordered by creation time)
|
||||
*/
|
||||
|
||||
@@ -445,6 +445,21 @@ export class WorkerService implements WorkerRef {
|
||||
// left by a previous worker incarnation on the next claim. See
|
||||
// PATHFINDER-2026-04-22 Plan 01 Phase 3.
|
||||
|
||||
// One-shot GC for terminally-failed rows so pending_messages does not
|
||||
// grow unbounded on long-running or high-failure-rate installations.
|
||||
// Not a reaper — runs once per worker start. 7 days retains enough
|
||||
// history for operator inspection without degrading claim latency.
|
||||
try {
|
||||
const { PendingMessageStore } = await import('./sqlite/PendingMessageStore.js');
|
||||
const pendingStore = new PendingMessageStore(this.dbManager.getSessionStore().db, 3);
|
||||
const cleared = pendingStore.clearFailedOlderThan(7 * 24 * 60 * 60 * 1000);
|
||||
if (cleared > 0) {
|
||||
logger.info('QUEUE', 'Startup GC cleared old failed pending_messages rows', { cleared });
|
||||
}
|
||||
} catch (err) {
|
||||
logger.warn('QUEUE', 'Startup GC for failed pending_messages rows failed', {}, err instanceof Error ? err : undefined);
|
||||
}
|
||||
|
||||
// Initialize search services
|
||||
const formattingService = new FormattingService();
|
||||
const timelineService = new TimelineService();
|
||||
|
||||
Reference in New Issue
Block a user