mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
* Initial plan * fix: break infinite summary-retry loop (#1633) Three-part fix: 1. Parser coercion: When LLM returns <observation> tags instead of <summary>, coerce observation content into summary fields (root cause fix) 2. Stronger summary prompt: Add clearer tag requirements with warnings 3. Circuit breaker: Track consecutive summary failures per session, skip further attempts after 3 failures to prevent unbounded prompt growth Agent-Logs-Url: https://github.com/thedotmack/claude-mem/sessions/e345e8ec-bc97-4eaa-94bd-6e951fda8f77 Co-authored-by: thedotmack <683968+thedotmack@users.noreply.github.com> * refactor: extract shared constants for summary mode marker and failure threshold Addresses code review feedback: SUMMARY_MODE_MARKER and MAX_CONSECUTIVE_SUMMARY_FAILURES are now defined once in sdk/prompts.ts and imported by ResponseProcessor and SessionManager. Agent-Logs-Url: https://github.com/thedotmack/claude-mem/sessions/e345e8ec-bc97-4eaa-94bd-6e951fda8f77 Co-authored-by: thedotmack <683968+thedotmack@users.noreply.github.com> * fix: guard summary failure counter on summaryExpected (Greptile P1) The circuit breaker counter previously incremented on any response containing <observation> or <summary> tags — which matches virtually every normal observation response. After 3 observations the breaker would open and permanently block summarization, reproducing the data-loss scenario #1633 was meant to prevent. Gate the increment block on summaryExpected (already computed for parseSummary coercion) so the counter only tracks actual summary attempts. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * test: cover circuit-breaker + apply review polish - Use findLast / at(-1) for last-user-message lookup instead of filter + index (O(1) common case). - Drop redundant `|| 0` fallback — field is required and initialized. - Add comment noting counter is ephemeral by design. - Add ResponseProcessor tests covering: * counter NOT incrementing on normal observation responses (regression guard for the Greptile P1) * counter incrementing when a summary was expected but missing * counter resetting to 0 on successful summary storage Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * fix: iterate all observation blocks; don't count skip_summary as failure Addresses CodeRabbit review on #2072: - coerceObservationToSummary now iterates all <observation> blocks with a global regex and returns the first block that has title, narrative, or facts. Previously, an empty leading observation would short-circuit and discard populated follow-ups. - Circuit-breaker counter now treats explicit <skip_summary/> as neutral — neither a failure nor a success — so a run that happens to end on a skip doesn't punish the session or mask a prior bad streak. Real failures (no summary, no skip) still increment. - Tests added for both cases. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * test: reference SUMMARY_MODE_MARKER constant instead of hardcoded string Addresses CodeRabbit nitpick: tests should pull the marker from the canonical source so they don't silently drift when the constant is renamed or edited. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * fix: also coerce observations when <summary> has empty sub-tags When the LLM wraps an empty <summary></summary> around real observation content, the #1360 empty-subtag guard rejects the summary and returns null — which would lose the observation content and resurrect the #1633 retry loop. Fall back to coerceObservationToSummary in that branch too, mirroring the unmatched-<summary> path. Adds a test covering the empty-summary-wraps-observation case and a guard test for empty summary with no observation content. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: thedotmack <683968+thedotmack@users.noreply.github.com> Co-authored-by: Alex Newman <thedotmack@gmail.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
831 lines
26 KiB
TypeScript
831 lines
26 KiB
TypeScript
import { describe, it, expect, mock, beforeEach, afterEach, spyOn } from 'bun:test';
|
|
import { logger } from '../../../src/utils/logger.js';
|
|
|
|
// Mock modules that cause import chain issues - MUST be before imports
|
|
// Use full paths from test file location
|
|
mock.module('../../../src/services/worker-service.js', () => ({
|
|
updateCursorContextForProject: () => Promise.resolve(),
|
|
}));
|
|
|
|
mock.module('../../../src/shared/worker-utils.js', () => ({
|
|
getWorkerPort: () => 37777,
|
|
}));
|
|
|
|
// Mock the ModeManager
|
|
mock.module('../../../src/services/domain/ModeManager.js', () => ({
|
|
ModeManager: {
|
|
getInstance: () => ({
|
|
getActiveMode: () => ({
|
|
name: 'code',
|
|
prompts: {
|
|
init: 'init prompt',
|
|
observation: 'obs prompt',
|
|
summary: 'summary prompt',
|
|
},
|
|
observation_types: [{ id: 'discovery' }, { id: 'bugfix' }, { id: 'refactor' }],
|
|
observation_concepts: [],
|
|
}),
|
|
}),
|
|
},
|
|
}));
|
|
|
|
// Import after mocks
|
|
import { processAgentResponse } from '../../../src/services/worker/agents/ResponseProcessor.js';
|
|
import { SUMMARY_MODE_MARKER } from '../../../src/sdk/prompts.js';
|
|
import type { WorkerRef, StorageResult } from '../../../src/services/worker/agents/types.js';
|
|
import type { ActiveSession } from '../../../src/services/worker-types.js';
|
|
import type { DatabaseManager } from '../../../src/services/worker/DatabaseManager.js';
|
|
import type { SessionManager } from '../../../src/services/worker/SessionManager.js';
|
|
|
|
// Spy on logger methods to suppress output during tests
|
|
let loggerSpies: ReturnType<typeof spyOn>[] = [];
|
|
|
|
describe('ResponseProcessor', () => {
|
|
// Mocks
|
|
let mockStoreObservations: ReturnType<typeof mock>;
|
|
let mockChromaSyncObservation: ReturnType<typeof mock>;
|
|
let mockChromaSyncSummary: ReturnType<typeof mock>;
|
|
let mockBroadcast: ReturnType<typeof mock>;
|
|
let mockBroadcastProcessingStatus: ReturnType<typeof mock>;
|
|
let mockDbManager: DatabaseManager;
|
|
let mockSessionManager: SessionManager;
|
|
let mockWorker: WorkerRef;
|
|
|
|
beforeEach(() => {
|
|
// Spy on logger to suppress output
|
|
loggerSpies = [
|
|
spyOn(logger, 'info').mockImplementation(() => {}),
|
|
spyOn(logger, 'debug').mockImplementation(() => {}),
|
|
spyOn(logger, 'warn').mockImplementation(() => {}),
|
|
spyOn(logger, 'error').mockImplementation(() => {}),
|
|
];
|
|
|
|
// Create fresh mocks for each test
|
|
mockStoreObservations = mock(() => ({
|
|
observationIds: [1, 2],
|
|
summaryId: 1,
|
|
createdAtEpoch: 1700000000000,
|
|
} as StorageResult));
|
|
|
|
mockChromaSyncObservation = mock(() => Promise.resolve());
|
|
mockChromaSyncSummary = mock(() => Promise.resolve());
|
|
|
|
mockDbManager = {
|
|
getSessionStore: () => ({
|
|
storeObservations: mockStoreObservations,
|
|
ensureMemorySessionIdRegistered: mock(() => {}), // FK fix (Issue #846)
|
|
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })), // FK fix (Issue #846)
|
|
}),
|
|
getChromaSync: () => ({
|
|
syncObservation: mockChromaSyncObservation,
|
|
syncSummary: mockChromaSyncSummary,
|
|
}),
|
|
} as unknown as DatabaseManager;
|
|
|
|
mockSessionManager = {
|
|
getMessageIterator: async function* () {
|
|
yield* [];
|
|
},
|
|
getPendingMessageStore: () => ({
|
|
markProcessed: mock(() => {}),
|
|
confirmProcessed: mock(() => {}), // CLAIM-CONFIRM pattern: confirm after successful storage
|
|
cleanupProcessed: mock(() => 0),
|
|
resetStuckMessages: mock(() => 0),
|
|
}),
|
|
} as unknown as SessionManager;
|
|
|
|
mockBroadcast = mock(() => {});
|
|
mockBroadcastProcessingStatus = mock(() => {});
|
|
|
|
mockWorker = {
|
|
sseBroadcaster: {
|
|
broadcast: mockBroadcast,
|
|
},
|
|
broadcastProcessingStatus: mockBroadcastProcessingStatus,
|
|
};
|
|
});
|
|
|
|
afterEach(() => {
|
|
loggerSpies.forEach(spy => spy.mockRestore());
|
|
mock.restore();
|
|
});
|
|
|
|
// Helper to create mock session
|
|
function createMockSession(
|
|
overrides: Partial<ActiveSession> = {}
|
|
): ActiveSession {
|
|
return {
|
|
sessionDbId: 1,
|
|
contentSessionId: 'content-session-123',
|
|
memorySessionId: 'memory-session-456',
|
|
project: 'test-project',
|
|
userPrompt: 'Test prompt',
|
|
pendingMessages: [],
|
|
abortController: new AbortController(),
|
|
generatorPromise: null,
|
|
lastPromptNumber: 5,
|
|
startTime: Date.now(),
|
|
cumulativeInputTokens: 100,
|
|
cumulativeOutputTokens: 50,
|
|
earliestPendingTimestamp: Date.now() - 10000,
|
|
conversationHistory: [],
|
|
currentProvider: 'claude',
|
|
processingMessageIds: [], // CLAIM-CONFIRM pattern: track message IDs being processed
|
|
consecutiveSummaryFailures: 0,
|
|
...overrides,
|
|
} as ActiveSession;
|
|
}
|
|
|
|
describe('parsing observations from XML response', () => {
|
|
it('should parse single observation from response', async () => {
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Found important pattern</title>
|
|
<subtitle>In auth module</subtitle>
|
|
<narrative>Discovered reusable authentication pattern.</narrative>
|
|
<facts><fact>Uses JWT</fact></facts>
|
|
<concepts><concept>authentication</concept></concepts>
|
|
<files_read><file>src/auth.ts</file></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
`;
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
expect(mockStoreObservations).toHaveBeenCalledTimes(1);
|
|
const [memorySessionId, project, observations, summary] =
|
|
mockStoreObservations.mock.calls[0];
|
|
expect(memorySessionId).toBe('memory-session-456');
|
|
expect(project).toBe('test-project');
|
|
expect(observations).toHaveLength(1);
|
|
expect(observations[0].type).toBe('discovery');
|
|
expect(observations[0].title).toBe('Found important pattern');
|
|
});
|
|
|
|
it('should parse multiple observations from response', async () => {
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>First discovery</title>
|
|
<narrative>First narrative</narrative>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
<observation>
|
|
<type>bugfix</type>
|
|
<title>Fixed null pointer</title>
|
|
<narrative>Second narrative</narrative>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
`;
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
const [, , observations] = mockStoreObservations.mock.calls[0];
|
|
expect(observations).toHaveLength(2);
|
|
expect(observations[0].type).toBe('discovery');
|
|
expect(observations[1].type).toBe('bugfix');
|
|
});
|
|
});
|
|
|
|
describe('non-XML observer responses', () => {
|
|
it('warns when the observer returns prose that will be discarded', async () => {
|
|
const session = createMockSession();
|
|
const responseText = 'Skipping — repeated log scan with no new findings.';
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
expect(logger.warn).toHaveBeenCalledWith(
|
|
'PARSER',
|
|
'TestAgent returned non-XML response; observation content was discarded',
|
|
expect.objectContaining({
|
|
sessionId: 1,
|
|
preview: responseText
|
|
})
|
|
);
|
|
const [, , observations, summary] = mockStoreObservations.mock.calls[0];
|
|
expect(observations).toHaveLength(0);
|
|
expect(summary).toBeNull();
|
|
});
|
|
});
|
|
|
|
describe('parsing summary from XML response', () => {
|
|
it('should parse summary from response', async () => {
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Test</title>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
<summary>
|
|
<request>Build login form</request>
|
|
<investigated>Reviewed existing forms</investigated>
|
|
<learned>React Hook Form works well</learned>
|
|
<completed>Form skeleton created</completed>
|
|
<next_steps>Add validation</next_steps>
|
|
<notes>Some notes</notes>
|
|
</summary>
|
|
`;
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
const [, , , summary] = mockStoreObservations.mock.calls[0];
|
|
expect(summary).not.toBeNull();
|
|
expect(summary.request).toBe('Build login form');
|
|
expect(summary.investigated).toBe('Reviewed existing forms');
|
|
expect(summary.learned).toBe('React Hook Form works well');
|
|
});
|
|
|
|
it('should handle response without summary', async () => {
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Test</title>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
`;
|
|
|
|
// Mock to return result without summary
|
|
mockStoreObservations = mock(() => ({
|
|
observationIds: [1],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
}));
|
|
(mockDbManager.getSessionStore as any) = () => ({
|
|
storeObservations: mockStoreObservations,
|
|
ensureMemorySessionIdRegistered: mock(() => {}),
|
|
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
|
});
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
const [, , , summary] = mockStoreObservations.mock.calls[0];
|
|
expect(summary).toBeNull();
|
|
});
|
|
});
|
|
|
|
describe('atomic database transactions', () => {
|
|
it('should call storeObservations atomically', async () => {
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Test</title>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
<summary>
|
|
<request>Test request</request>
|
|
<investigated>Test investigated</investigated>
|
|
<learned>Test learned</learned>
|
|
<completed>Test completed</completed>
|
|
<next_steps>Test next steps</next_steps>
|
|
</summary>
|
|
`;
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
1700000000000,
|
|
'TestAgent'
|
|
);
|
|
|
|
// Verify storeObservations was called exactly once (atomic)
|
|
expect(mockStoreObservations).toHaveBeenCalledTimes(1);
|
|
|
|
// Verify all parameters passed correctly
|
|
const [
|
|
memorySessionId,
|
|
project,
|
|
observations,
|
|
summary,
|
|
promptNumber,
|
|
tokens,
|
|
timestamp,
|
|
] = mockStoreObservations.mock.calls[0];
|
|
|
|
expect(memorySessionId).toBe('memory-session-456');
|
|
expect(project).toBe('test-project');
|
|
expect(observations).toHaveLength(1);
|
|
expect(summary).not.toBeNull();
|
|
expect(promptNumber).toBe(5);
|
|
expect(tokens).toBe(100);
|
|
expect(timestamp).toBe(1700000000000);
|
|
});
|
|
});
|
|
|
|
describe('SSE broadcasting', () => {
|
|
it('should broadcast observations via SSE', async () => {
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Broadcast Test</title>
|
|
<subtitle>Testing broadcast</subtitle>
|
|
<narrative>Testing SSE broadcast</narrative>
|
|
<facts><fact>Fact 1</fact></facts>
|
|
<concepts><concept>testing</concept></concepts>
|
|
<files_read><file>test.ts</file></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
`;
|
|
|
|
// Mock returning single observation ID
|
|
mockStoreObservations = mock(() => ({
|
|
observationIds: [42],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
}));
|
|
(mockDbManager.getSessionStore as any) = () => ({
|
|
storeObservations: mockStoreObservations,
|
|
ensureMemorySessionIdRegistered: mock(() => {}),
|
|
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
|
});
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
// Should broadcast observation
|
|
expect(mockBroadcast).toHaveBeenCalled();
|
|
|
|
// Find the observation broadcast call
|
|
const observationCall = mockBroadcast.mock.calls.find(
|
|
(call: any[]) => call[0].type === 'new_observation'
|
|
);
|
|
expect(observationCall).toBeDefined();
|
|
expect(observationCall[0].observation.id).toBe(42);
|
|
expect(observationCall[0].observation.title).toBe('Broadcast Test');
|
|
expect(observationCall[0].observation.type).toBe('discovery');
|
|
});
|
|
|
|
it('should broadcast summary via SSE', async () => {
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Test</title>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
<summary>
|
|
<request>Build feature</request>
|
|
<investigated>Reviewed code</investigated>
|
|
<learned>Found patterns</learned>
|
|
<completed>Feature built</completed>
|
|
<next_steps>Add tests</next_steps>
|
|
</summary>
|
|
`;
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
// Find the summary broadcast call
|
|
const summaryCall = mockBroadcast.mock.calls.find(
|
|
(call: any[]) => call[0].type === 'new_summary'
|
|
);
|
|
expect(summaryCall).toBeDefined();
|
|
expect(summaryCall[0].summary.request).toBe('Build feature');
|
|
});
|
|
});
|
|
|
|
describe('handling empty response', () => {
|
|
it('should handle empty response gracefully', async () => {
|
|
const session = createMockSession();
|
|
const responseText = '';
|
|
|
|
// Mock to handle empty observations
|
|
mockStoreObservations = mock(() => ({
|
|
observationIds: [],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
}));
|
|
(mockDbManager.getSessionStore as any) = () => ({
|
|
storeObservations: mockStoreObservations,
|
|
ensureMemorySessionIdRegistered: mock(() => {}),
|
|
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
|
});
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
// Should still call storeObservations with empty arrays
|
|
expect(mockStoreObservations).toHaveBeenCalledTimes(1);
|
|
const [, , observations, summary] = mockStoreObservations.mock.calls[0];
|
|
expect(observations).toHaveLength(0);
|
|
expect(summary).toBeNull();
|
|
});
|
|
|
|
it('should handle response with only text (no XML)', async () => {
|
|
const session = createMockSession();
|
|
const responseText = 'This is just plain text without any XML tags.';
|
|
|
|
mockStoreObservations = mock(() => ({
|
|
observationIds: [],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
}));
|
|
(mockDbManager.getSessionStore as any) = () => ({
|
|
storeObservations: mockStoreObservations,
|
|
ensureMemorySessionIdRegistered: mock(() => {}),
|
|
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
|
});
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
expect(mockStoreObservations).toHaveBeenCalledTimes(1);
|
|
const [, , observations] = mockStoreObservations.mock.calls[0];
|
|
expect(observations).toHaveLength(0);
|
|
});
|
|
});
|
|
|
|
describe('session cleanup', () => {
|
|
it('should reset earliestPendingTimestamp after processing', async () => {
|
|
const session = createMockSession({
|
|
earliestPendingTimestamp: 1700000000000,
|
|
});
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Test</title>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
`;
|
|
|
|
mockStoreObservations = mock(() => ({
|
|
observationIds: [1],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
}));
|
|
(mockDbManager.getSessionStore as any) = () => ({
|
|
storeObservations: mockStoreObservations,
|
|
ensureMemorySessionIdRegistered: mock(() => {}),
|
|
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
|
});
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
expect(session.earliestPendingTimestamp).toBeNull();
|
|
});
|
|
|
|
it('should call broadcastProcessingStatus after processing', async () => {
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Test</title>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
`;
|
|
|
|
mockStoreObservations = mock(() => ({
|
|
observationIds: [1],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
}));
|
|
(mockDbManager.getSessionStore as any) = () => ({
|
|
storeObservations: mockStoreObservations,
|
|
ensureMemorySessionIdRegistered: mock(() => {}),
|
|
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
|
});
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
expect(mockBroadcastProcessingStatus).toHaveBeenCalled();
|
|
});
|
|
});
|
|
|
|
describe('conversation history', () => {
|
|
it('should add assistant response to conversation history', async () => {
|
|
const session = createMockSession({
|
|
conversationHistory: [],
|
|
});
|
|
const responseText = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>Test</title>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
`;
|
|
|
|
mockStoreObservations = mock(() => ({
|
|
observationIds: [1],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
}));
|
|
(mockDbManager.getSessionStore as any) = () => ({
|
|
storeObservations: mockStoreObservations,
|
|
ensureMemorySessionIdRegistered: mock(() => {}),
|
|
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
|
});
|
|
|
|
await processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
);
|
|
|
|
expect(session.conversationHistory).toHaveLength(1);
|
|
expect(session.conversationHistory[0].role).toBe('assistant');
|
|
expect(session.conversationHistory[0].content).toBe(responseText);
|
|
});
|
|
});
|
|
|
|
describe('error handling', () => {
|
|
it('should throw error if memorySessionId is missing from session', async () => {
|
|
const session = createMockSession({
|
|
memorySessionId: null, // Missing memory session ID
|
|
});
|
|
const responseText = '<observation><type>discovery</type></observation>';
|
|
|
|
await expect(
|
|
processAgentResponse(
|
|
responseText,
|
|
session,
|
|
mockDbManager,
|
|
mockSessionManager,
|
|
mockWorker,
|
|
100,
|
|
null,
|
|
'TestAgent'
|
|
)
|
|
).rejects.toThrow('Cannot store observations: memorySessionId not yet captured');
|
|
});
|
|
});
|
|
|
|
describe('lastSummaryStored tracking (#1633)', () => {
|
|
it('should set lastSummaryStored=true when storage returns a summaryId', async () => {
|
|
mockStoreObservations.mockImplementation(() => ({
|
|
observationIds: [],
|
|
summaryId: 42,
|
|
createdAtEpoch: 1700000000000,
|
|
} as StorageResult));
|
|
|
|
const session = createMockSession();
|
|
const responseText = `
|
|
<summary>
|
|
<request>user asked to fix bug</request>
|
|
<investigated>looked at auth module</investigated>
|
|
<learned>JWT tokens were expiring</learned>
|
|
<completed>fixed expiry check</completed>
|
|
<next_steps>write tests</next_steps>
|
|
</summary>
|
|
`;
|
|
|
|
await processAgentResponse(responseText, session, mockDbManager, mockSessionManager, mockWorker, 0, null, 'TestAgent');
|
|
|
|
expect(session.lastSummaryStored).toBe(true);
|
|
});
|
|
|
|
it('should set lastSummaryStored=false when storage returns summaryId=null (silent loss path, #1633)', async () => {
|
|
// Simulate the silent failure: agent returns no parseable <summary> tags,
|
|
// storeObservations skips summary and returns summaryId=null.
|
|
mockStoreObservations.mockImplementation(() => ({
|
|
observationIds: [],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
} as StorageResult));
|
|
|
|
const session = createMockSession();
|
|
// Response with no <summary> block — LLM failed to produce structured output
|
|
const responseText = '<skip_summary/>';
|
|
|
|
await processAgentResponse(responseText, session, mockDbManager, mockSessionManager, mockWorker, 0, null, 'TestAgent');
|
|
|
|
expect(session.lastSummaryStored).toBe(false);
|
|
});
|
|
});
|
|
|
|
describe('circuit breaker: consecutiveSummaryFailures counter (#1633)', () => {
|
|
const SUMMARY_PROMPT = `--- ${SUMMARY_MODE_MARKER} ---\nDo the summary now.`;
|
|
|
|
it('does NOT increment the counter on normal observation responses (P1 regression guard)', async () => {
|
|
// Session where the last user message is an OBSERVATION request, not a summary request.
|
|
// The counter must stay at 0 even though the response has <observation> tags and no summary.
|
|
mockStoreObservations.mockImplementation(() => ({
|
|
observationIds: [1],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
} as StorageResult));
|
|
|
|
const session = createMockSession({
|
|
conversationHistory: [{ role: 'user', content: 'record a new observation' }],
|
|
});
|
|
const obsResponse = `
|
|
<observation>
|
|
<type>discovery</type>
|
|
<title>found a thing</title>
|
|
<narrative>it happened</narrative>
|
|
<facts></facts>
|
|
<concepts></concepts>
|
|
<files_read></files_read>
|
|
<files_modified></files_modified>
|
|
</observation>
|
|
`;
|
|
|
|
// Drive multiple observation responses — counter must never increment.
|
|
for (let i = 0; i < 5; i++) {
|
|
await processAgentResponse(obsResponse, session, mockDbManager, mockSessionManager, mockWorker, 0, null, 'TestAgent');
|
|
}
|
|
|
|
expect(session.consecutiveSummaryFailures).toBe(0);
|
|
});
|
|
|
|
it('increments the counter when a summary was expected but none was stored', async () => {
|
|
mockStoreObservations.mockImplementation(() => ({
|
|
observationIds: [],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
} as StorageResult));
|
|
|
|
const session = createMockSession({
|
|
conversationHistory: [{ role: 'user', content: SUMMARY_PROMPT }],
|
|
});
|
|
// LLM returned nothing structured — no summary stored
|
|
const badResponse = 'I cannot comply with that request.';
|
|
|
|
await processAgentResponse(badResponse, session, mockDbManager, mockSessionManager, mockWorker, 0, null, 'TestAgent');
|
|
|
|
expect(session.consecutiveSummaryFailures).toBe(1);
|
|
});
|
|
|
|
it('does NOT increment the counter on intentional <skip_summary/> responses', async () => {
|
|
mockStoreObservations.mockImplementation(() => ({
|
|
observationIds: [],
|
|
summaryId: null,
|
|
createdAtEpoch: 1700000000000,
|
|
} as StorageResult));
|
|
|
|
const session = createMockSession({
|
|
consecutiveSummaryFailures: 1,
|
|
conversationHistory: [{ role: 'user', content: SUMMARY_PROMPT }],
|
|
});
|
|
const skipResponse = '<skip_summary reason="no meaningful work this session"/>';
|
|
|
|
await processAgentResponse(skipResponse, session, mockDbManager, mockSessionManager, mockWorker, 0, null, 'TestAgent');
|
|
|
|
// Skip is neutral — counter stays where it was, no spurious increment
|
|
expect(session.consecutiveSummaryFailures).toBe(1);
|
|
});
|
|
|
|
it('resets the counter to 0 when a summary is successfully stored', async () => {
|
|
mockStoreObservations.mockImplementation(() => ({
|
|
observationIds: [],
|
|
summaryId: 42,
|
|
createdAtEpoch: 1700000000000,
|
|
} as StorageResult));
|
|
|
|
const session = createMockSession({
|
|
consecutiveSummaryFailures: 2,
|
|
conversationHistory: [{ role: 'user', content: SUMMARY_PROMPT }],
|
|
});
|
|
const goodResponse = `
|
|
<summary>
|
|
<request>wrap it up</request>
|
|
<investigated>the thing</investigated>
|
|
<learned>the answer</learned>
|
|
<completed>the work</completed>
|
|
<next_steps>none</next_steps>
|
|
</summary>
|
|
`;
|
|
|
|
await processAgentResponse(goodResponse, session, mockDbManager, mockSessionManager, mockWorker, 0, null, 'TestAgent');
|
|
|
|
expect(session.consecutiveSummaryFailures).toBe(0);
|
|
});
|
|
});
|
|
});
|