mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
refactor: land PATHFINDER Plan 03 — ingestion path
Fail-fast parser, direct in-process ingest, recursive fs.watch,
DB-backed tool pairing. Worker-internal HTTP loopback eliminated.
- Phase 0: Created src/services/worker/http/shared.ts exporting
ingestObservation/ingestPrompt/ingestSummary as direct
in-process functions plus ingestEventBus (Node EventEmitter,
reusing existing pattern — no third event bus introduced).
setIngestContext wires the SessionManager dependency from
worker-service constructor.
- Phase 1: src/sdk/parser.ts collapsed to one parseAgentXml
returning { valid:true; kind: 'observation'|'summary'; data }
| { valid:false; reason: string }. Inspects root element;
<skip_summary reason="…"/> is a first-class summary case
with skipped:true. NEVER returns undefined. NEVER coerces.
- Phase 2: ResponseProcessor calls parseAgentXml exactly once,
branches on the discriminated union. On invalid → markFailed
+ logger.warn(reason). On observation → ingestObservation.
On summary → ingestSummary then emit summaryStoredEvent
{ sessionId, messageId } (consumed by Plan 05's blocking
/api/session/end).
- Phase 3: Deleted consecutiveSummaryFailures field
(ResponseProcessor + SessionManager + worker-types) and
MAX_CONSECUTIVE_SUMMARY_FAILURES constant. Circuit-breaker
guards and "tripped" log lines removed.
- Phase 4: coerceObservationToSummary deleted from sdk/parser.ts.
- Phase 5: src/services/transcripts/watcher.ts rescan setInterval
replaced with fs.watch(transcriptsRoot, { recursive: true,
persistent: true }) — Node 20+ recursive mode.
- Phase 6: src/services/transcripts/processor.ts pendingTools
Map deleted. tool_use rows insert with INSERT OR IGNORE on
UNIQUE(session_id, tool_use_id) (added by Plan 01). New
pairToolUsesByJoin query in PendingMessageStore for read-time
pairing (UNIQUE INDEX provides idempotency; explicit consumer
not yet wired).
- Phase 7: HTTP loopback at processor.ts:252 replaced with
direct ingestObservation call. maybeParseJson silent-passthrough
rewritten to fail-fast (throws on malformed JSON).
- Phase 8: src/utils/tag-stripping.ts countTags + stripTagsInternal
collapsed into one alternation regex, single-pass over input.
- Phase 9: src/utils/transcript-parser.ts (dead TranscriptParser
class) deleted. The active extractLastMessage at
src/shared/transcript-parser.ts:41-144 is the sole survivor.
Tests updated (Principle 7 — same-PR delete):
- tests/sdk/parser.test.ts + parse-summary.test.ts: rewritten
to assert discriminated-union shape; coercion-specific
scenarios collapse into { valid:false } assertions.
- tests/worker/agents/response-processor.test.ts: circuit-breaker
describe block skipped; non-XML/empty-response tests assert
fail-fast markFailed behavior.
Verification: every grep returns 0. transcript-parser.ts deleted.
bun run build succeeds. bun test → 1399 pass / 28 fail / 7 skip
(net -8 pass = the 4 retired circuit-breaker tests + 4 collapsed
parser cases). Zero new failures vs baseline.
Deferred (out of Plan 03 scope, will land in Plan 06): SessionRoutes
HTTP route handlers still call sessionManager.queueObservation
inline rather than the new shared helpers — the helpers are ready,
the route swap is mechanical and belongs with the Zod refactor.
Plan: PATHFINDER-2026-04-22/03-ingestion-path.md
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -1,6 +1,13 @@
|
||||
/**
|
||||
* XML Parser Module
|
||||
* Parses observation and summary XML blocks from SDK responses
|
||||
*
|
||||
* Single fail-fast entry point for SDK agent XML responses.
|
||||
*
|
||||
* Per PATHFINDER-2026-04-22 plan 03 phase 1:
|
||||
* - One function (`parseAgentXml`) for all agent responses.
|
||||
* - Discriminated-union return: `{ valid: true, kind, data }` or `{ valid: false, reason }`.
|
||||
* - No coercion. No silent passthrough. No "lenient mode".
|
||||
* - `<skip_summary reason="…"/>` is a first-class summary case (skipped: true).
|
||||
*/
|
||||
|
||||
import { logger } from '../utils/logger.js';
|
||||
@@ -24,23 +31,103 @@ export interface ParsedSummary {
|
||||
completed: string | null;
|
||||
next_steps: string | null;
|
||||
notes: string | null;
|
||||
/** True when the response was an explicit `<skip_summary reason="…"/>` bypass. */
|
||||
skipped?: boolean;
|
||||
/** Non-null when `skipped: true`. */
|
||||
skip_reason?: string | null;
|
||||
}
|
||||
|
||||
export type ParseResult =
|
||||
| { valid: true; kind: 'observation'; data: ParsedObservation[] }
|
||||
| { valid: true; kind: 'summary'; data: ParsedSummary }
|
||||
| { valid: false; reason: string };
|
||||
|
||||
/**
|
||||
* Parse an SDK agent response. Inspects the first significant XML root element
|
||||
* and returns a discriminated union. Never coerces. Never returns null/undefined.
|
||||
*
|
||||
* Recognised roots:
|
||||
* <observation> … </observation> → { kind: 'observation', data: ParsedObservation[] }
|
||||
* <summary> … </summary> → { kind: 'summary', data: ParsedSummary }
|
||||
* <skip_summary reason="…" /> → { kind: 'summary', data: { skipped: true, … } }
|
||||
*
|
||||
* Anything else → { valid: false, reason }. The caller is responsible for
|
||||
* surfacing the reason (markFailed, log, etc.). No retry coercion.
|
||||
*/
|
||||
export function parseAgentXml(raw: string, correlationId?: string | number): ParseResult {
|
||||
if (typeof raw !== 'string' || !raw.trim()) {
|
||||
return { valid: false, reason: 'empty: response had no content' };
|
||||
}
|
||||
|
||||
// Skip-summary is recognised even when wrapped in other text, but only as the
|
||||
// sole structural signal. It outranks <observation> / <summary> matches because
|
||||
// it is an explicit protocol bypass. `reason` is optional.
|
||||
const skipMatch = /<skip_summary(?:\s+reason="([^"]*)")?\s*\/>/.exec(raw);
|
||||
if (skipMatch) {
|
||||
return {
|
||||
valid: true,
|
||||
kind: 'summary',
|
||||
data: {
|
||||
request: null,
|
||||
investigated: null,
|
||||
learned: null,
|
||||
completed: null,
|
||||
next_steps: null,
|
||||
notes: null,
|
||||
skipped: true,
|
||||
skip_reason: skipMatch[1] ?? null,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
// Find the first significant element by scanning for the first `<…>` opener
|
||||
// that is one of the recognised roots. This tolerates leading prose / debug
|
||||
// output from the model while still failing fast on entirely-non-XML payloads.
|
||||
const firstRoot = /<(observation|summary)\b/i.exec(raw);
|
||||
if (!firstRoot) {
|
||||
const preview = raw.length > 120 ? `${raw.slice(0, 120)}…` : raw;
|
||||
return {
|
||||
valid: false,
|
||||
reason: `unknown root: response contained no <observation>, <summary>, or <skip_summary/> element (preview: ${preview.replace(/\s+/g, ' ')})`,
|
||||
};
|
||||
}
|
||||
|
||||
const rootName = firstRoot[1].toLowerCase();
|
||||
if (rootName === 'observation') {
|
||||
const observations = parseObservationBlocks(raw, correlationId);
|
||||
if (observations.length === 0) {
|
||||
return {
|
||||
valid: false,
|
||||
reason: '<observation>: no parseable observation block (every block was empty or ghost)',
|
||||
};
|
||||
}
|
||||
return { valid: true, kind: 'observation', data: observations };
|
||||
}
|
||||
|
||||
// rootName === 'summary'
|
||||
const summary = parseSummaryBlock(raw, correlationId);
|
||||
if (!summary) {
|
||||
return {
|
||||
valid: false,
|
||||
reason: '<summary>: empty or missing every required sub-tag (request/investigated/learned/completed/next_steps)',
|
||||
};
|
||||
}
|
||||
return { valid: true, kind: 'summary', data: summary };
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse observation XML blocks from SDK response
|
||||
* Returns all observations found in the response
|
||||
* Parse all <observation>…</observation> blocks. Filters out ghost
|
||||
* observations (every content field empty). Returns the surviving list.
|
||||
*/
|
||||
export function parseObservations(text: string, correlationId?: string): ParsedObservation[] {
|
||||
function parseObservationBlocks(text: string, correlationId?: string | number): ParsedObservation[] {
|
||||
const observations: ParsedObservation[] = [];
|
||||
|
||||
// Match <observation>...</observation> blocks (non-greedy)
|
||||
const observationRegex = /<observation>([\s\S]*?)<\/observation>/g;
|
||||
|
||||
let match;
|
||||
while ((match = observationRegex.exec(text)) !== null) {
|
||||
const obsContent = match[1];
|
||||
|
||||
// Extract all fields
|
||||
const type = extractField(obsContent, 'type');
|
||||
const title = extractField(obsContent, 'title');
|
||||
const subtitle = extractField(obsContent, 'subtitle');
|
||||
@@ -50,13 +137,13 @@ export function parseObservations(text: string, correlationId?: string): ParsedO
|
||||
const files_read = extractArrayElements(obsContent, 'files_read', 'file');
|
||||
const files_modified = extractArrayElements(obsContent, 'files_modified', 'file');
|
||||
|
||||
// All fields except type are nullable in schema.
|
||||
// If type is missing or invalid, use first type from mode as fallback.
|
||||
|
||||
// Determine final type using active mode's valid types
|
||||
// Type fallback: per existing semantics, missing/invalid type degrades to the
|
||||
// first type in the active mode. This is parser-internal validation, not
|
||||
// recovery from a contract violation: every mode's first type is intentionally
|
||||
// the catch-all bucket.
|
||||
const mode = ModeManager.getInstance().getActiveMode();
|
||||
const validTypes = mode.observation_types.map(t => t.id);
|
||||
const fallbackType = validTypes[0]; // First type in mode's list is the fallback
|
||||
const fallbackType = validTypes[0];
|
||||
let finalType = fallbackType;
|
||||
if (type) {
|
||||
if (validTypes.includes(type.trim())) {
|
||||
@@ -68,8 +155,6 @@ export function parseObservations(text: string, correlationId?: string): ParsedO
|
||||
logger.error('PARSER', `Observation missing type field, using "${fallbackType}"`, { correlationId });
|
||||
}
|
||||
|
||||
// All other fields are optional - save whatever we have
|
||||
|
||||
// Filter out type from concepts array (types and concepts are separate dimensions)
|
||||
const cleanedConcepts = concepts.filter(c => c !== finalType);
|
||||
|
||||
@@ -83,10 +168,8 @@ export function parseObservations(text: string, correlationId?: string): ParsedO
|
||||
}
|
||||
|
||||
// Skip ghost observations — records where every content field is null/empty.
|
||||
// These accumulate when the LLM emits a bare <observation/> (or one with only <type>)
|
||||
// due to context overflow. They carry no information and pollute the context window.
|
||||
// (subtitle and file lists are intentionally excluded from this guard: an observation
|
||||
// with only a subtitle is still too thin to be useful on its own.)
|
||||
// (subtitle and file lists are intentionally excluded from this guard:
|
||||
// an observation with only a subtitle is still too thin to be useful.)
|
||||
if (!title && !narrative && facts.length === 0 && cleanedConcepts.length === 0) {
|
||||
logger.warn('PARSER', 'Skipping empty observation (all content fields null)', {
|
||||
correlationId,
|
||||
@@ -111,96 +194,29 @@ export function parseObservations(text: string, correlationId?: string): ParsedO
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse summary XML block from SDK response
|
||||
* Returns null if no valid summary found or if summary was skipped
|
||||
*
|
||||
* @param coerceFromObservation - When true, attempts to convert <observation> tags
|
||||
* into summary fields if no <summary> tags are found. Only set this when the
|
||||
* response was expected to be a summary (i.e., a summarize message was sent).
|
||||
* Prevents the infinite retry loop described in #1633.
|
||||
* Parse a single <summary>…</summary> block. Returns null when the block has
|
||||
* no usable sub-tags (every required field empty) — the caller maps this to
|
||||
* a fail-fast `{ valid: false, reason }` result.
|
||||
*/
|
||||
export function parseSummary(text: string, sessionId?: number, coerceFromObservation: boolean = false): ParsedSummary | null {
|
||||
// Check for skip_summary first
|
||||
const skipRegex = /<skip_summary\s+reason="([^"]+)"\s*\/>/;
|
||||
const skipMatch = skipRegex.exec(text);
|
||||
|
||||
if (skipMatch) {
|
||||
logger.info('PARSER', 'Summary skipped', {
|
||||
sessionId,
|
||||
reason: skipMatch[1]
|
||||
});
|
||||
return null;
|
||||
}
|
||||
|
||||
// Match <summary>...</summary> block (non-greedy)
|
||||
function parseSummaryBlock(text: string, correlationId?: string | number): ParsedSummary | null {
|
||||
const summaryRegex = /<summary>([\s\S]*?)<\/summary>/;
|
||||
const summaryMatch = summaryRegex.exec(text);
|
||||
|
||||
if (!summaryMatch) {
|
||||
// When the LLM returns <observation> tags instead of <summary> tags on a
|
||||
// summary turn, coerce the observation content into summary fields rather
|
||||
// than discarding it. This breaks the infinite retry loop described in
|
||||
// #1633: without coercion, the summary is silently dropped, the session
|
||||
// completes without a summary, a new session is spawned with an ever-growing
|
||||
// prompt, and the cycle repeats.
|
||||
//
|
||||
// parseSummary is called on every response (see ResponseProcessor), not just
|
||||
// summary turns — so the absence of <summary> in an observation response is
|
||||
// expected, not a prompt-conditioning failure. Only act when the caller
|
||||
// actually expected a summary (coerceFromObservation=true).
|
||||
if (coerceFromObservation && /<observation>/.test(text)) {
|
||||
const coerced = coerceObservationToSummary(text, sessionId);
|
||||
if (coerced) {
|
||||
return coerced;
|
||||
}
|
||||
logger.warn('PARSER', 'Summary response contained <observation> tags instead of <summary> — coercion failed, no usable content', { sessionId });
|
||||
}
|
||||
return null;
|
||||
}
|
||||
if (!summaryMatch) return null;
|
||||
|
||||
const summaryContent = summaryMatch[1];
|
||||
|
||||
// Extract fields
|
||||
const request = extractField(summaryContent, 'request');
|
||||
const investigated = extractField(summaryContent, 'investigated');
|
||||
const learned = extractField(summaryContent, 'learned');
|
||||
const completed = extractField(summaryContent, 'completed');
|
||||
const next_steps = extractField(summaryContent, 'next_steps');
|
||||
const notes = extractField(summaryContent, 'notes'); // Optional
|
||||
const notes = extractField(summaryContent, 'notes'); // optional
|
||||
|
||||
// NOTE FROM THEDOTMACK: 100% of the time we must SAVE the summary, even if fields are missing. 10/24/2025
|
||||
// NEVER DO THIS NONSENSE AGAIN.
|
||||
|
||||
// Validate required fields are present (notes is optional)
|
||||
// if (!request || !investigated || !learned || !completed || !next_steps) {
|
||||
// logger.warn('PARSER', 'Summary missing required fields', {
|
||||
// sessionId,
|
||||
// hasRequest: !!request,
|
||||
// hasInvestigated: !!investigated,
|
||||
// hasLearned: !!learned,
|
||||
// hasCompleted: !!completed,
|
||||
// hasNextSteps: !!next_steps
|
||||
// });
|
||||
// return null;
|
||||
// }
|
||||
|
||||
// Guard: if NO sub-tags matched at all, this is a false positive —
|
||||
// <summary> accidentally appeared inside an <observation> response with no structured content.
|
||||
// This is NOT the same as missing some fields (which we intentionally allow above).
|
||||
// Fix for #1360.
|
||||
// Per maintainer note: a summary with at least one populated sub-tag must be
|
||||
// saved. Missing sub-tags are tolerated; an entirely empty <summary> block is
|
||||
// a false-positive (covered the #1360 regression) and is rejected.
|
||||
if (!request && !investigated && !learned && !completed && !next_steps) {
|
||||
// If the response also contains <observation> tags with real content, fall
|
||||
// back to coercion rather than discarding the response entirely — this covers
|
||||
// the case where the LLM wraps empty <summary></summary> around observation
|
||||
// content, which would otherwise resurrect the #1633 retry loop.
|
||||
if (coerceFromObservation && /<observation>/.test(text)) {
|
||||
const coerced = coerceObservationToSummary(text, sessionId);
|
||||
if (coerced) {
|
||||
logger.warn('PARSER', 'Empty <summary> match rejected — coerced from <observation> fallback (#1633)', { sessionId });
|
||||
return coerced;
|
||||
}
|
||||
}
|
||||
logger.warn('PARSER', 'Summary match has no sub-tags — skipping false positive', { sessionId });
|
||||
logger.warn('PARSER', 'Summary block has no sub-tags — rejecting false positive', { correlationId });
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -210,54 +226,10 @@ export function parseSummary(text: string, sessionId?: number, coerceFromObserva
|
||||
learned,
|
||||
completed,
|
||||
next_steps,
|
||||
notes
|
||||
notes,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Coerce <observation> response into a ParsedSummary when <summary> tags are missing.
|
||||
* Maps observation fields to the closest summary equivalents so that a usable
|
||||
* summary is stored instead of nothing — breaking the retry loop (#1633).
|
||||
*/
|
||||
function coerceObservationToSummary(text: string, sessionId?: number): ParsedSummary | null {
|
||||
// Iterate all <observation> blocks — if the LLM emits multiple and the first is
|
||||
// empty, we still want to salvage the first one that has usable content.
|
||||
const obsRegex = /<observation>([\s\S]*?)<\/observation>/g;
|
||||
let obsMatch: RegExpExecArray | null;
|
||||
let blockIndex = 0;
|
||||
|
||||
while ((obsMatch = obsRegex.exec(text)) !== null) {
|
||||
const obsContent = obsMatch[1];
|
||||
const title = extractField(obsContent, 'title');
|
||||
const subtitle = extractField(obsContent, 'subtitle');
|
||||
const narrative = extractField(obsContent, 'narrative');
|
||||
const facts = extractArrayElements(obsContent, 'facts', 'fact');
|
||||
|
||||
if (title || narrative || facts.length > 0) {
|
||||
// Map observation fields → summary fields (best-effort)
|
||||
const request = title || subtitle || null;
|
||||
const investigated = narrative || null;
|
||||
const learned = facts.length > 0 ? facts.join('; ') : null;
|
||||
const completed = title ? `${title}${subtitle ? ' — ' + subtitle : ''}` : null;
|
||||
const next_steps = null; // No direct observation equivalent
|
||||
|
||||
logger.warn('PARSER', 'Coerced <observation> response into <summary> to prevent retry loop (#1633)', {
|
||||
sessionId,
|
||||
blockIndex,
|
||||
hasTitle: !!title,
|
||||
hasNarrative: !!narrative,
|
||||
factCount: facts.length,
|
||||
});
|
||||
|
||||
return { request, investigated, learned, completed, next_steps, notes: null };
|
||||
}
|
||||
|
||||
blockIndex++;
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract a simple field value from XML content
|
||||
* Returns null for missing or empty/whitespace-only fields
|
||||
@@ -265,8 +237,6 @@ function coerceObservationToSummary(text: string, sessionId?: number): ParsedSum
|
||||
* Uses non-greedy match to handle nested tags and code snippets (Issue #798)
|
||||
*/
|
||||
function extractField(content: string, fieldName: string): string | null {
|
||||
// Use [\s\S]*? to match any character including newlines, non-greedily
|
||||
// This handles nested XML tags like <item>...</item> inside the field
|
||||
const regex = new RegExp(`<${fieldName}>([\\s\\S]*?)</${fieldName}>`);
|
||||
const match = regex.exec(content);
|
||||
if (!match) return null;
|
||||
@@ -282,7 +252,6 @@ function extractField(content: string, fieldName: string): string | null {
|
||||
function extractArrayElements(content: string, arrayName: string, elementName: string): string[] {
|
||||
const elements: string[] = [];
|
||||
|
||||
// Match the array block using [\s\S]*? for nested content
|
||||
const arrayRegex = new RegExp(`<${arrayName}>([\\s\\S]*?)</${arrayName}>`);
|
||||
const arrayMatch = arrayRegex.exec(content);
|
||||
|
||||
@@ -292,7 +261,6 @@ function extractArrayElements(content: string, arrayName: string, elementName: s
|
||||
|
||||
const arrayContent = arrayMatch[1];
|
||||
|
||||
// Extract individual elements using [\s\S]*? for nested content
|
||||
const elementRegex = new RegExp(`<${elementName}>([\\s\\S]*?)</${elementName}>`, 'g');
|
||||
let elementMatch;
|
||||
while ((elementMatch = elementRegex.exec(arrayContent)) !== null) {
|
||||
|
||||
@@ -7,19 +7,14 @@ import { logger } from '../utils/logger.js';
|
||||
import type { ModeConfig } from '../services/domain/types.js';
|
||||
|
||||
/**
|
||||
* Marker string embedded in summary prompts — used by ResponseProcessor to detect
|
||||
* whether the most recent user message was a summary request (enables observation→summary
|
||||
* coercion for #1633). Keep in sync with buildSummaryPrompt below.
|
||||
* Marker string embedded in summary prompts — historically used by
|
||||
* ResponseProcessor to detect summary turns for the (now-deleted) coercion
|
||||
* fallback. Kept here because `buildSummaryPrompt` still embeds it as the
|
||||
* mode-switch banner; deleting the constant would require rewriting the
|
||||
* prompt builder, which is out of scope for plan 03.
|
||||
*/
|
||||
export const SUMMARY_MODE_MARKER = 'MODE SWITCH: PROGRESS SUMMARY';
|
||||
|
||||
/**
|
||||
* Maximum consecutive summary failures before the circuit breaker opens.
|
||||
* After this many failures, SessionManager.queueSummarize will skip further
|
||||
* summarize requests to prevent the infinite retry loop (#1633).
|
||||
*/
|
||||
export const MAX_CONSECUTIVE_SUMMARY_FAILURES = 3;
|
||||
|
||||
export interface Observation {
|
||||
id: number;
|
||||
tool_name: string;
|
||||
|
||||
@@ -84,24 +84,32 @@ export class PendingMessageStore {
|
||||
}
|
||||
|
||||
/**
|
||||
* Enqueue a new message (persist before processing)
|
||||
* @returns The database ID of the persisted message
|
||||
* Enqueue a new message (persist before processing).
|
||||
*
|
||||
* Uses `INSERT OR IGNORE` so duplicate (content_session_id, tool_use_id)
|
||||
* pairs collapse to a single row — the UNIQUE INDEX added in plan 01 phase 1
|
||||
* is the authority on tool-use idempotency. Per principle 3 (UNIQUE
|
||||
* constraint over dedup window), we don't time-gate duplicates.
|
||||
*
|
||||
* @returns The database ID of the persisted message, or 0 when the insert
|
||||
* was suppressed by ON CONFLICT (caller must handle 0 explicitly).
|
||||
*/
|
||||
enqueue(sessionDbId: number, contentSessionId: string, message: PendingMessage): number {
|
||||
const now = Date.now();
|
||||
const stmt = this.db.prepare(`
|
||||
INSERT INTO pending_messages (
|
||||
session_db_id, content_session_id, message_type,
|
||||
INSERT OR IGNORE INTO pending_messages (
|
||||
session_db_id, content_session_id, tool_use_id, message_type,
|
||||
tool_name, tool_input, tool_response, cwd,
|
||||
last_assistant_message,
|
||||
prompt_number, status, retry_count, created_at_epoch,
|
||||
agent_type, agent_id
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', 0, ?, ?, ?)
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', 0, ?, ?, ?)
|
||||
`);
|
||||
|
||||
const result = stmt.run(
|
||||
sessionDbId,
|
||||
contentSessionId,
|
||||
message.toolUseId ?? null,
|
||||
message.type,
|
||||
message.tool_name || null,
|
||||
message.tool_input ? JSON.stringify(message.tool_input) : null,
|
||||
@@ -117,6 +125,38 @@ export class PendingMessageStore {
|
||||
return result.lastInsertRowid as number;
|
||||
}
|
||||
|
||||
/**
|
||||
* PATHFINDER plan 03 phase 6: pair tool_use rows with their tool_result by
|
||||
* (content_session_id, tool_use_id) at read time. Replaces the per-process
|
||||
* `pendingTools` Map deleted from `src/services/transcripts/processor.ts`.
|
||||
*
|
||||
* Returns observation rows whose tool_use_id is non-null and which have at
|
||||
* least one prior matching row in the same session (the UNIQUE INDEX
|
||||
* guarantees at most one logical pair survives per id).
|
||||
*/
|
||||
pairToolUsesByJoin(contentSessionId: string): Array<{
|
||||
tool_use_id: string;
|
||||
tool_use_payload: string | null;
|
||||
tool_result_payload: string | null;
|
||||
}> {
|
||||
const stmt = this.db.prepare(`
|
||||
SELECT u.tool_use_id AS tool_use_id,
|
||||
u.tool_input AS tool_use_payload,
|
||||
r.tool_response AS tool_result_payload
|
||||
FROM pending_messages u
|
||||
JOIN pending_messages r USING (content_session_id, tool_use_id)
|
||||
WHERE u.content_session_id = ?
|
||||
AND u.tool_use_id IS NOT NULL
|
||||
AND u.tool_input IS NOT NULL
|
||||
AND r.tool_response IS NOT NULL
|
||||
`);
|
||||
return stmt.all(contentSessionId) as Array<{
|
||||
tool_use_id: string;
|
||||
tool_use_payload: string | null;
|
||||
tool_result_payload: string | null;
|
||||
}>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Atomically claim the next message for `sessionDbId`.
|
||||
*
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
import path from 'path';
|
||||
import { sessionInitHandler } from '../../cli/handlers/session-init.js';
|
||||
import { observationHandler } from '../../cli/handlers/observation.js';
|
||||
import { fileEditHandler } from '../../cli/handlers/file-edit.js';
|
||||
import { sessionCompleteHandler } from '../../cli/handlers/session-complete.js';
|
||||
import { ensureWorkerRunning, workerHttpRequest } from '../../shared/worker-utils.js';
|
||||
@@ -12,6 +11,7 @@ import { resolveFieldSpec, resolveFields, matchesRule } from './field-utils.js';
|
||||
import { expandHomePath } from './config.js';
|
||||
import type { TranscriptSchema, WatchTarget, SchemaEvent } from './types.js';
|
||||
import { normalizePlatformSource } from '../../shared/platform-source.js';
|
||||
import { ingestObservation } from '../worker/http/shared.js';
|
||||
|
||||
interface SessionState {
|
||||
sessionId: string;
|
||||
@@ -20,14 +20,6 @@ interface SessionState {
|
||||
project?: string;
|
||||
lastUserMessage?: string;
|
||||
lastAssistantMessage?: string;
|
||||
pendingTools: Map<string, { name?: string; input?: unknown }>;
|
||||
}
|
||||
|
||||
interface PendingTool {
|
||||
id?: string;
|
||||
name?: string;
|
||||
input?: unknown;
|
||||
response?: unknown;
|
||||
}
|
||||
|
||||
export class TranscriptEventProcessor {
|
||||
@@ -56,7 +48,6 @@ export class TranscriptEventProcessor {
|
||||
session = {
|
||||
sessionId,
|
||||
platformSource: normalizePlatformSource(watch.name),
|
||||
pendingTools: new Map()
|
||||
};
|
||||
this.sessions.set(key, session);
|
||||
}
|
||||
@@ -196,12 +187,6 @@ export class TranscriptEventProcessor {
|
||||
const toolInput = this.maybeParseJson(fields.toolInput);
|
||||
const toolResponse = this.maybeParseJson(fields.toolResponse);
|
||||
|
||||
const pending: PendingTool = { id: toolId, name: toolName, input: toolInput, response: toolResponse };
|
||||
|
||||
if (toolId) {
|
||||
session.pendingTools.set(toolId, { name: pending.name, input: pending.input });
|
||||
}
|
||||
|
||||
if (toolName === 'apply_patch' && typeof toolInput === 'string') {
|
||||
const files = this.parseApplyPatchFiles(toolInput);
|
||||
for (const filePath of files) {
|
||||
@@ -212,11 +197,17 @@ export class TranscriptEventProcessor {
|
||||
}
|
||||
}
|
||||
|
||||
// PATHFINDER plan 03 phase 6: per-process tool pairing Map deleted.
|
||||
// Each event emits independently; the database's
|
||||
// UNIQUE(content_session_id, tool_use_id) index makes the insert
|
||||
// idempotent so duplicate observations from overlapping
|
||||
// tool_use + tool_result lines collapse to a single row.
|
||||
if (toolResponse !== undefined && toolName) {
|
||||
await this.sendObservation(session, {
|
||||
toolName,
|
||||
toolInput,
|
||||
toolResponse
|
||||
toolResponse,
|
||||
toolUseId: toolId,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -225,22 +216,14 @@ export class TranscriptEventProcessor {
|
||||
const toolId = typeof fields.toolId === 'string' ? fields.toolId : undefined;
|
||||
const toolName = typeof fields.toolName === 'string' ? fields.toolName : undefined;
|
||||
const toolResponse = this.maybeParseJson(fields.toolResponse);
|
||||
const toolInput = this.maybeParseJson(fields.toolInput);
|
||||
|
||||
let toolInput: unknown = this.maybeParseJson(fields.toolInput);
|
||||
let name = toolName;
|
||||
|
||||
if (toolId && session.pendingTools.has(toolId)) {
|
||||
const pending = session.pendingTools.get(toolId)!;
|
||||
toolInput = pending.input ?? toolInput;
|
||||
name = name ?? pending.name;
|
||||
session.pendingTools.delete(toolId);
|
||||
}
|
||||
|
||||
if (name) {
|
||||
if (toolName) {
|
||||
await this.sendObservation(session, {
|
||||
toolName: name,
|
||||
toolName,
|
||||
toolInput,
|
||||
toolResponse
|
||||
toolResponse,
|
||||
toolUseId: toolId,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -249,14 +232,23 @@ export class TranscriptEventProcessor {
|
||||
const toolName = typeof fields.toolName === 'string' ? fields.toolName : undefined;
|
||||
if (!toolName) return;
|
||||
|
||||
await observationHandler.execute({
|
||||
sessionId: session.sessionId,
|
||||
// PATHFINDER plan 03 phase 7: replace HTTP loopback (worker → its own
|
||||
// /api/sessions/observations endpoint) with a direct in-process call to
|
||||
// ingestObservation. Same implementation backs the cross-process HTTP
|
||||
// route handler (one helper, N callers).
|
||||
const result = ingestObservation({
|
||||
contentSessionId: session.sessionId,
|
||||
cwd: session.cwd ?? process.cwd(),
|
||||
toolName,
|
||||
toolInput: this.maybeParseJson(fields.toolInput),
|
||||
toolResponse: this.maybeParseJson(fields.toolResponse),
|
||||
platform: session.platformSource
|
||||
platformSource: session.platformSource,
|
||||
toolUseId: typeof fields.toolUseId === 'string' ? fields.toolUseId : undefined,
|
||||
});
|
||||
|
||||
if (!result.ok) {
|
||||
throw new Error(`ingestObservation failed: ${result.reason}`);
|
||||
}
|
||||
}
|
||||
|
||||
private async sendFileEdit(session: SessionState, fields: Record<string, unknown>): Promise<void> {
|
||||
@@ -277,12 +269,10 @@ export class TranscriptEventProcessor {
|
||||
const trimmed = value.trim();
|
||||
if (!trimmed) return value;
|
||||
if (!(trimmed.startsWith('{') || trimmed.startsWith('['))) return value;
|
||||
try {
|
||||
return JSON.parse(trimmed);
|
||||
} catch (error: unknown) {
|
||||
logger.debug('WORKER', 'Failed to parse JSON string', { length: trimmed.length }, error instanceof Error ? error : undefined);
|
||||
return value;
|
||||
}
|
||||
// PATHFINDER plan 03 phase 7: fail-fast. Strings that look like JSON but
|
||||
// do not parse are a contract violation, not a passthrough — throw so the
|
||||
// upstream `try` in `handleLine` logs and the caller decides.
|
||||
return JSON.parse(trimmed);
|
||||
}
|
||||
|
||||
private parseApplyPatchFiles(patch: string): string[] {
|
||||
@@ -314,7 +304,6 @@ export class TranscriptEventProcessor {
|
||||
platform: session.platformSource
|
||||
});
|
||||
await this.updateContext(session, watch);
|
||||
session.pendingTools.clear();
|
||||
const key = this.getSessionKey(watch, session.sessionId);
|
||||
this.sessions.delete(key);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { existsSync, statSync, watch as fsWatch, createReadStream } from 'fs';
|
||||
import { basename, join } from 'path';
|
||||
import { basename, join, resolve as resolvePath } from 'path';
|
||||
import { globSync } from 'glob';
|
||||
import { logger } from '../../utils/logger.js';
|
||||
import { expandHomePath } from './config.js';
|
||||
@@ -84,7 +84,7 @@ export class TranscriptWatcher {
|
||||
private processor = new TranscriptEventProcessor();
|
||||
private tailers = new Map<string, FileTailer>();
|
||||
private state: TranscriptWatchState;
|
||||
private rescanTimers: Array<NodeJS.Timeout> = [];
|
||||
private rootWatchers: Array<ReturnType<typeof fsWatch>> = [];
|
||||
|
||||
constructor(private config: TranscriptWatchConfig, private statePath: string) {
|
||||
this.state = loadWatchState(statePath);
|
||||
@@ -101,10 +101,10 @@ export class TranscriptWatcher {
|
||||
tailer.close();
|
||||
}
|
||||
this.tailers.clear();
|
||||
for (const timer of this.rescanTimers) {
|
||||
clearInterval(timer);
|
||||
for (const watcher of this.rootWatchers) {
|
||||
watcher.close();
|
||||
}
|
||||
this.rescanTimers = [];
|
||||
this.rootWatchers = [];
|
||||
}
|
||||
|
||||
private async setupWatch(watch: WatchTarget): Promise<void> {
|
||||
@@ -121,16 +121,65 @@ export class TranscriptWatcher {
|
||||
await this.addTailer(filePath, watch, schema, true);
|
||||
}
|
||||
|
||||
const rescanIntervalMs = watch.rescanIntervalMs ?? 5000;
|
||||
const timer = setInterval(async () => {
|
||||
const newFiles = this.resolveWatchFiles(resolvedPath);
|
||||
for (const filePath of newFiles) {
|
||||
if (!this.tailers.has(filePath)) {
|
||||
await this.addTailer(filePath, watch, schema, false);
|
||||
// PATHFINDER plan 03 phase 5: 5-second rescan timer replaced by a
|
||||
// recursive fs.watch on the configured root. Requires Node 20+ on Linux
|
||||
// for recursive mode (engines.node >= 20.0.0 — already enforced in
|
||||
// package.json).
|
||||
const watchRoot = this.deepestNonGlobAncestor(resolvedPath);
|
||||
if (!watchRoot || !existsSync(watchRoot)) {
|
||||
logger.debug('TRANSCRIPT', 'Watch root does not exist, skipping fs.watch', { watch: watch.name, watchRoot });
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const watcher = fsWatch(watchRoot, { recursive: true, persistent: true }, (event, name) => {
|
||||
if (!name) return; // some events omit filename
|
||||
// Re-resolve the configured path; new files surface here. Restricting
|
||||
// to the configured pattern keeps unrelated edits in the watched root
|
||||
// from triggering tailer churn.
|
||||
const matches = this.resolveWatchFiles(resolvedPath);
|
||||
for (const filePath of matches) {
|
||||
if (!this.tailers.has(filePath)) {
|
||||
void this.addTailer(filePath, watch, schema, false);
|
||||
}
|
||||
}
|
||||
});
|
||||
this.rootWatchers.push(watcher);
|
||||
logger.info('TRANSCRIPT', 'Watching transcript root recursively', { watch: watch.name, watchRoot });
|
||||
} catch (error) {
|
||||
logger.warn('TRANSCRIPT', 'Failed to start recursive fs.watch on transcript root', {
|
||||
watch: watch.name,
|
||||
watchRoot,
|
||||
}, error instanceof Error ? error : undefined);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the deepest path component that contains no glob meta-characters.
|
||||
* Used to anchor `fs.watch(recursive: true)` for both literal directories
|
||||
* and patterns like `~/.codex/sessions/**\/*.jsonl`.
|
||||
*/
|
||||
private deepestNonGlobAncestor(inputPath: string): string {
|
||||
if (!this.hasGlob(inputPath)) {
|
||||
// Literal path: if it's a file, return its directory; otherwise return as-is.
|
||||
if (existsSync(inputPath)) {
|
||||
try {
|
||||
const stat = statSync(inputPath);
|
||||
return stat.isDirectory() ? inputPath : resolvePath(inputPath, '..');
|
||||
} catch {
|
||||
return resolvePath(inputPath, '..');
|
||||
}
|
||||
}
|
||||
}, rescanIntervalMs);
|
||||
this.rescanTimers.push(timer);
|
||||
return inputPath;
|
||||
}
|
||||
|
||||
const segments = inputPath.split('/');
|
||||
const literalSegments: string[] = [];
|
||||
for (const segment of segments) {
|
||||
if (/[*?[\]{}()]/.test(segment)) break;
|
||||
literalSegments.push(segment);
|
||||
}
|
||||
return literalSegments.join('/') || '/';
|
||||
}
|
||||
|
||||
private resolveSchema(watch: WatchTarget): TranscriptSchema | null {
|
||||
|
||||
@@ -87,6 +87,7 @@ import { SearchManager } from './worker/SearchManager.js';
|
||||
import { FormattingService } from './worker/FormattingService.js';
|
||||
import { TimelineService } from './worker/TimelineService.js';
|
||||
import { SessionEventBroadcaster } from './worker/events/SessionEventBroadcaster.js';
|
||||
import { setIngestContext } from './worker/http/shared.js';
|
||||
import { DEFAULT_CONFIG_PATH, DEFAULT_STATE_PATH, expandHomePath, loadTranscriptWatchConfig, writeSampleConfig } from './transcripts/config.js';
|
||||
import { TranscriptWatcher } from './transcripts/watcher.js';
|
||||
|
||||
@@ -197,6 +198,14 @@ export class WorkerService {
|
||||
this.sessionEventBroadcaster = new SessionEventBroadcaster(this.sseBroadcaster, this);
|
||||
this.corpusStore = new CorpusStore();
|
||||
|
||||
// Wire ingest helpers (plan 03 phase 0). Worker-internal callers use these
|
||||
// directly instead of HTTP-loopback into our own routes.
|
||||
setIngestContext({
|
||||
sessionManager: this.sessionManager,
|
||||
dbManager: this.dbManager,
|
||||
eventBroadcaster: this.sessionEventBroadcaster,
|
||||
});
|
||||
|
||||
// Set callback for when sessions are deleted
|
||||
this.sessionManager.setOnSessionDeleted(() => {
|
||||
this.broadcastProcessingStatus();
|
||||
|
||||
@@ -48,9 +48,6 @@ export interface ActiveSession {
|
||||
// Track whether the most recent storage operation persisted a summary record.
|
||||
// Used by the status endpoint so the Stop hook can detect silent summary loss (#1633).
|
||||
lastSummaryStored?: boolean;
|
||||
// Circuit breaker: track consecutive summary failures to prevent infinite retry loops (#1633).
|
||||
// When this reaches MAX_CONSECUTIVE_SUMMARY_FAILURES, further summarize requests are skipped.
|
||||
consecutiveSummaryFailures: number;
|
||||
// Subagent identity carried forward from the most recent claimed pending message.
|
||||
// When observations are parsed and stored, these fields label the resulting rows
|
||||
// so subagent work is attributable. NULL / undefined means the batch came from the main session.
|
||||
@@ -69,6 +66,9 @@ export interface PendingMessage {
|
||||
// Claude Code subagent identity — present only when the hook fired inside a subagent.
|
||||
agentId?: string;
|
||||
agentType?: string;
|
||||
/** Provider-assigned tool-use id; underpins the
|
||||
* UNIQUE(content_session_id, tool_use_id) idempotency index added in plan 01. */
|
||||
toolUseId?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -90,6 +90,8 @@ export interface ObservationData {
|
||||
// Claude Code subagent identity — present only when the hook fired inside a subagent.
|
||||
agentId?: string;
|
||||
agentType?: string;
|
||||
/** Provider-assigned tool-use id (plan 03 phase 6 idempotency key). */
|
||||
toolUseId?: string;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
|
||||
@@ -16,7 +16,6 @@ import { PendingMessageStore } from '../sqlite/PendingMessageStore.js';
|
||||
import { SessionQueueProcessor } from '../queue/SessionQueueProcessor.js';
|
||||
import { getSdkProcessForSession, ensureSdkProcessExit } from '../../supervisor/process-registry.js';
|
||||
import { getSupervisor } from '../../supervisor/index.js';
|
||||
import { MAX_CONSECUTIVE_SUMMARY_FAILURES } from '../../sdk/prompts.js';
|
||||
import { RestartGuard } from './RestartGuard.js';
|
||||
|
||||
export class SessionManager {
|
||||
@@ -165,7 +164,6 @@ export class SessionManager {
|
||||
restartGuard: new RestartGuard(),
|
||||
processingMessageIds: [], // CLAIM-CONFIRM: Track message IDs for confirmProcessed()
|
||||
lastGeneratorActivity: Date.now(), // Initialize for stale detection (Issue #1099)
|
||||
consecutiveSummaryFailures: 0, // Circuit breaker for summary retry loop (#1633)
|
||||
pendingAgentId: null, // Subagent identity carried from the most recent claimed message
|
||||
pendingAgentType: null // (null for main-session messages)
|
||||
};
|
||||
@@ -225,7 +223,8 @@ export class SessionManager {
|
||||
prompt_number: data.prompt_number,
|
||||
cwd: data.cwd,
|
||||
agentId: data.agentId,
|
||||
agentType: data.agentType
|
||||
agentType: data.agentType,
|
||||
toolUseId: data.toolUseId,
|
||||
};
|
||||
|
||||
try {
|
||||
@@ -269,17 +268,10 @@ export class SessionManager {
|
||||
session = this.initializeSession(sessionDbId);
|
||||
}
|
||||
|
||||
// Circuit breaker: skip summarize if too many consecutive failures (#1633).
|
||||
// This prevents the infinite loop where each failed summary spawns a new session
|
||||
// with an ever-growing prompt. Counter is in-memory per ActiveSession — it resets
|
||||
// on worker restart, which is acceptable because session state is already ephemeral.
|
||||
if (session.consecutiveSummaryFailures >= MAX_CONSECUTIVE_SUMMARY_FAILURES) {
|
||||
logger.warn('SESSION', `Circuit breaker OPEN: skipping summarize after ${session.consecutiveSummaryFailures} consecutive failures (#1633)`, {
|
||||
sessionId: sessionDbId,
|
||||
contentSessionId: session.contentSessionId
|
||||
});
|
||||
return;
|
||||
}
|
||||
// PATHFINDER plan 03 phase 3: summary-failure circuit breaker deleted.
|
||||
// Each failed parse is independently marked failed via the retry ladder
|
||||
// in PendingMessageStore.markFailed; a storm of bad parses surfaces as
|
||||
// retry exhaustion, not as silent suppression of further requests.
|
||||
|
||||
// CRITICAL: Persist to database FIRST
|
||||
const message: PendingMessage = {
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
*/
|
||||
|
||||
import { logger } from '../../../utils/logger.js';
|
||||
import { parseObservations, parseSummary, type ParsedObservation, type ParsedSummary } from '../../../sdk/parser.js';
|
||||
import { SUMMARY_MODE_MARKER, MAX_CONSECUTIVE_SUMMARY_FAILURES } from '../../../sdk/prompts.js';
|
||||
import { parseAgentXml, type ParsedObservation, type ParsedSummary } from '../../../sdk/parser.js';
|
||||
import { ingestEventBus, type SummaryStoredEvent } from '../http/shared.js';
|
||||
import { updateCursorContextForProject } from '../../integrations/CursorHooksInstaller.js';
|
||||
import { updateFolderClaudeMdFiles } from '../../../utils/claude-md-utils.js';
|
||||
import { getWorkerPort } from '../../../shared/worker-utils.js';
|
||||
@@ -66,39 +66,16 @@ export async function processAgentResponse(
|
||||
session.conversationHistory.push({ role: 'assistant', content: text });
|
||||
}
|
||||
|
||||
// Parse observations and summary
|
||||
const observations = parseObservations(text, session.contentSessionId);
|
||||
// Single fail-fast parse (PATHFINDER plan 03 phase 1+2). On invalid XML,
|
||||
// mark each in-flight pending message failed and stop. The PendingMessageStore
|
||||
// retry ladder is the legitimate primary-path surface for transient failures;
|
||||
// there is no circuit breaker, no coercion.
|
||||
const parsed = parseAgentXml(text, session.contentSessionId);
|
||||
|
||||
// Detect whether the most recent prompt was a summary request.
|
||||
// If so, enable observation-to-summary coercion to prevent the infinite
|
||||
// retry loop described in #1633.
|
||||
const lastMessage = session.conversationHistory.at(-1);
|
||||
const lastUserMessage = lastMessage?.role === 'user'
|
||||
? lastMessage
|
||||
: session.conversationHistory.findLast(m => m.role === 'user') ?? null;
|
||||
const summaryExpected = lastUserMessage?.content?.includes(SUMMARY_MODE_MARKER) ?? false;
|
||||
|
||||
const summary = parseSummary(text, session.sessionDbId, summaryExpected);
|
||||
|
||||
// Detect non-XML responses (auth errors, rate limits, garbled output).
|
||||
// When the response contains no parseable XML and produced no observations,
|
||||
// mark the pending messages as failed instead of confirming them — this prevents
|
||||
// silent data loss when the LLM returns garbage (#1874).
|
||||
const isNonXmlResponse = (
|
||||
text.trim() &&
|
||||
observations.length === 0 &&
|
||||
!summary &&
|
||||
!/<observation>|<summary>|<skip_summary\b/.test(text)
|
||||
);
|
||||
|
||||
if (isNonXmlResponse) {
|
||||
const preview = text.length > 200 ? `${text.slice(0, 200)}...` : text;
|
||||
logger.warn('PARSER', `${agentName} returned non-XML response; marking messages as failed for retry (#1874)`, {
|
||||
if (!parsed.valid) {
|
||||
logger.warn('PARSER', `${agentName} returned unparseable response: ${parsed.reason}`, {
|
||||
sessionId: session.sessionDbId,
|
||||
preview
|
||||
});
|
||||
|
||||
// Mark messages as failed (retry logic in PendingMessageStore handles retries)
|
||||
const pendingStore = sessionManager.getPendingMessageStore();
|
||||
for (const messageId of session.processingMessageIds) {
|
||||
pendingStore.markFailed(messageId);
|
||||
@@ -107,6 +84,17 @@ export async function processAgentResponse(
|
||||
return;
|
||||
}
|
||||
|
||||
let observations: ParsedObservation[] = [];
|
||||
let summary: ParsedSummary | null = null;
|
||||
if (parsed.kind === 'observation') {
|
||||
observations = parsed.data;
|
||||
} else if (!parsed.data.skipped) {
|
||||
// `<skip_summary/>` is a first-class parser result but carries nothing to
|
||||
// persist; the summary storage path is skipped entirely so storeObservations
|
||||
// does not see an empty record.
|
||||
summary = parsed.data;
|
||||
}
|
||||
|
||||
// Convert nullable fields to empty strings for storeSummary (if summary exists)
|
||||
const summaryForStore = normalizeSummaryForStorage(summary);
|
||||
|
||||
@@ -173,30 +161,16 @@ export async function processAgentResponse(
|
||||
// to the Stop hook for silent-summary-loss detection (#1633)
|
||||
session.lastSummaryStored = result.summaryId !== null;
|
||||
|
||||
// Circuit breaker: track consecutive summary failures (#1633).
|
||||
// Only evaluate when a summary was actually expected (summarize message was sent).
|
||||
// Without this guard, the counter would increment on every normal observation
|
||||
// response, tripping the breaker after 3 observations and permanently blocking
|
||||
// summarization — reproducing the data-loss scenario this fix is meant to prevent.
|
||||
if (summaryExpected) {
|
||||
const skippedIntentionally = /<skip_summary\b/.test(text);
|
||||
if (summaryForStore !== null) {
|
||||
// Summary was present in the response — reset the failure counter
|
||||
session.consecutiveSummaryFailures = 0;
|
||||
} else if (skippedIntentionally) {
|
||||
// Explicit <skip_summary/> is a valid protocol response — neither success
|
||||
// nor failure. Leave the counter unchanged so we don't mask a bad run that
|
||||
// happens to end on a skip, but also don't punish intentional skips.
|
||||
} else {
|
||||
// Summary was expected but none was stored — count as failure
|
||||
session.consecutiveSummaryFailures += 1;
|
||||
if (session.consecutiveSummaryFailures >= MAX_CONSECUTIVE_SUMMARY_FAILURES) {
|
||||
logger.error('SESSION', `Circuit breaker: ${session.consecutiveSummaryFailures} consecutive summary failures — further summarize requests will be skipped (#1633)`, {
|
||||
sessionId: session.sessionDbId,
|
||||
contentSessionId: session.contentSessionId
|
||||
});
|
||||
}
|
||||
}
|
||||
// PATHFINDER plan 03 phase 2: emit summaryStoredEvent for the blocking
|
||||
// /api/session/end endpoint (consumed by plan 05 hook surface). Fired once
|
||||
// per successful ingestSummary path (parsed summary that hit the store, or
|
||||
// an explicit <skip_summary/> bypass).
|
||||
if (parsed.kind === 'summary') {
|
||||
const messageId = session.processingMessageIds[0] ?? -1;
|
||||
ingestEventBus.emit('summaryStoredEvent', {
|
||||
sessionId: session.contentSessionId,
|
||||
messageId,
|
||||
} satisfies SummaryStoredEvent);
|
||||
}
|
||||
|
||||
// CLAIM-CONFIRM: Now that storage succeeded, confirm all processing messages (delete from queue)
|
||||
|
||||
340
src/services/worker/http/shared.ts
Normal file
340
src/services/worker/http/shared.ts
Normal file
@@ -0,0 +1,340 @@
|
||||
/**
|
||||
* Worker HTTP shared ingest helpers.
|
||||
*
|
||||
* Per PATHFINDER-2026-04-22 plan 03 phase 0:
|
||||
* `ingestObservation`, `ingestPrompt`, `ingestSummary` are the single
|
||||
* in-process implementation of the worker's three ingest paths. The HTTP
|
||||
* route handlers (cross-process callers) and worker-internal producers
|
||||
* (transcript processor, ResponseProcessor) BOTH delegate here.
|
||||
*
|
||||
* No HTTP loopback. No duplicated insert logic. One helper, N callers.
|
||||
*
|
||||
* Wiring: `WorkerService` registers its `sessionManager`, `dbManager`, and
|
||||
* `sessionEventBroadcaster` once at startup via `setIngestContext`. The
|
||||
* helpers fail fast if called before registration.
|
||||
*/
|
||||
|
||||
import { logger } from '../../../utils/logger.js';
|
||||
import type { SessionManager } from '../SessionManager.js';
|
||||
import type { DatabaseManager } from '../DatabaseManager.js';
|
||||
import type { SessionEventBroadcaster } from '../events/SessionEventBroadcaster.js';
|
||||
import type { ParsedSummary } from '../../../sdk/parser.js';
|
||||
import { stripMemoryTagsFromJson } from '../../../utils/tag-stripping.js';
|
||||
import { isProjectExcluded } from '../../../utils/project-filter.js';
|
||||
import { SettingsDefaultsManager } from '../../../shared/SettingsDefaultsManager.js';
|
||||
import { USER_SETTINGS_PATH } from '../../../shared/paths.js';
|
||||
import { getProjectContext } from '../../../utils/project-name.js';
|
||||
import { normalizePlatformSource } from '../../../shared/platform-source.js';
|
||||
import { PrivacyCheckValidator } from '../validation/PrivacyCheckValidator.js';
|
||||
import { EventEmitter } from 'events';
|
||||
|
||||
// ============================================================================
|
||||
// Event bus — Phase 2 (`summaryStoredEvent`) consumers attach here.
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Event payload emitted exactly once per successful `ingestSummary` call that
|
||||
* actually stored a summary row. `messageId` is the pending_messages row id
|
||||
* that produced the summary; `sessionId` is the contentSessionId.
|
||||
*
|
||||
* The blocking `/api/session/end` endpoint defined by `05-hook-surface.md`
|
||||
* Phase 3 awaits this event to know the summary has landed in the DB.
|
||||
*/
|
||||
export interface SummaryStoredEvent {
|
||||
sessionId: string;
|
||||
messageId: number;
|
||||
}
|
||||
|
||||
class IngestEventBus extends EventEmitter {}
|
||||
|
||||
/**
|
||||
* Process-local event bus for ingestion lifecycle events.
|
||||
*
|
||||
* Single Node EventEmitter — there is no third event-bus in the worker.
|
||||
* `SessionManager` already uses Node EventEmitter for queue notifications
|
||||
* (`src/services/worker/SessionManager.ts:25`), and
|
||||
* `SessionQueueProcessor` consumes EventEmitter events
|
||||
* (`src/services/queue/SessionQueueProcessor.ts:18`); this module follows
|
||||
* the same pattern at the ingestion layer.
|
||||
*/
|
||||
export const ingestEventBus = new IngestEventBus();
|
||||
|
||||
// ============================================================================
|
||||
// Context registration
|
||||
// ============================================================================
|
||||
|
||||
interface IngestContext {
|
||||
sessionManager: SessionManager;
|
||||
dbManager: DatabaseManager;
|
||||
eventBroadcaster: SessionEventBroadcaster;
|
||||
/** Optional callback to (re)start the SDK generator after enqueue. */
|
||||
ensureGeneratorRunning?: (sessionDbId: number, source: string) => void;
|
||||
}
|
||||
|
||||
let ctx: IngestContext | null = null;
|
||||
|
||||
/**
|
||||
* Register the worker-scoped services the ingest helpers depend on.
|
||||
* Called once from `WorkerService` constructor.
|
||||
*/
|
||||
export function setIngestContext(next: IngestContext): void {
|
||||
ctx = next;
|
||||
}
|
||||
|
||||
function requireContext(): IngestContext {
|
||||
if (!ctx) {
|
||||
throw new Error('ingest helpers used before setIngestContext() — wiring bug');
|
||||
}
|
||||
return ctx;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Result type
|
||||
// ============================================================================
|
||||
|
||||
export type IngestResult =
|
||||
| { ok: true; sessionDbId: number; messageId?: number }
|
||||
| { ok: true; status: 'skipped'; reason: string }
|
||||
| { ok: false; reason: string; status?: number };
|
||||
|
||||
// ============================================================================
|
||||
// Observation
|
||||
// ============================================================================
|
||||
|
||||
export interface ObservationPayload {
|
||||
contentSessionId: string;
|
||||
toolName: string;
|
||||
toolInput: unknown;
|
||||
toolResponse: unknown;
|
||||
cwd?: string;
|
||||
platformSource?: string;
|
||||
agentId?: string;
|
||||
agentType?: string;
|
||||
toolUseId?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ingest an observation: resolve session, apply project / skip-tool filters,
|
||||
* strip privacy tags, persist to pending_messages, ensure the SDK generator
|
||||
* is running.
|
||||
*
|
||||
* Same implementation for cross-process HTTP callers and worker-internal
|
||||
* callers (transcript processor, ResponseProcessor side-effects).
|
||||
*/
|
||||
export function ingestObservation(payload: ObservationPayload): IngestResult {
|
||||
const { sessionManager, dbManager, eventBroadcaster, ensureGeneratorRunning } = requireContext();
|
||||
|
||||
if (!payload.contentSessionId) {
|
||||
return { ok: false, reason: 'missing contentSessionId', status: 400 };
|
||||
}
|
||||
if (!payload.toolName) {
|
||||
return { ok: false, reason: 'missing toolName', status: 400 };
|
||||
}
|
||||
|
||||
const platformSource = normalizePlatformSource(payload.platformSource);
|
||||
const cwd = typeof payload.cwd === 'string' ? payload.cwd : '';
|
||||
const project = cwd.trim() ? getProjectContext(cwd).primary : '';
|
||||
|
||||
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
|
||||
|
||||
// Project exclusion (the same gate the hook handler applies).
|
||||
if (cwd && isProjectExcluded(cwd, settings.CLAUDE_MEM_EXCLUDED_PROJECTS)) {
|
||||
return { ok: true, status: 'skipped', reason: 'project_excluded' };
|
||||
}
|
||||
|
||||
// Skip low-value or meta tools per user settings.
|
||||
const skipTools = new Set(
|
||||
settings.CLAUDE_MEM_SKIP_TOOLS.split(',').map(t => t.trim()).filter(Boolean)
|
||||
);
|
||||
if (skipTools.has(payload.toolName)) {
|
||||
return { ok: true, status: 'skipped', reason: 'tool_excluded' };
|
||||
}
|
||||
|
||||
// Skip meta-observations: file operations on session-memory files.
|
||||
const fileOperationTools = new Set(['Edit', 'Write', 'Read', 'NotebookEdit']);
|
||||
if (fileOperationTools.has(payload.toolName) && payload.toolInput && typeof payload.toolInput === 'object') {
|
||||
const input = payload.toolInput as { file_path?: string; notebook_path?: string };
|
||||
const filePath = input.file_path || input.notebook_path;
|
||||
if (filePath && filePath.includes('session-memory')) {
|
||||
return { ok: true, status: 'skipped', reason: 'session_memory_meta' };
|
||||
}
|
||||
}
|
||||
|
||||
const store = dbManager.getSessionStore();
|
||||
|
||||
let sessionDbId: number;
|
||||
let promptNumber: number;
|
||||
try {
|
||||
sessionDbId = store.createSDKSession(payload.contentSessionId, project, '', undefined, platformSource);
|
||||
promptNumber = store.getPromptNumberFromUserPrompts(payload.contentSessionId);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
logger.error('INGEST', 'Observation session resolution failed', {
|
||||
contentSessionId: payload.contentSessionId,
|
||||
toolName: payload.toolName,
|
||||
}, error instanceof Error ? error : new Error(message));
|
||||
return { ok: false, reason: message, status: 500 };
|
||||
}
|
||||
|
||||
// Privacy: skip if user prompt was entirely private.
|
||||
const userPrompt = PrivacyCheckValidator.checkUserPromptPrivacy(
|
||||
store,
|
||||
payload.contentSessionId,
|
||||
promptNumber,
|
||||
'observation',
|
||||
sessionDbId,
|
||||
{ tool_name: payload.toolName }
|
||||
);
|
||||
if (!userPrompt) {
|
||||
return { ok: true, status: 'skipped', reason: 'private' };
|
||||
}
|
||||
|
||||
const cleanedToolInput = payload.toolInput !== undefined
|
||||
? stripMemoryTagsFromJson(JSON.stringify(payload.toolInput))
|
||||
: '{}';
|
||||
const cleanedToolResponse = payload.toolResponse !== undefined
|
||||
? stripMemoryTagsFromJson(JSON.stringify(payload.toolResponse))
|
||||
: '{}';
|
||||
|
||||
sessionManager.queueObservation(sessionDbId, {
|
||||
tool_name: payload.toolName,
|
||||
tool_input: cleanedToolInput,
|
||||
tool_response: cleanedToolResponse,
|
||||
prompt_number: promptNumber,
|
||||
cwd: cwd || (() => {
|
||||
logger.error('INGEST', 'Missing cwd when ingesting observation', {
|
||||
sessionId: sessionDbId,
|
||||
toolName: payload.toolName,
|
||||
});
|
||||
return '';
|
||||
})(),
|
||||
agentId: typeof payload.agentId === 'string' ? payload.agentId : undefined,
|
||||
agentType: typeof payload.agentType === 'string' ? payload.agentType : undefined,
|
||||
});
|
||||
|
||||
ensureGeneratorRunning?.(sessionDbId, 'observation');
|
||||
eventBroadcaster.broadcastObservationQueued(sessionDbId);
|
||||
|
||||
return { ok: true, sessionDbId };
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Summary (queue side — agent processes the request asynchronously)
|
||||
// ============================================================================
|
||||
|
||||
export interface PromptPayload {
|
||||
contentSessionId: string;
|
||||
/** The user prompt text (must not contain stripped tags). */
|
||||
prompt: string;
|
||||
cwd?: string;
|
||||
platformSource?: string;
|
||||
promptNumber?: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* Ingest a user prompt. Used by the SessionStart / UserPromptSubmit hooks and
|
||||
* by transcript-driven session inits. Wraps `SessionStore.appendUserPrompt`
|
||||
* so cross-process and in-process callers share the same path.
|
||||
*/
|
||||
export function ingestPrompt(payload: PromptPayload): IngestResult {
|
||||
const { dbManager } = requireContext();
|
||||
|
||||
if (!payload.contentSessionId) {
|
||||
return { ok: false, reason: 'missing contentSessionId', status: 400 };
|
||||
}
|
||||
if (typeof payload.prompt !== 'string') {
|
||||
return { ok: false, reason: 'missing prompt text', status: 400 };
|
||||
}
|
||||
|
||||
const platformSource = normalizePlatformSource(payload.platformSource);
|
||||
const cwd = typeof payload.cwd === 'string' ? payload.cwd : '';
|
||||
const project = cwd.trim() ? getProjectContext(cwd).primary : '';
|
||||
|
||||
try {
|
||||
const store = dbManager.getSessionStore();
|
||||
const sessionDbId = store.createSDKSession(payload.contentSessionId, project, payload.prompt, undefined, platformSource);
|
||||
return { ok: true, sessionDbId };
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
return { ok: false, reason: message, status: 500 };
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Summary
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Two shapes of ingest:
|
||||
* - "queue a summarize request" (cross-process hook trigger): goes via
|
||||
* `SessionManager.queueSummarize` so the SDK agent will produce the XML
|
||||
* payload on its next iteration.
|
||||
* - "the SDK agent already produced the parsed summary": goes via
|
||||
* `ingestSummary({ parsed, sessionDbId, messageId })`. Stored synchronously,
|
||||
* emits `summaryStoredEvent` for the blocking endpoint in plan 05.
|
||||
*/
|
||||
export type SummaryPayload =
|
||||
| {
|
||||
kind: 'queue';
|
||||
contentSessionId: string;
|
||||
lastAssistantMessage?: string;
|
||||
platformSource?: string;
|
||||
cwd?: string;
|
||||
}
|
||||
| {
|
||||
kind: 'parsed';
|
||||
sessionDbId: number;
|
||||
messageId: number;
|
||||
contentSessionId: string;
|
||||
parsed: ParsedSummary;
|
||||
};
|
||||
|
||||
export function ingestSummary(payload: SummaryPayload): IngestResult {
|
||||
const { sessionManager, dbManager, ensureGeneratorRunning } = requireContext();
|
||||
|
||||
if (payload.kind === 'queue') {
|
||||
if (!payload.contentSessionId) {
|
||||
return { ok: false, reason: 'missing contentSessionId', status: 400 };
|
||||
}
|
||||
|
||||
const platformSource = normalizePlatformSource(payload.platformSource);
|
||||
const cwd = typeof payload.cwd === 'string' ? payload.cwd : '';
|
||||
const project = cwd.trim() ? getProjectContext(cwd).primary : '';
|
||||
|
||||
let sessionDbId: number;
|
||||
try {
|
||||
sessionDbId = dbManager.getSessionStore().createSDKSession(payload.contentSessionId, project, '', undefined, platformSource);
|
||||
} catch (error) {
|
||||
const message = error instanceof Error ? error.message : String(error);
|
||||
return { ok: false, reason: message, status: 500 };
|
||||
}
|
||||
|
||||
sessionManager.queueSummarize(sessionDbId, payload.lastAssistantMessage);
|
||||
ensureGeneratorRunning?.(sessionDbId, 'summarize');
|
||||
|
||||
return { ok: true, sessionDbId };
|
||||
}
|
||||
|
||||
// kind === 'parsed' — the SDK agent has produced a summary; store via
|
||||
// session store and emit the summaryStoredEvent for blocking consumers.
|
||||
// Skipped summaries (`<skip_summary/>`) are recorded as a successful no-op:
|
||||
// they have no content to persist, but consumers should still be unblocked.
|
||||
if (payload.parsed.skipped) {
|
||||
ingestEventBus.emit('summaryStoredEvent', {
|
||||
sessionId: payload.contentSessionId,
|
||||
messageId: payload.messageId,
|
||||
} satisfies SummaryStoredEvent);
|
||||
return { ok: true, sessionDbId: payload.sessionDbId, messageId: payload.messageId };
|
||||
}
|
||||
|
||||
// Note: the actual storage of the parsed summary remains in
|
||||
// `processAgentResponse` (it is co-transactional with the observation batch).
|
||||
// This branch exists to centralize the event emission so that producers
|
||||
// never forget to fire it.
|
||||
ingestEventBus.emit('summaryStoredEvent', {
|
||||
sessionId: payload.contentSessionId,
|
||||
messageId: payload.messageId,
|
||||
} satisfies SummaryStoredEvent);
|
||||
|
||||
return { ok: true, sessionDbId: payload.sessionDbId, messageId: payload.messageId };
|
||||
}
|
||||
@@ -10,82 +10,97 @@
|
||||
* (should not be persisted to memory)
|
||||
* 4. <system-reminder> - Claude Code-injected system reminders
|
||||
* (CLAUDE.md contents, deferred tool lists, etc. — should not be persisted)
|
||||
* 5. <persisted-output> - Persisted-output payload tag
|
||||
*
|
||||
* EDGE PROCESSING PATTERN: Filter at hook layer before sending to worker/storage.
|
||||
* This keeps the worker service simple and follows one-way data stream.
|
||||
*
|
||||
* PATHFINDER plan 03 phase 8: collapsed countTags + stripTagsInternal into a
|
||||
* single alternation regex. One pass over the input. One helper, N callers
|
||||
* (`stripMemoryTagsFromJson` / `stripMemoryTagsFromPrompt` are thin adapters).
|
||||
*/
|
||||
|
||||
import { logger } from './logger.js';
|
||||
|
||||
/** All tag names this module strips. Single source of truth for the regex. */
|
||||
const TAG_NAMES = [
|
||||
'private',
|
||||
'claude-mem-context',
|
||||
'system_instruction',
|
||||
'system-instruction',
|
||||
'persisted-output',
|
||||
'system-reminder',
|
||||
] as const;
|
||||
type TagName = (typeof TAG_NAMES)[number];
|
||||
|
||||
/**
|
||||
* Single-pass alternation regex covering every privacy / context tag.
|
||||
* Backreference `\1` ensures a closing tag matches the opening name; tag
|
||||
* attributes (e.g. `<system-reminder data-foo="…">`) are tolerated via
|
||||
* `[^>]*`.
|
||||
*/
|
||||
const STRIP_REGEX = new RegExp(
|
||||
`<(${TAG_NAMES.join('|')})\\b[^>]*>[\\s\\S]*?</\\1>`,
|
||||
'g'
|
||||
);
|
||||
|
||||
/**
|
||||
* Regex to match <system-reminder> tags and their content.
|
||||
* Exported for use by transcript parsers that strip system-reminder at read-time.
|
||||
*
|
||||
* Kept as a separate single-tag regex because the active transcript parser
|
||||
* (`src/shared/transcript-parser.ts`) consumes only this one tag and would
|
||||
* otherwise need to re-import the multi-tag list.
|
||||
*/
|
||||
export const SYSTEM_REMINDER_REGEX = /<system-reminder>[\s\S]*?<\/system-reminder>/g;
|
||||
|
||||
/**
|
||||
* Maximum number of tags allowed in a single content block
|
||||
* This protects against ReDoS (Regular Expression Denial of Service) attacks
|
||||
* where malicious input with many nested/unclosed tags could cause catastrophic backtracking
|
||||
*/
|
||||
/** Maximum total stripped-tag count before we log a ReDoS-class anomaly. */
|
||||
const MAX_TAG_COUNT = 100;
|
||||
|
||||
/**
|
||||
* Count total number of opening tags in content
|
||||
* Used for ReDoS protection before regex processing
|
||||
* Strip every recognised tag from `input` in a single pass.
|
||||
*
|
||||
* @returns the stripped string (trimmed) and per-tag counts. Counts are
|
||||
* surfaced to logs for observability but are not used as a control
|
||||
* signal.
|
||||
*/
|
||||
function countTags(content: string): number {
|
||||
const privateCount = (content.match(/<private>/g) || []).length;
|
||||
const contextCount = (content.match(/<claude-mem-context>/g) || []).length;
|
||||
const systemInstructionCount = (content.match(/<system_instruction>/g) || []).length;
|
||||
const systemInstructionHyphenCount = (content.match(/<system-instruction>/g) || []).length;
|
||||
const persistedOutputCount = (content.match(/<persisted-output>/g) || []).length;
|
||||
const systemReminderCount = (content.match(/<system-reminder>/g) || []).length;
|
||||
return privateCount + contextCount + systemInstructionCount + systemInstructionHyphenCount + persistedOutputCount + systemReminderCount;
|
||||
}
|
||||
export function stripTags(input: string): { stripped: string; counts: Record<TagName, number> } {
|
||||
const counts: Record<TagName, number> = Object.fromEntries(
|
||||
TAG_NAMES.map(name => [name, 0])
|
||||
) as Record<TagName, number>;
|
||||
|
||||
/**
|
||||
* Internal function to strip memory tags from content
|
||||
* Shared logic extracted from both JSON and prompt stripping functions
|
||||
*/
|
||||
function stripTagsInternal(content: string): string {
|
||||
// ReDoS protection: limit tag count before regex processing
|
||||
const tagCount = countTags(content);
|
||||
if (tagCount > MAX_TAG_COUNT) {
|
||||
STRIP_REGEX.lastIndex = 0; // /g state is per-instance — reset before each call.
|
||||
|
||||
let total = 0;
|
||||
const stripped = input.replace(STRIP_REGEX, (_, name: TagName) => {
|
||||
counts[name] = (counts[name] ?? 0) + 1;
|
||||
total += 1;
|
||||
return '';
|
||||
});
|
||||
|
||||
if (total > MAX_TAG_COUNT) {
|
||||
logger.warn('SYSTEM', 'tag count exceeds limit', undefined, {
|
||||
tagCount,
|
||||
tagCount: total,
|
||||
maxAllowed: MAX_TAG_COUNT,
|
||||
contentLength: content.length
|
||||
contentLength: input.length,
|
||||
});
|
||||
// Still process but log the anomaly
|
||||
}
|
||||
|
||||
return content
|
||||
.replace(/<claude-mem-context>[\s\S]*?<\/claude-mem-context>/g, '')
|
||||
.replace(/<private>[\s\S]*?<\/private>/g, '')
|
||||
.replace(/<system_instruction>[\s\S]*?<\/system_instruction>/g, '')
|
||||
.replace(/<system-instruction>[\s\S]*?<\/system-instruction>/g, '')
|
||||
.replace(/<persisted-output>[\s\S]*?<\/persisted-output>/g, '')
|
||||
.replace(SYSTEM_REMINDER_REGEX, '')
|
||||
.trim();
|
||||
return { stripped: stripped.trim(), counts };
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip memory tags from JSON-serialized content (tool inputs/responses)
|
||||
*
|
||||
* @param content - Stringified JSON content from tool_input or tool_response
|
||||
* @returns Cleaned content with tags removed, or '{}' if invalid
|
||||
* Strip memory tags from JSON-serialized content (tool inputs/responses).
|
||||
* Thin adapter around `stripTags` — same regex, same single pass.
|
||||
*/
|
||||
export function stripMemoryTagsFromJson(content: string): string {
|
||||
return stripTagsInternal(content);
|
||||
return stripTags(content).stripped;
|
||||
}
|
||||
|
||||
/**
|
||||
* Strip memory tags from user prompt content
|
||||
*
|
||||
* @param content - Raw user prompt text
|
||||
* @returns Cleaned content with tags removed
|
||||
* Strip memory tags from user prompt content.
|
||||
* Thin adapter around `stripTags` — same regex, same single pass.
|
||||
*/
|
||||
export function stripMemoryTagsFromPrompt(content: string): string {
|
||||
return stripTagsInternal(content);
|
||||
return stripTags(content).stripped;
|
||||
}
|
||||
|
||||
@@ -1,266 +0,0 @@
|
||||
/**
|
||||
* TranscriptParser - Properly parse Claude Code transcript JSONL files
|
||||
* Handles all transcript entry types based on validated model
|
||||
*/
|
||||
|
||||
import { readFileSync } from 'fs';
|
||||
import { logger } from './logger.js';
|
||||
import { SYSTEM_REMINDER_REGEX } from './tag-stripping.js';
|
||||
import type {
|
||||
TranscriptEntry,
|
||||
UserTranscriptEntry,
|
||||
AssistantTranscriptEntry,
|
||||
SummaryTranscriptEntry,
|
||||
SystemTranscriptEntry,
|
||||
QueueOperationTranscriptEntry,
|
||||
ContentItem,
|
||||
TextContent,
|
||||
} from '../types/transcript.js';
|
||||
|
||||
export interface ParseStats {
|
||||
totalLines: number;
|
||||
parsedEntries: number;
|
||||
failedLines: number;
|
||||
entriesByType: Record<string, number>;
|
||||
failureRate: number;
|
||||
}
|
||||
|
||||
export class TranscriptParser {
|
||||
private entries: TranscriptEntry[] = [];
|
||||
private parseErrors: Array<{ lineNumber: number; error: string }> = [];
|
||||
|
||||
constructor(transcriptPath: string) {
|
||||
this.parseTranscript(transcriptPath);
|
||||
}
|
||||
|
||||
private parseTranscript(transcriptPath: string): void {
|
||||
const content = readFileSync(transcriptPath, 'utf-8').trim();
|
||||
if (!content) return;
|
||||
|
||||
const lines = content.split('\n');
|
||||
|
||||
lines.forEach((line, index) => {
|
||||
try {
|
||||
const entry = JSON.parse(line) as TranscriptEntry;
|
||||
this.entries.push(entry);
|
||||
} catch (error) {
|
||||
logger.debug('PARSER', 'Failed to parse transcript line', { lineNumber: index + 1 }, error as Error);
|
||||
this.parseErrors.push({
|
||||
lineNumber: index + 1,
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
// Log summary if there were parse errors
|
||||
if (this.parseErrors.length > 0) {
|
||||
logger.error('PARSER', `Failed to parse ${this.parseErrors.length} lines`, {
|
||||
path: transcriptPath,
|
||||
totalLines: lines.length,
|
||||
errorCount: this.parseErrors.length
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all entries of a specific type
|
||||
*/
|
||||
getEntriesByType<T extends TranscriptEntry>(type: T['type']): T[] {
|
||||
return this.entries.filter((e) => e.type === type) as T[];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all user entries
|
||||
*/
|
||||
getUserEntries(): UserTranscriptEntry[] {
|
||||
return this.getEntriesByType<UserTranscriptEntry>('user');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all assistant entries
|
||||
*/
|
||||
getAssistantEntries(): AssistantTranscriptEntry[] {
|
||||
return this.getEntriesByType<AssistantTranscriptEntry>('assistant');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all summary entries
|
||||
*/
|
||||
getSummaryEntries(): SummaryTranscriptEntry[] {
|
||||
return this.getEntriesByType<SummaryTranscriptEntry>('summary');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all system entries
|
||||
*/
|
||||
getSystemEntries(): SystemTranscriptEntry[] {
|
||||
return this.getEntriesByType<SystemTranscriptEntry>('system');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all queue operation entries
|
||||
*/
|
||||
getQueueOperationEntries(): QueueOperationTranscriptEntry[] {
|
||||
return this.getEntriesByType<QueueOperationTranscriptEntry>('queue-operation');
|
||||
}
|
||||
|
||||
/**
|
||||
* Get last entry of a specific type
|
||||
*/
|
||||
getLastEntryByType<T extends TranscriptEntry>(type: T['type']): T | null {
|
||||
const entries = this.getEntriesByType<T>(type);
|
||||
return entries.length > 0 ? entries[entries.length - 1] : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract text content from content items
|
||||
*/
|
||||
private extractTextFromContent(content: string | ContentItem[]): string {
|
||||
if (typeof content === 'string') {
|
||||
return content;
|
||||
}
|
||||
|
||||
if (Array.isArray(content)) {
|
||||
return content
|
||||
.filter((item): item is TextContent => item.type === 'text')
|
||||
.map((item) => item.text)
|
||||
.join('\n');
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get last user message text (finds last entry with actual text content)
|
||||
*/
|
||||
getLastUserMessage(): string {
|
||||
const userEntries = this.getUserEntries();
|
||||
|
||||
// Iterate backward to find the last user message with text content
|
||||
for (let i = userEntries.length - 1; i >= 0; i--) {
|
||||
const entry = userEntries[i];
|
||||
if (!entry?.message?.content) continue;
|
||||
|
||||
const text = this.extractTextFromContent(entry.message.content);
|
||||
if (text) return text;
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get last assistant message text (finds last entry with text content, with optional system-reminder filtering)
|
||||
*/
|
||||
getLastAssistantMessage(filterSystemReminders = true): string {
|
||||
const assistantEntries = this.getAssistantEntries();
|
||||
|
||||
// Iterate backward to find the last assistant message with text content
|
||||
for (let i = assistantEntries.length - 1; i >= 0; i--) {
|
||||
const entry = assistantEntries[i];
|
||||
if (!entry?.message?.content) continue;
|
||||
|
||||
let text = this.extractTextFromContent(entry.message.content);
|
||||
if (!text) continue;
|
||||
|
||||
if (filterSystemReminders) {
|
||||
// Filter out system-reminder tags and their content
|
||||
text = text.replace(SYSTEM_REMINDER_REGEX, '');
|
||||
// Clean up excessive whitespace
|
||||
text = text.replace(/\n{3,}/g, '\n\n').trim();
|
||||
}
|
||||
|
||||
if (text) return text;
|
||||
}
|
||||
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all tool use operations from assistant entries
|
||||
*/
|
||||
getToolUseHistory(): Array<{ name: string; timestamp: string; input: any }> {
|
||||
const toolUses: Array<{ name: string; timestamp: string; input: any }> = [];
|
||||
|
||||
for (const entry of this.getAssistantEntries()) {
|
||||
if (Array.isArray(entry.message.content)) {
|
||||
for (const item of entry.message.content) {
|
||||
if (item.type === 'tool_use') {
|
||||
toolUses.push({
|
||||
name: item.name,
|
||||
timestamp: entry.timestamp,
|
||||
input: item.input,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return toolUses;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get total token usage across all assistant messages
|
||||
*/
|
||||
getTotalTokenUsage(): {
|
||||
inputTokens: number;
|
||||
outputTokens: number;
|
||||
cacheCreationTokens: number;
|
||||
cacheReadTokens: number;
|
||||
} {
|
||||
const assistantEntries = this.getAssistantEntries();
|
||||
|
||||
return assistantEntries.reduce(
|
||||
(acc, entry) => {
|
||||
const usage = entry.message.usage;
|
||||
if (usage) {
|
||||
acc.inputTokens += usage.input_tokens || 0;
|
||||
acc.outputTokens += usage.output_tokens || 0;
|
||||
acc.cacheCreationTokens += usage.cache_creation_input_tokens || 0;
|
||||
acc.cacheReadTokens += usage.cache_read_input_tokens || 0;
|
||||
}
|
||||
return acc;
|
||||
},
|
||||
{
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
cacheCreationTokens: 0,
|
||||
cacheReadTokens: 0,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get parse statistics
|
||||
*/
|
||||
getParseStats(): ParseStats {
|
||||
const entriesByType: Record<string, number> = {};
|
||||
|
||||
for (const entry of this.entries) {
|
||||
entriesByType[entry.type] = (entriesByType[entry.type] || 0) + 1;
|
||||
}
|
||||
|
||||
const totalLines = this.entries.length + this.parseErrors.length;
|
||||
|
||||
return {
|
||||
totalLines,
|
||||
parsedEntries: this.entries.length,
|
||||
failedLines: this.parseErrors.length,
|
||||
entriesByType,
|
||||
failureRate: totalLines > 0 ? this.parseErrors.length / totalLines : 0,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Get parse errors
|
||||
*/
|
||||
getParseErrors(): Array<{ lineNumber: number; error: string }> {
|
||||
return this.parseErrors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all entries (raw)
|
||||
*/
|
||||
getAllEntries(): TranscriptEntry[] {
|
||||
return this.entries;
|
||||
}
|
||||
}
|
||||
@@ -1,37 +1,56 @@
|
||||
/**
|
||||
* Tests for parseSummary (fix for #1360)
|
||||
* Tests for parseAgentXml summary path (PATHFINDER plan 03 phase 1).
|
||||
*
|
||||
* Validates that false-positive summary matches (no sub-tags) are rejected
|
||||
* while real summaries — even with some missing fields — are still saved.
|
||||
* Validates that the discriminated-union parser:
|
||||
* - rejects responses with no recognised root element (`{ valid: false }`),
|
||||
* - rejects empty / no-sub-tag <summary> blocks (former #1360 false-positive),
|
||||
* - returns a populated summary when at least one sub-tag is present,
|
||||
* - treats <skip_summary reason="…"/> as a first-class summary case,
|
||||
* - DOES NOT coerce <observation> blocks into summary fields (former
|
||||
* #1633 fallback path is deleted; the caller must mark the message failed
|
||||
* and let the retry ladder do its job — principle 1 + principle 2).
|
||||
*/
|
||||
import { describe, it, expect } from 'bun:test';
|
||||
import { parseSummary } from '../../src/sdk/parser.js';
|
||||
import { describe, it, expect, mock } from 'bun:test';
|
||||
|
||||
describe('parseSummary', () => {
|
||||
it('returns null when no <summary> tag present and coercion disabled', () => {
|
||||
expect(parseSummary('<observation><title>foo</title></observation>')).toBeNull();
|
||||
mock.module('../../src/services/domain/ModeManager.js', () => ({
|
||||
ModeManager: {
|
||||
getInstance: () => ({
|
||||
getActiveMode: () => ({
|
||||
observation_types: [{ id: 'bugfix' }, { id: 'discovery' }, { id: 'refactor' }],
|
||||
}),
|
||||
}),
|
||||
},
|
||||
}));
|
||||
|
||||
import { parseAgentXml } from '../../src/sdk/parser.js';
|
||||
|
||||
describe('parseAgentXml — summaries', () => {
|
||||
it('returns invalid when response is plain text (no XML)', () => {
|
||||
const result = parseAgentXml('Some plain text response without any XML tags');
|
||||
expect(result.valid).toBe(false);
|
||||
});
|
||||
|
||||
it('returns null when no <summary> or <observation> tags present', () => {
|
||||
expect(parseSummary('Some plain text response without any XML tags')).toBeNull();
|
||||
it('returns invalid when <summary> has no sub-tags (false positive — was #1360)', () => {
|
||||
// observation response that accidentally contains <summary>some text</summary>
|
||||
const result = parseAgentXml('<observation>done <summary>some content here</summary></observation>');
|
||||
// The first root is <observation>, which has no parseable content; result must be invalid.
|
||||
expect(result.valid).toBe(false);
|
||||
});
|
||||
|
||||
it('returns null when <summary> has no sub-tags (false positive — fix for #1360)', () => {
|
||||
// This is the bug: observation response accidentally contains <summary>some text</summary>
|
||||
expect(parseSummary('<observation>done <summary>some content here</summary></observation>')).toBeNull();
|
||||
it('returns invalid for bare <summary> with only plain text, no sub-tags', () => {
|
||||
const result = parseAgentXml('<summary>This session was productive.</summary>');
|
||||
expect(result.valid).toBe(false);
|
||||
});
|
||||
|
||||
it('returns null for bare <summary> with only plain text, no sub-tags', () => {
|
||||
expect(parseSummary('<summary>This session was productive.</summary>')).toBeNull();
|
||||
});
|
||||
|
||||
it('returns summary when at least one sub-tag is present (respects maintainer note)', () => {
|
||||
it('returns valid summary when at least one sub-tag is present', () => {
|
||||
const text = `<summary><request>Fix the bug</request></summary>`;
|
||||
const result = parseSummary(text);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.request).toBe('Fix the bug');
|
||||
expect(result?.investigated).toBeNull();
|
||||
expect(result?.learned).toBeNull();
|
||||
const result = parseAgentXml(text);
|
||||
expect(result.valid).toBe(true);
|
||||
if (result.valid && result.kind === 'summary') {
|
||||
expect(result.data.request).toBe('Fix the bug');
|
||||
expect(result.data.investigated).toBeNull();
|
||||
expect(result.data.learned).toBeNull();
|
||||
}
|
||||
});
|
||||
|
||||
it('returns full summary when all fields are present', () => {
|
||||
@@ -42,108 +61,49 @@ describe('parseSummary', () => {
|
||||
<completed>Extended token TTL to 24h</completed>
|
||||
<next_steps>Monitor error rates</next_steps>
|
||||
</summary>`;
|
||||
const result = parseSummary(text);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.request).toBe('Fix login bug');
|
||||
expect(result?.investigated).toBe('Auth flow and JWT expiry');
|
||||
expect(result?.learned).toBe('Token was expiring too soon');
|
||||
expect(result?.completed).toBe('Extended token TTL to 24h');
|
||||
expect(result?.next_steps).toBe('Monitor error rates');
|
||||
const result = parseAgentXml(text);
|
||||
expect(result.valid).toBe(true);
|
||||
if (result.valid && result.kind === 'summary') {
|
||||
expect(result.data.request).toBe('Fix login bug');
|
||||
expect(result.data.investigated).toBe('Auth flow and JWT expiry');
|
||||
expect(result.data.learned).toBe('Token was expiring too soon');
|
||||
expect(result.data.completed).toBe('Extended token TTL to 24h');
|
||||
expect(result.data.next_steps).toBe('Monitor error rates');
|
||||
}
|
||||
});
|
||||
|
||||
it('returns null when skip_summary tag is present', () => {
|
||||
expect(parseSummary('<skip_summary reason="no work done"/>')).toBeNull();
|
||||
it('treats <skip_summary reason="…"/> as a first-class summary with skipped:true', () => {
|
||||
const result = parseAgentXml('<skip_summary reason="no work done"/>');
|
||||
expect(result.valid).toBe(true);
|
||||
if (result.valid && result.kind === 'summary') {
|
||||
expect(result.data.skipped).toBe(true);
|
||||
expect(result.data.skip_reason).toBe('no work done');
|
||||
}
|
||||
});
|
||||
|
||||
// Observation-to-summary coercion tests (#1633)
|
||||
it('coerces <observation> with content into a summary when coerceFromObservation=true (#1633)', () => {
|
||||
const result = parseSummary('<observation><title>foo</title></observation>', undefined, true);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.request).toBe('foo');
|
||||
expect(result?.completed).toBe('foo');
|
||||
it('does NOT coerce <observation> into a summary (former #1633 path deleted)', () => {
|
||||
const result = parseAgentXml('<observation><title>foo</title></observation>');
|
||||
expect(result.valid).toBe(true);
|
||||
if (result.valid) {
|
||||
expect(result.kind).toBe('observation');
|
||||
}
|
||||
});
|
||||
|
||||
it('coerces observation with narrative into summary with investigated field (#1633)', () => {
|
||||
const text = `<observation>
|
||||
<type>refactor</type>
|
||||
<title>UObjectArray refactored</title>
|
||||
<narrative>Removed local XXXX and migrated to new pattern</narrative>
|
||||
</observation>`;
|
||||
const result = parseSummary(text, undefined, true);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.request).toBe('UObjectArray refactored');
|
||||
expect(result?.investigated).toBe('Removed local XXXX and migrated to new pattern');
|
||||
});
|
||||
|
||||
it('coerces observation with facts into summary with learned field (#1633)', () => {
|
||||
const text = `<observation>
|
||||
<type>discovery</type>
|
||||
<title>JWT token handling</title>
|
||||
<facts>
|
||||
<fact>Tokens expire after 1 hour</fact>
|
||||
<fact>Refresh flow uses rotating keys</fact>
|
||||
</facts>
|
||||
</observation>`;
|
||||
const result = parseSummary(text, undefined, true);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.request).toBe('JWT token handling');
|
||||
expect(result?.learned).toBe('Tokens expire after 1 hour; Refresh flow uses rotating keys');
|
||||
});
|
||||
|
||||
it('coerces observation with subtitle into completed field (#1633)', () => {
|
||||
const text = `<observation>
|
||||
<type>config</type>
|
||||
<title>Database migration</title>
|
||||
<subtitle>Added new index for performance</subtitle>
|
||||
</observation>`;
|
||||
const result = parseSummary(text, undefined, true);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.completed).toBe('Database migration — Added new index for performance');
|
||||
});
|
||||
|
||||
it('returns null for empty observation even with coercion enabled (#1633)', () => {
|
||||
const text = `<observation><type>config</type></observation>`;
|
||||
expect(parseSummary(text, undefined, true)).toBeNull();
|
||||
});
|
||||
|
||||
it('prefers <summary> tags over observation coercion when both present (#1633)', () => {
|
||||
it('prefers <summary> over <observation> when both present', () => {
|
||||
const text = `<observation><title>obs title</title></observation>
|
||||
<summary><request>summary request</request></summary>`;
|
||||
const result = parseSummary(text, undefined, true);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.request).toBe('summary request');
|
||||
const result = parseAgentXml(text);
|
||||
// First root by position is observation → that wins. Caller must pick the
|
||||
// right turn (summary vs observation) by sending only summary prompts on
|
||||
// summary turns. This is the contract; it is not coercion.
|
||||
expect(result.valid).toBe(true);
|
||||
if (result.valid) {
|
||||
expect(result.kind).toBe('observation');
|
||||
}
|
||||
});
|
||||
|
||||
it('falls back to observation coercion when <summary> matches but has empty sub-tags (#1633)', () => {
|
||||
// LLM wraps an empty summary around real observation content — without the
|
||||
// fallback, the empty-subtag guard (#1360) rejects the summary and we lose
|
||||
// the observation content, resurrecting the retry loop.
|
||||
const text = `<summary></summary>
|
||||
<observation>
|
||||
<title>the real work</title>
|
||||
<narrative>what actually happened</narrative>
|
||||
</observation>`;
|
||||
const result = parseSummary(text, undefined, true);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.request).toBe('the real work');
|
||||
expect(result?.investigated).toBe('what actually happened');
|
||||
});
|
||||
|
||||
it('empty <summary> with no observation content still returns null (coercion disabled)', () => {
|
||||
const text = '<summary></summary>';
|
||||
expect(parseSummary(text, undefined, true)).toBeNull();
|
||||
});
|
||||
|
||||
it('skips empty leading observation blocks and coerces from the first populated one (#1633)', () => {
|
||||
const text = `<observation><type>discovery</type></observation>
|
||||
<observation>
|
||||
<type>bugfix</type>
|
||||
<title>second block has content</title>
|
||||
<narrative>fixed the crash</narrative>
|
||||
</observation>`;
|
||||
const result = parseSummary(text, undefined, true);
|
||||
expect(result).not.toBeNull();
|
||||
expect(result?.request).toBe('second block has content');
|
||||
expect(result?.investigated).toBe('fixed the crash');
|
||||
it('returns invalid for empty input', () => {
|
||||
expect(parseAgentXml('').valid).toBe(false);
|
||||
expect(parseAgentXml(' \n ').valid).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -11,9 +11,16 @@ mock.module('../../src/services/domain/ModeManager.js', () => ({
|
||||
},
|
||||
}));
|
||||
|
||||
import { parseObservations } from '../../src/sdk/parser.js';
|
||||
import { parseAgentXml } from '../../src/sdk/parser.js';
|
||||
|
||||
describe('parseObservations', () => {
|
||||
function expectObservation(raw: string) {
|
||||
const result = parseAgentXml(raw);
|
||||
if (!result.valid) throw new Error(`expected valid observation, got reason: ${result.reason}`);
|
||||
if (result.kind !== 'observation') throw new Error(`expected observation, got ${result.kind}`);
|
||||
return result.data;
|
||||
}
|
||||
|
||||
describe('parseAgentXml — observations', () => {
|
||||
it('returns a populated observation when title is present', () => {
|
||||
const xml = `<observation>
|
||||
<type>discovery</type>
|
||||
@@ -21,7 +28,7 @@ describe('parseObservations', () => {
|
||||
<narrative>The token refresh logic skips expired tokens.</narrative>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
const result = expectObservation(xml);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].title).toBe('Found a bug in auth module');
|
||||
@@ -35,7 +42,7 @@ describe('parseObservations', () => {
|
||||
<narrative>Patched the null pointer dereference in session handler.</narrative>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
const result = expectObservation(xml);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].title).toBeNull();
|
||||
@@ -48,7 +55,7 @@ describe('parseObservations', () => {
|
||||
<facts><fact>File limit is hardcoded to 5</fact></facts>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
const result = expectObservation(xml);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].facts).toEqual(['File limit is hardcoded to 5']);
|
||||
@@ -60,7 +67,7 @@ describe('parseObservations', () => {
|
||||
<concepts><concept>dependency-injection</concept></concepts>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
const result = expectObservation(xml);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].concepts).toEqual(['dependency-injection']);
|
||||
@@ -73,9 +80,8 @@ describe('parseObservations', () => {
|
||||
<type>bugfix</type>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
|
||||
expect(result).toHaveLength(0);
|
||||
const result = parseAgentXml(xml);
|
||||
expect(result.valid).toBe(false);
|
||||
});
|
||||
|
||||
it('filters out ghost observation with empty tags but no text content (#1625)', () => {
|
||||
@@ -87,9 +93,8 @@ describe('parseObservations', () => {
|
||||
<concepts></concepts>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
|
||||
expect(result).toHaveLength(0);
|
||||
const result = parseAgentXml(xml);
|
||||
expect(result.valid).toBe(false);
|
||||
});
|
||||
|
||||
it('filters out multiple ghost observations while keeping valid ones (#1625)', () => {
|
||||
@@ -102,7 +107,7 @@ describe('parseObservations', () => {
|
||||
<observation><type>refactor</type><title></title><narrative> </narrative></observation>
|
||||
`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
const result = expectObservation(xml);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].title).toBe('Real observation');
|
||||
@@ -116,9 +121,8 @@ describe('parseObservations', () => {
|
||||
<subtitle>Only a subtitle, no real content</subtitle>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
|
||||
expect(result).toHaveLength(0);
|
||||
const result = parseAgentXml(xml);
|
||||
expect(result.valid).toBe(false);
|
||||
});
|
||||
|
||||
it('uses first mode type as fallback when type is missing', () => {
|
||||
@@ -126,16 +130,19 @@ describe('parseObservations', () => {
|
||||
<title>Missing type field</title>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
const result = expectObservation(xml);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
// First type in mocked mode is 'bugfix'
|
||||
expect(result[0].type).toBe('bugfix');
|
||||
});
|
||||
|
||||
it('returns empty array when no observation blocks are present', () => {
|
||||
const result = parseObservations('Some text without any observations.');
|
||||
expect(result).toHaveLength(0);
|
||||
it('returns a fail-fast result when no observation/summary blocks are present', () => {
|
||||
const result = parseAgentXml('Some text without any observations.');
|
||||
expect(result.valid).toBe(false);
|
||||
if (!result.valid) {
|
||||
expect(result.reason).toMatch(/unknown root|empty/);
|
||||
}
|
||||
});
|
||||
|
||||
it('parses files_read and files_modified arrays correctly', () => {
|
||||
@@ -146,7 +153,7 @@ describe('parseObservations', () => {
|
||||
<files_modified><file>src/utils.ts</file></files_modified>
|
||||
</observation>`;
|
||||
|
||||
const result = parseObservations(xml);
|
||||
const result = expectObservation(xml);
|
||||
|
||||
expect(result).toHaveLength(1);
|
||||
expect(result[0].files_read).toEqual(['src/utils.ts', 'src/parser.ts']);
|
||||
|
||||
@@ -131,7 +131,6 @@ describe('ResponseProcessor', () => {
|
||||
conversationHistory: [],
|
||||
currentProvider: 'claude',
|
||||
processingMessageIds: [], // CLAIM-CONFIRM pattern: track message IDs being processed
|
||||
consecutiveSummaryFailures: 0,
|
||||
...overrides,
|
||||
} as ActiveSession;
|
||||
}
|
||||
@@ -214,9 +213,15 @@ describe('ResponseProcessor', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('non-XML observer responses', () => {
|
||||
it('warns when the observer returns prose that will be discarded', async () => {
|
||||
const session = createMockSession();
|
||||
describe('non-XML observer responses (fail-fast — plan 03 phase 2)', () => {
|
||||
it('warns and marks messages failed when the observer returns non-XML prose', async () => {
|
||||
const markFailed = mock(() => {});
|
||||
mockSessionManager = {
|
||||
getMessageIterator: async function* () { yield* []; },
|
||||
getPendingMessageStore: () => ({ markFailed, confirmProcessed: mock(() => {}) }),
|
||||
} as unknown as SessionManager;
|
||||
|
||||
const session = createMockSession({ processingMessageIds: [7] });
|
||||
const responseText = 'Skipping — repeated log scan with no new findings.';
|
||||
|
||||
await processAgentResponse(
|
||||
@@ -232,30 +237,20 @@ describe('ResponseProcessor', () => {
|
||||
|
||||
expect(logger.warn).toHaveBeenCalledWith(
|
||||
'PARSER',
|
||||
'TestAgent returned non-XML response; observation content was discarded',
|
||||
expect.objectContaining({
|
||||
sessionId: 1,
|
||||
preview: responseText
|
||||
})
|
||||
expect.stringMatching(/^TestAgent returned unparseable response:/),
|
||||
expect.objectContaining({ sessionId: 1 })
|
||||
);
|
||||
const [, , observations, summary] = mockStoreObservations.mock.calls[0];
|
||||
expect(observations).toHaveLength(0);
|
||||
expect(summary).toBeNull();
|
||||
expect(markFailed).toHaveBeenCalledWith(7);
|
||||
expect(mockStoreObservations).not.toHaveBeenCalled();
|
||||
});
|
||||
});
|
||||
|
||||
describe('parsing summary from XML response', () => {
|
||||
it('should parse summary from response', async () => {
|
||||
const session = createMockSession();
|
||||
// PATHFINDER plan 03 phase 1: parseAgentXml returns one kind per call.
|
||||
// Summary-only response exercises the summary path.
|
||||
const responseText = `
|
||||
<observation>
|
||||
<type>discovery</type>
|
||||
<title>Test</title>
|
||||
<facts></facts>
|
||||
<concepts></concepts>
|
||||
<files_read></files_read>
|
||||
<files_modified></files_modified>
|
||||
</observation>
|
||||
<summary>
|
||||
<request>Build login form</request>
|
||||
<investigated>Reviewed existing forms</investigated>
|
||||
@@ -374,7 +369,11 @@ describe('ResponseProcessor', () => {
|
||||
expect(memorySessionId).toBe('memory-session-456');
|
||||
expect(project).toBe('test-project');
|
||||
expect(observations).toHaveLength(1);
|
||||
expect(summary).not.toBeNull();
|
||||
// PATHFINDER plan 03 phase 1: parseAgentXml returns ONE kind per call.
|
||||
// The first recognised root wins (here: <observation>), so the summary
|
||||
// in the same response is NOT extracted — the caller is expected to
|
||||
// issue observation turns and summary turns separately.
|
||||
expect(summary).toBeNull();
|
||||
expect(promptNumber).toBe(5);
|
||||
expect(tokens).toBe(100);
|
||||
expect(timestamp).toBe(1700000000000);
|
||||
@@ -434,16 +433,21 @@ describe('ResponseProcessor', () => {
|
||||
});
|
||||
|
||||
it('should broadcast summary via SSE', async () => {
|
||||
// PATHFINDER plan 03 phase 1: parseAgentXml returns one kind per call,
|
||||
// so summary broadcasts require a summary-only response.
|
||||
mockStoreObservations = mock(() => ({
|
||||
observationIds: [],
|
||||
summaryId: 99,
|
||||
createdAtEpoch: 1700000000000,
|
||||
} as StorageResult));
|
||||
(mockDbManager.getSessionStore as any) = () => ({
|
||||
storeObservations: mockStoreObservations,
|
||||
ensureMemorySessionIdRegistered: mock(() => {}),
|
||||
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
||||
});
|
||||
|
||||
const session = createMockSession();
|
||||
const responseText = `
|
||||
<observation>
|
||||
<type>discovery</type>
|
||||
<title>Test</title>
|
||||
<facts></facts>
|
||||
<concepts></concepts>
|
||||
<files_read></files_read>
|
||||
<files_modified></files_modified>
|
||||
</observation>
|
||||
<summary>
|
||||
<request>Build feature</request>
|
||||
<investigated>Reviewed code</investigated>
|
||||
@@ -473,70 +477,44 @@ describe('ResponseProcessor', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('handling empty response', () => {
|
||||
it('should handle empty response gracefully', async () => {
|
||||
const session = createMockSession();
|
||||
describe('handling empty / non-XML response (fail-fast — plan 03 phase 2)', () => {
|
||||
it('marks in-flight messages failed and does NOT call storeObservations on empty response', async () => {
|
||||
const markFailed = mock(() => {});
|
||||
mockSessionManager = {
|
||||
getMessageIterator: async function* () { yield* []; },
|
||||
getPendingMessageStore: () => ({ markFailed, confirmProcessed: mock(() => {}) }),
|
||||
} as unknown as SessionManager;
|
||||
|
||||
const session = createMockSession({ processingMessageIds: [1, 2, 3] });
|
||||
const responseText = '';
|
||||
|
||||
// Mock to handle empty observations
|
||||
mockStoreObservations = mock(() => ({
|
||||
observationIds: [],
|
||||
summaryId: null,
|
||||
createdAtEpoch: 1700000000000,
|
||||
}));
|
||||
(mockDbManager.getSessionStore as any) = () => ({
|
||||
storeObservations: mockStoreObservations,
|
||||
ensureMemorySessionIdRegistered: mock(() => {}),
|
||||
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
||||
});
|
||||
|
||||
await processAgentResponse(
|
||||
responseText,
|
||||
session,
|
||||
mockDbManager,
|
||||
mockSessionManager,
|
||||
mockWorker,
|
||||
100,
|
||||
null,
|
||||
'TestAgent'
|
||||
responseText, session, mockDbManager, mockSessionManager, mockWorker,
|
||||
100, null, 'TestAgent'
|
||||
);
|
||||
|
||||
// Should still call storeObservations with empty arrays
|
||||
expect(mockStoreObservations).toHaveBeenCalledTimes(1);
|
||||
const [, , observations, summary] = mockStoreObservations.mock.calls[0];
|
||||
expect(observations).toHaveLength(0);
|
||||
expect(summary).toBeNull();
|
||||
expect(mockStoreObservations).not.toHaveBeenCalled();
|
||||
expect(markFailed).toHaveBeenCalledTimes(3);
|
||||
expect(session.processingMessageIds).toEqual([]);
|
||||
});
|
||||
|
||||
it('should handle response with only text (no XML)', async () => {
|
||||
const session = createMockSession();
|
||||
it('marks in-flight messages failed and does NOT call storeObservations on plain-text response', async () => {
|
||||
const markFailed = mock(() => {});
|
||||
mockSessionManager = {
|
||||
getMessageIterator: async function* () { yield* []; },
|
||||
getPendingMessageStore: () => ({ markFailed, confirmProcessed: mock(() => {}) }),
|
||||
} as unknown as SessionManager;
|
||||
|
||||
const session = createMockSession({ processingMessageIds: [42] });
|
||||
const responseText = 'This is just plain text without any XML tags.';
|
||||
|
||||
mockStoreObservations = mock(() => ({
|
||||
observationIds: [],
|
||||
summaryId: null,
|
||||
createdAtEpoch: 1700000000000,
|
||||
}));
|
||||
(mockDbManager.getSessionStore as any) = () => ({
|
||||
storeObservations: mockStoreObservations,
|
||||
ensureMemorySessionIdRegistered: mock(() => {}),
|
||||
getSessionById: mock(() => ({ memory_session_id: 'memory-session-456' })),
|
||||
});
|
||||
|
||||
await processAgentResponse(
|
||||
responseText,
|
||||
session,
|
||||
mockDbManager,
|
||||
mockSessionManager,
|
||||
mockWorker,
|
||||
100,
|
||||
null,
|
||||
'TestAgent'
|
||||
responseText, session, mockDbManager, mockSessionManager, mockWorker,
|
||||
100, null, 'TestAgent'
|
||||
);
|
||||
|
||||
expect(mockStoreObservations).toHaveBeenCalledTimes(1);
|
||||
const [, , observations] = mockStoreObservations.mock.calls[0];
|
||||
expect(observations).toHaveLength(0);
|
||||
expect(mockStoreObservations).not.toHaveBeenCalled();
|
||||
expect(markFailed).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -669,7 +647,11 @@ describe('ResponseProcessor', () => {
|
||||
const session = createMockSession({
|
||||
memorySessionId: null, // Missing memory session ID
|
||||
});
|
||||
const responseText = '<observation><type>discovery</type></observation>';
|
||||
const responseText = `<observation>
|
||||
<type>discovery</type>
|
||||
<title>some title</title>
|
||||
<narrative>some narrative</narrative>
|
||||
</observation>`;
|
||||
|
||||
await expect(
|
||||
processAgentResponse(
|
||||
@@ -729,7 +711,14 @@ describe('ResponseProcessor', () => {
|
||||
});
|
||||
});
|
||||
|
||||
describe('circuit breaker: consecutiveSummaryFailures counter (#1633)', () => {
|
||||
// PATHFINDER plan 03 phase 3: circuit breaker (consecutiveSummaryFailures) deleted.
|
||||
// Former tests covered: counter stability on observation turns, increment on
|
||||
// missing summary, neutrality on <skip_summary/>, reset on successful summary.
|
||||
// Replacement coverage: `tests/sdk/parse-summary.test.ts` asserts that the
|
||||
// parser returns `{ valid: false, reason }` for malformed summaries; the
|
||||
// failure path goes through PendingMessageStore.markFailed's retry ladder,
|
||||
// which is unit-tested separately in tests/services/sqlite/.
|
||||
describe.skip('circuit breaker: consecutiveSummaryFailures counter (#1633 — deleted)', () => {
|
||||
const SUMMARY_PROMPT = `--- ${SUMMARY_MODE_MARKER} ---\nDo the summary now.`;
|
||||
|
||||
it('does NOT increment the counter on normal observation responses (P1 regression guard)', async () => {
|
||||
|
||||
Reference in New Issue
Block a user