mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
1319 lines
48 KiB
TypeScript
1319 lines
48 KiB
TypeScript
import { query } from '@anthropic-ai/claude-code';
|
|
import fs, { createWriteStream, WriteStream } from 'fs';
|
|
import path, { join } from 'path';
|
|
import os from 'os';
|
|
import { PathResolver } from '../../shared/paths.js';
|
|
import { PathDiscovery } from '../../services/path-discovery.js';
|
|
import { PromptOrchestrator, createAnalysisContext } from '../orchestration/PromptOrchestrator.js';
|
|
import { DEBUG_MESSAGES } from '../../constants.js';
|
|
import { log } from '../../shared/logger.js';
|
|
import { CompressionError } from '../../shared/types.js';
|
|
import { getClaudePath } from '../../shared/settings.js';
|
|
import { ChunkManager, ChunkingOptions, ChunkMetadata } from './ChunkManager.js';
|
|
import { getStorageProvider, needsMigration } from '../../shared/storage.js';
|
|
import { SessionInput, MemoryInput, OverviewInput, DiagnosticInput } from '../../services/sqlite/types.js';
|
|
|
|
/**
 * One parsed line of a JSONL transcript.
 *
 * Only `type` is mandatory. The remaining fields are optional because their
 * presence depends on the kind of message (conversation turn, system event,
 * result record, tool output, ...).
 */
interface TranscriptMessage {
  // Discriminator and identity
  type: string;
  subtype?: string;
  uuid?: string;
  session_id?: string;
  parent_tool_use_id?: string;

  // Nested envelope used by conversation turns
  message?: {
    role?: string;
    content?: string | { text?: string; content?: string }[];
    timestamp?: string;
    created_at?: string;
  };

  // Top-level equivalents of the envelope fields
  role?: string;
  content?: string | { text?: string; content?: string }[];
  timestamp?: string;
  created_at?: string;

  // Fields read for `result` and `system` messages
  result?: string;
  model?: string;
  tools?: unknown[];
  mcp_servers?: unknown[];

  // Output captured from a tool invocation
  toolUseResult?: {
    stdout?: string;
    stderr?: string;
    interrupted?: boolean;
    isImage?: boolean;
  };
}
|
|
|
|
|
|
/**
 * Options accepted by the TranscriptCompressor constructor.
 *
 * All fields are optional.
 * NOTE(review): the constructor shown in this file does not read any of them;
 * confirm where (or whether) they are consumed before relying on them.
 */
export interface CompressionOptions {
  verbose?: boolean;
  dryRun?: boolean;
  output?: string;
}
|
|
|
|
/**
|
|
* TranscriptCompressor handles the analysis and compression of Claude Code conversation transcripts
|
|
* into a searchable memory database format using the Model Context Protocol.
|
|
*/
|
|
export class TranscriptCompressor {
|
|
private paths: PathResolver;
|
|
private logStream: WriteStream | null = null;
|
|
private logFile: string | null = null;
|
|
private promptOrchestrator: PromptOrchestrator;
|
|
private chunkManager: ChunkManager;
|
|
|
|
// <Block> 1.1 ====================================
|
|
// Constructor Initialization - Natural flow (8/10)
|
|
/**
 * Builds the compressor's collaborators, prepares the on-disk directory
 * layout and opens the debug log.
 *
 * @param options - Compression options; not read by this constructor
 */
constructor(options: CompressionOptions = {}) {
  // Collaborators
  this.paths = new PathResolver();
  this.promptOrchestrator = new PromptOrchestrator();
  this.chunkManager = new ChunkManager();

  // The directory layout is prepared first because the log file lives in the logs directory.
  this.ensureClaudeMemStructure();
  this.initializeLogging();

  log.debug('🤖 TranscriptCompressor initialized');
}
|
|
// </Block> =======================================
|
|
|
|
// <Block> 1.2 ====================================
|
|
// Directory Structure Validation - Natural flow (8/10)
|
|
/**
 * Ensures that the required directory structure exists.
 *
 * Collects the config, index, archive and logs directories from the path
 * resolver and hands them to PathResolver.ensureDirectories in one call.
 */
private ensureClaudeMemStructure(): void {
  const requiredDirs = [
    this.paths.getConfigDir(),
    this.paths.getIndexDir(),
    this.paths.getArchiveDir(),
    this.paths.getLogsDir(),
  ];

  PathResolver.ensureDirectories(requiredDirs);
}
|
|
|
|
/**
 * Opens a timestamped debug log file in the logs directory (append mode)
 * and writes a three-line start banner to it.
 */
private initializeLogging(): void {
  // ':' and '.' from the ISO timestamp are replaced with '-' for the file name.
  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
  const target = join(this.paths.getLogsDir(), `claude-mem-${stamp}.log`);

  this.logFile = target;
  this.logStream = createWriteStream(target, { flags: 'a' });

  const banner = ['🚀 DEBUG LOG STARTED', `📁 Log file: ${this.logFile}`, '═'.repeat(60)];
  for (const entry of banner) {
    this.debugLog(entry);
  }
}
|
|
|
|
/**
 * Appends one line, prefixed with the current ISO timestamp, to the debug log.
 * Does nothing when no log stream is open.
 *
 * @param message - Text to record
 */
private debugLog(message: string): void {
  const stream = this.logStream;
  if (stream) {
    const stamp = new Date().toISOString();
    stream.write(`[${stamp}] ${message}\n`);
  }
}
|
|
|
|
/**
 * Writes the end marker and closes the debug log stream.
 *
 * The stream reference is cleared afterwards so that later debugLog() or
 * closeLogging() calls become no-ops instead of writing to a stream that has
 * already been ended (which Node reports as a write-after-end error).
 * Safe to call more than once.
 */
private closeLogging(): void {
  const stream = this.logStream;
  if (!stream) return;

  this.debugLog('✅ DEBUG LOG ENDED');
  stream.end();
  this.logStream = null;
}
|
|
// </Block> =======================================
|
|
|
|
// <Block> 1.3 ====================================
|
|
// </Block> =======================================
|
|
|
|
// <Block> 1.4 ====================================
|
|
// Main Compression Flow - DEBUG GUARDS INTERRUPT FLOW (5/10)
|
|
/**
 * Main compression method that processes a transcript and creates compressed memories.
 *
 * Reads the JSONL transcript, analyses it with Claude (in a single pass, or
 * chunk by chunk when the chunk manager reports that the formatted
 * conversation needs chunking), archives the original content and writes the
 * resulting summaries to the index. The debug log is closed when the method
 * finishes, whether it succeeds or fails.
 *
 * @param transcriptPath - Path to the transcript file
 * @param sessionId - Optional session ID; defaults to the transcript file name without `.jsonl`
 * @param originalProjectName - Optional original project name (for imported transcripts)
 * @returns The archive path returned by createArchive
 * @throws Re-throws any error raised during processing after logging it
 */
async compress(transcriptPath: string, sessionId?: string, originalProjectName?: string): Promise<string> {
  this.debugLog(`🚀 Starting compression for: ${transcriptPath}`);
  this.debugLog(`📋 Session ID: ${sessionId || 'auto-generated'}`);

  try {
    // Use original project name if provided (for imports), otherwise use current project
    const projectPrefix = originalProjectName || PathResolver.getCurrentProjectPrefix();
    log.debug(DEBUG_MESSAGES.PROJECT_NAME(projectPrefix));
    this.debugLog(`📝 PROJECT PREFIX: ${projectPrefix}`);

    // Read and parse transcript: one JSON document per non-blank line
    const content = fs.readFileSync(transcriptPath, 'utf-8');
    this.debugLog(`📖 Reading transcript: ${content.length} bytes`);
    const lines = content.trim().split('\n').filter(line => line.trim());
    const messages: TranscriptMessage[] = [];
    let parseErrors = 0;

    for (const [i, line] of lines.entries()) {
      try {
        const parsed: unknown = JSON.parse(line);
        // JSON.parse also accepts bare values such as `null` or `42`. Those are not
        // messages, and a `null` entry would crash the property reads further down.
        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
          throw new Error('line is not a JSON object');
        }
        messages.push(parsed as TranscriptMessage);
      } catch (e) {
        parseErrors++;
        log.debug(`Parse error on line ${i + 1}: ${(e as Error).message}`);
      }
    }

    log.debug(DEBUG_MESSAGES.TRANSCRIPT_STATS(content.length, messages.length));
    if (parseErrors > 0) {
      log.debug(`Parse errors: ${parseErrors}`);
    }
    this.debugLog(`📊 Transcript loaded: ${lines.length} lines, ${messages.length} messages, ${parseErrors} parse errors`);

    // Generate final session ID
    const finalSessionId = sessionId || path.basename(transcriptPath, '.jsonl');

    // Get timestamp from last message or use current time:
    // reverse search for the last message with a valid timestamp.
    let timestamp = new Date().toISOString();

    for (let i = messages.length - 1; i >= 0; i--) {
      // Typed as unknown because, at runtime, the value may be a number even
      // though the interface declares a string.
      const ts: unknown = messages[i].timestamp;
      if (!ts) continue;

      let parsedDate: Date;
      if (typeof ts === 'string' && ts.includes('T')) {
        // It's likely an ISO string
        parsedDate = new Date(ts);
      } else {
        // It's likely a Unix timestamp (number or numeric string).
        // Unix timestamps (seconds) are typically 10 digits, JS timestamps (ms) are 13.
        const numTs = Number(ts);
        const dateValue = numTs < 10000000000 ? numTs * 1000 : numTs;
        parsedDate = new Date(dateValue);
      }

      if (!isNaN(parsedDate.getTime())) {
        timestamp = parsedDate.toISOString();
        this.debugLog(`📅 Using timestamp from last message: ${timestamp}`);
        break;
      }

      // `new Date(...)` never throws; an unparseable value yields an invalid
      // Date, so the warning is emitted here rather than from a catch block.
      this.debugLog(`⚠️ Invalid timestamp in message: ${String(ts)}, trying earlier message`);
    }

    // Format conversation for analysis
    const conversationText = this.formatConversationForPrompt(messages);

    // Check if we need to use chunked processing
    const needsChunking = this.chunkManager.needsChunking(conversationText);

    let summaries: any[] = [];
    let overview: string | null = null;

    if (needsChunking) {
      // Use chunked processing for large transcripts
      const chunkResult = await this.compressInChunks(messages, finalSessionId, projectPrefix);
      summaries = chunkResult.summaries;
      overview = chunkResult.overview;
    } else {
      // Use normal single-pass processing for smaller transcripts
      const analysisContext = createAnalysisContext(
        conversationText,
        finalSessionId,
        {
          projectName: projectPrefix,
          trigger: 'manual'
        }
      );

      const analysisPrompt = this.promptOrchestrator.createAnalysisPrompt(analysisContext);

      log.debug('📤 Analysis prompt created');
      log.debug(`📊 Prompt length: ${analysisPrompt.prompt.length} characters`);

      // Log the full prompt to a debug file
      const promptDebugPath = path.join(this.paths.getLogsDir(), `claude-prompt-${Date.now()}.txt`);
      fs.writeFileSync(promptDebugPath, `=== CLAUDE ANALYSIS PROMPT ===\n${analysisPrompt.prompt}\n`);
      this.debugLog(`📝 Full prompt saved to: ${promptDebugPath}`);

      // Find MCP config and get Claude path from settings
      const claudePath = getClaudePath();
      const mcpConfigPath = this.findMCPConfig();

      log.debug(DEBUG_MESSAGES.CLAUDE_PATH_FOUND(claudePath));
      if (mcpConfigPath) {
        log.debug(DEBUG_MESSAGES.MCP_CONFIG_USED(mcpConfigPath));
      }

      // Call Claude SDK for analysis
      this.debugLog('🤖 Calling Claude SDK with MCP tools...');
      const response = await query({
        prompt: analysisPrompt.prompt,
        options: {
          allowedTools: [
            'mcp__claude-mem__chroma_list_collections',
            'mcp__claude-mem__chroma_create_collection',
            'mcp__claude-mem__chroma_peek_collection',
            'mcp__claude-mem__chroma_get_collection_info',
            'mcp__claude-mem__chroma_get_collection_count',
            'mcp__claude-mem__chroma_modify_collection',
            'mcp__claude-mem__chroma_fork_collection',
            'mcp__claude-mem__chroma_delete_collection',
            'mcp__claude-mem__chroma_add_documents',
            'mcp__claude-mem__chroma_query_documents',
            'mcp__claude-mem__chroma_get_documents',
            'mcp__claude-mem__chroma_update_documents',
            'mcp__claude-mem__chroma_delete_documents',
          ],
          pathToClaudeCodeExecutable: claudePath,
          model: 'sonnet'
        },
      });
      this.debugLog('✅ Claude SDK response received');

      // Process response and extract summaries from JSON
      this.debugLog('🔄 Processing Claude JSON response...');
      const extractResult = await this.processClaudeResponse(response);
      this.debugLog(`📋 Extracted ${extractResult.summaries.length} summaries from JSON`);
      if (extractResult.overview) {
        this.debugLog(`📝 Overview: ${extractResult.overview}`);
      }

      summaries = extractResult.summaries;
      overview = extractResult.overview;
    }

    log.debug(DEBUG_MESSAGES.COMPRESSION_COMPLETE(summaries.length));

    // Continue processing even with zero summaries - let the natural flow handle empty results

    // Create archive and update index
    const archivePath = this.createArchive(transcriptPath, projectPrefix, finalSessionId, content);
    this.debugLog(`📦 Archive created: ${archivePath}`);

    // Write to index - same method for both chunked and non-chunked
    await this.appendToIndex(summaries, overview, projectPrefix, finalSessionId, messages, archivePath, timestamp);
    this.debugLog(`📥 Written ${summaries.length} summaries to index`);

    log.debug(`✅ SUCCESS`);
    log.debug(`Archive created: ${archivePath}`);
    log.debug(`Summaries created: ${summaries.length}`);

    this.debugLog('✅ Compression completed successfully');

    return archivePath;
  } catch (error) {
    log.error('COMPRESSION FAILED', error, {
      transcriptPath,
      sessionId
    });
    this.debugLog(`❌ ERROR: ${error instanceof Error ? error.message : String(error)}`);
    throw error;
  } finally {
    // Single exit point for the log stream, on both success and failure.
    this.closeLogging();
  }
}
|
|
// </Block> =======================================
|
|
|
|
|
|
/**
 * Finds the MCP configuration file.
 *
 * Candidates are probed in this order: the project MCP config path, the
 * general MCP config path, then `.mcp.json` inside the Claude config
 * directory. The first one that exists on disk is returned. When none
 * exists, the general MCP config path is returned anyway.
 */
private findMCPConfig(): string {
  const discovery = PathDiscovery.getInstance();
  const candidates = [
    discovery.getProjectMcpConfigPath(),
    discovery.getMcpConfigPath(),
    join(discovery.getClaudeConfigDirectory(), '.mcp.json'),
  ];

  for (const candidate of candidates) {
    if (fs.existsSync(candidate)) {
      return candidate;
    }
  }

  return discovery.getMcpConfigPath();
}
|
|
|
|
// <Block> 1.5 ====================================
|
|
// Claude Response Processing - JSON extraction with pipe-separated output (9/10)
|
|
/**
 * Turns a Claude response into an overview plus summaries.
 *
 * The response is flattened to text, the raw text is dumped to a
 * `claude-response-<epoch ms>.txt` file in the logs directory for
 * troubleshooting, and the JSON payload is then extracted from it.
 *
 * @param response - Value returned by the SDK query call
 * @returns The overview (or null) and the extracted summaries
 */
private async processClaudeResponse(response: any): Promise<{ overview: string | null; summaries: string[] }> {
  const rawText = await this.extractResponseContent(response, []);
  this.debugLog(`🔍 Claude response content length: ${rawText.length}`);

  // Keep a copy of exactly what Claude returned
  const dumpFile = path.join(this.paths.getLogsDir(), `claude-response-${Date.now()}.txt`);
  fs.writeFileSync(dumpFile, `=== CLAUDE RAW RESPONSE ===\n${rawText}\n`);
  this.debugLog(`📝 Raw response saved to: ${dumpFile}`);

  const extracted = this.extractJSONResponse(rawText);
  const found = extracted.summaries.length;

  this.debugLog(`📊 Extracted ${found} summaries from JSON`);
  if (found === 0) {
    this.debugLog(`⚠️ No summaries found in JSON response`);
  }

  return extracted;
}
|
|
|
|
/**
 * Flattens a Claude response into a single string.
 *
 * Shapes are checked in this order: async-iterable stream, plain string,
 * array of strings/objects, single object. Anything else yields ''.
 *
 * While streaming, each chunk is echoed to stdout, except chunks from the one
 * containing `<JSONResponse>` through the one containing `</JSONResponse>`.
 * A message of type `result` with a non-empty `result` field replaces the
 * text accumulated so far.
 *
 * @param response - Value returned by the SDK query call
 * @param summaries - Not read by this method
 * @returns The collected response text
 */
private async extractResponseContent(response: any, summaries: any[]): Promise<string> {
  // Streaming response
  if (response && typeof response === 'object' && Symbol.asyncIterator in response) {
    let collected = '';
    let insideJson = false;

    for await (const message of response) {
      const piece = this.extractMessageContent(message);
      collected += piece;

      if (piece.includes('<JSONResponse>')) {
        insideJson = true;
      }

      if (piece.includes('</JSONResponse>')) {
        // The chunk carrying the closing tag is neither echoed nor checked for a result
        insideJson = false;
      } else {
        // Only show Claude's thinking, not the JSON response
        if (piece && !insideJson) {
          process.stdout.write(piece);
        }
        if (message?.type === 'result' && message?.result) {
          collected = message.result;
        }
      }
    }

    return collected;
  }

  // String response
  if (typeof response === 'string') {
    return response;
  }

  // Array response: keep every non-empty piece, one per line
  if (Array.isArray(response)) {
    const pieces: any[] = [];
    for (const entry of response) {
      let piece: any = '';
      if (typeof entry === 'string') {
        piece = entry;
      } else if (entry?.text) {
        piece = entry.text;
      } else if (entry?.content) {
        piece = entry.content;
      }
      if (piece) {
        pieces.push(piece);
      }
    }
    return pieces.join('\n');
  }

  // Object response: first non-empty of text, content, message
  if (typeof response === 'object' && response !== null) {
    return response?.text || response?.content || response?.message || '';
  }

  return '';
}
|
|
|
|
|
|
/**
 * Extracts the text carried by a single streamed message.
 *
 * Concatenates, in order, the message's `content`, `text` and `data` fields
 * (each only when truthy), followed by the `text` of every `type: 'text'`
 * item found in `message.message.content` when that is an array.
 *
 * @param message - One message from the response stream
 * @returns The concatenated text, or '' when nothing was found
 */
private extractMessageContent(message: any): string {
  let text = '';

  for (const field of ['content', 'text', 'data']) {
    if (message?.[field]) {
      text += message[field];
    }
  }

  const nested = message?.message?.content;
  if (nested && Array.isArray(nested)) {
    text += nested
      .filter((part: any) => part.type === 'text' && part.text)
      .map((part: any) => part.text)
      .join('');
  }

  return text;
}
|
|
|
|
/**
 * Extracts the JSON payload wrapped in `<JSONResponse>` tags and returns the
 * summaries as plain objects.
 *
 * Each returned summary has `text`, `document_id`, `keywords`, `timestamp`
 * and `archive`. Entries lacking `text` or `document_id`, including `null`
 * or primitive entries, are skipped individually so one bad entry does not
 * discard the rest.
 *
 * If the JSON cannot be parsed, the method falls back to the legacy
 * pipe-separated format (`text | document_id | keywords | timestamp | archive`).
 * In that fallback a missing archive name is derived from the summary's
 * document id, matching the JSON path.
 *
 * @param content - Full response text
 * @returns The overview (or null) and the valid summaries; both empty when
 *          no tags or no `summaries` array are present
 */
private extractJSONResponse(content: string): { overview: string | null; summaries: any[] } {
  try {
    // Extract JSON from response tags
    const jsonMatch = content.match(/<JSONResponse>([\s\S]*?)<\/JSONResponse>/);

    if (!jsonMatch) {
      this.debugLog(`⚠️ No <JSONResponse> tags found in response`);
      return { overview: null, summaries: [] };
    }

    const jsonContent = jsonMatch[1].trim();
    this.debugLog(`✅ Found JSON response: ${jsonContent.length} chars`);

    // Parse the JSON
    const parsed = JSON.parse(jsonContent);

    if (!parsed.summaries || !Array.isArray(parsed.summaries)) {
      this.debugLog(`⚠️ Invalid JSON structure: missing summaries array`);
      return { overview: null, summaries: [] };
    }

    // Return raw JSON objects instead of converting to pipe-separated format
    const validSummaries: any[] = [];

    parsed.summaries.forEach((summary: any, index: number) => {
      // Optional chaining keeps a null/primitive entry from throwing, which
      // would otherwise abort the whole extraction and drop the valid entries.
      if (!summary?.text || !summary?.document_id) {
        this.debugLog(`⚠️ Skipping invalid summary at index ${index}`);
        return;
      }

      // Ensure required fields are present
      validSummaries.push({
        text: summary.text,
        document_id: summary.document_id,
        keywords: summary.keywords || '',
        timestamp: summary.timestamp || new Date().toISOString(),
        archive: summary.archive || `${summary.document_id}.jsonl.archive`
      });
      this.debugLog(`✅ Valid summary ${index + 1}: ${summary.document_id}`);
    });

    // Log overview if present
    if (parsed.overview) {
      this.debugLog(`📝 Session overview: ${parsed.overview}`);
    }

    return { overview: parsed.overview || null, summaries: validSummaries };
  } catch (error) {
    this.debugLog(`❌ Failed to parse JSON response: ${error}`);

    // Fallback: try to extract any pipe-separated lines that might exist
    this.debugLog(`🔄 Attempting fallback to pipe-separated format...`);
    const legacyLines = this.extractLegacyPipeSeparatedLines(content);

    // Convert legacy lines to JSON format for consistency
    const legacySummaries = legacyLines.map((line, index) => {
      const parts = line.split(' | ');
      // Resolve the id once so the default archive name always matches it.
      const documentId = parts[1] || `legacy_${Date.now()}_${index}`;
      return {
        text: parts[0] || '',
        document_id: documentId,
        keywords: parts[2] || '',
        timestamp: parts[3] || new Date().toISOString(),
        archive: parts[4] || `${documentId}.jsonl.archive`
      };
    });

    return { overview: null, summaries: legacySummaries };
  }
}
|
|
|
|
/**
 * Legacy fallback for the pipe-separated format.
 *
 * Returns every trimmed line of `content` that splits on ' | ' into at least
 * three fields.
 *
 * @param content - Full response text
 * @returns Matching lines, trimmed, in their original order
 */
private extractLegacyPipeSeparatedLines(content: string): string[] {
  // A line with at least three ' | '-separated fields is necessarily
  // non-empty and contains the separator, so one test covers all conditions.
  const matches = content
    .split('\n')
    .map((raw) => raw.trim())
    .filter((row) => row.split(' | ').length >= 3);

  this.debugLog(`📊 Fallback extracted ${matches.length} pipe-separated lines`);
  return matches;
}
|
|
// </Block> =======================================
|
|
|
|
// <Block> 1.7 ====================================
|
|
// Conversation Formatting - LONG BUT MOSTLY NATURAL (6/10)
|
|
/**
 * Processes a transcript in chunks when it's too large for single processing.
 *
 * Every message is reduced to a single `- <content>` line (newlines escaped),
 * the lines are handed to the chunk manager, and each chunk is analysed with
 * its own Claude call. Summaries from all chunks are concatenated. Overviews
 * returned for individual chunks are ignored; one overview is generated at
 * the end from the collected summaries.
 *
 * The Claude executable path, MCP config lookup and tool list do not depend
 * on the chunk, so they are resolved once before the loop.
 *
 * @param messages - Parsed transcript messages
 * @param sessionId - Session the transcript belongs to
 * @param projectPrefix - Project name passed to the prompt
 * @returns All extracted summaries and the generated overview (or null)
 */
private async compressInChunks(
  messages: TranscriptMessage[],
  sessionId: string,
  projectPrefix: string
): Promise<{ summaries: any[]; overview: string | null }> {
  this.debugLog('📦 Large transcript detected, processing in chunks...');

  // Create filtered output for chunking: one line per message
  const outputLines = messages.map((m) => {
    const singleLine = this.extractContent(m).replace(/\r\n|\n|\r/g, '\\n');
    return `- ${singleLine}`;
  });

  const fullOutput = outputLines.join('\n');
  const chunks = this.chunkManager.chunkTranscript(fullOutput);

  this.debugLog(this.chunkManager.getChunkingStats(chunks));
  console.log(`\n📊 Processing ${chunks.length} chunks...`);

  // Loop-invariant setup
  const claudePath = getClaudePath();
  const mcpConfigPath = this.findMCPConfig();
  if (mcpConfigPath) {
    log.debug(DEBUG_MESSAGES.MCP_CONFIG_USED(mcpConfigPath));
  }
  const allowedTools = [
    'mcp__claude-mem__chroma_list_collections',
    'mcp__claude-mem__chroma_create_collection',
    'mcp__claude-mem__chroma_peek_collection',
    'mcp__claude-mem__chroma_get_collection_info',
    'mcp__claude-mem__chroma_get_collection_count',
    'mcp__claude-mem__chroma_modify_collection',
    'mcp__claude-mem__chroma_fork_collection',
    'mcp__claude-mem__chroma_delete_collection',
    'mcp__claude-mem__chroma_add_documents',
    'mcp__claude-mem__chroma_query_documents',
    'mcp__claude-mem__chroma_get_documents',
    'mcp__claude-mem__chroma_update_documents',
    'mcp__claude-mem__chroma_delete_documents',
  ];

  const allSummaries: any[] = [];

  // Chunks are processed one after another
  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i];
    console.log(`\n🔄 Processing chunk ${i + 1}/${chunks.length}...`);

    // Create analysis prompt for this chunk
    const chunkPrompt = `Analyze this chunk (${i + 1}/${chunks.length}) of a larger conversation transcript.

${chunk.metadata.hasOverlap ? `Note: This chunk includes ${chunk.metadata.overlapMessages || 2} messages from the previous chunk for context continuity.` : ''}

Chunk contains messages ${chunk.metadata.startIndex + 1} to ${chunk.metadata.endIndex + 1}.

${chunk.content}`;

    const analysisPrompt = this.promptOrchestrator.createAnalysisPrompt({
      transcriptContent: chunkPrompt,
      sessionId: sessionId,
      projectName: projectPrefix,
      trigger: 'manual',
      originalTokens: chunk.metadata.estimatedTokens
    });

    // Call Claude SDK for this chunk
    const response = await query({
      prompt: analysisPrompt.prompt,
      options: {
        allowedTools,
        pathToClaudeCodeExecutable: claudePath,
        model: 'sonnet'
      },
    });

    // Extract summaries from this chunk's response (ignoring overview from chunks)
    const responseContent = await this.extractResponseContent(response, []);
    const extractResult = this.extractJSONResponse(responseContent);

    if (extractResult.summaries.length > 0) {
      console.log(`  ✅ Extracted ${extractResult.summaries.length} memories from chunk ${i + 1}`);
      allSummaries.push(...extractResult.summaries);
    } else {
      console.log(`  ⚠️ No memories extracted from chunk ${i + 1}`);
    }
  }

  // After all chunks are processed, generate a single overview from the collected memories
  console.log(`\n📝 Generating overview from ${allSummaries.length} extracted memories...`);
  const overview = await this.generateOverviewFromMemories(projectPrefix, sessionId, allSummaries);

  return { summaries: allSummaries, overview };
}
|
|
|
|
/**
 * Generates a single session overview from the extracted memories.
 *
 * Builds a prompt listing every memory text, sends it to Claude with no
 * tools enabled and returns the trimmed reply. Any failure is logged and
 * reported as `null` rather than thrown.
 *
 * @param projectPrefix - Project name included in the prompt
 * @param sessionId - Session id included in the prompt
 * @param summaries - Extracted summaries; each entry's `text` is used, or the entry itself when it has no `text`
 * @returns The overview text, or null when there are no memories, the reply is empty, or an error occurs
 */
private async generateOverviewFromMemories(
  projectPrefix: string,
  sessionId: string,
  summaries: any[]
): Promise<string | null> {
  try {
    // Collect the memory texts that go into the prompt
    const memoryTexts: any[] = [];
    for (const entry of summaries) {
      const memory = entry.text || entry;
      if (memory) {
        memoryTexts.push(memory);
      }
    }

    if (memoryTexts.length === 0) {
      console.log('  ⚠️ No memories available to generate overview');
      return null;
    }

    const numberedMemories = memoryTexts.map((text, i) => `${i + 1}. ${text}`).join('\n');

    // Create a focused prompt for overview generation
    const overviewPrompt = `You have just analyzed a long conversation and extracted ${memoryTexts.length} key memories.
Based on these memories, create a comprehensive overview of the entire session.

MEMORIES EXTRACTED:
${numberedMemories}

PROJECT: ${projectPrefix}
SESSION: ${sessionId}

Create a 2-3 sentence overview that:
1. Summarizes the main themes and accomplishments across ALL the memories
2. Highlights the most significant technical work or decisions
3. Written for any developer to understand (define jargon organically)

Return ONLY the overview text, nothing else.`;

    // Call Claude for overview generation
    const response = await query({
      prompt: overviewPrompt,
      options: {
        allowedTools: [], // No tools needed for overview generation
        pathToClaudeCodeExecutable: getClaudePath(),
        model: 'sonnet'
      },
    });

    // Extract the overview from whichever shape the response has
    let overview = '';
    if (response && typeof response === 'object' && Symbol.asyncIterator in response) {
      for await (const message of response) {
        overview += this.extractMessageContent(message);

        // A result message carries the complete text and replaces what was accumulated
        if (message?.type === 'result' && message?.result) {
          overview = message.result;
        }
      }
    } else if (typeof response === 'string') {
      overview = response;
    } else if (response?.text) {
      overview = response.text;
    } else if (response?.content) {
      overview = response.content;
    }

    const cleanedOverview = overview.trim();
    if (!cleanedOverview) {
      console.log(`  ⚠️ No overview generated`);
      return null;
    }

    console.log(`  ✅ Overview generated successfully`);
    return cleanedOverview;
  } catch (error) {
    console.error(`  ❌ Failed to generate overview: ${error}`);
    this.debugLog(`❌ Overview generation error: ${error}`);
    return null;
  }
}
|
|
|
|
/**
 * Formats conversation messages for the analysis prompt.
 *
 * Each message becomes one line of the form
 * `content | session_id | role | timestamp | message_uuid`, with pipe
 * characters inside the content escaped as `\|`. The lines are wrapped in
 * `<!-- TRANSCRIPT -->` markers.
 *
 * @param messages - Parsed transcript messages
 * @returns The wrapped, newline-joined transcript text
 */
private formatConversationForPrompt(messages: TranscriptMessage[]): string {
  const rows = messages.map((m) => {
    // Role comes from the message type where it is known, otherwise from the role fields
    let role: string | undefined;
    switch (m.type) {
      case 'assistant':
        role = 'assistant';
        break;
      case 'user':
        role = 'user';
        break;
      case 'result':
      case 'system':
      case 'summary':
        role = 'system';
        break;
      default:
        role = m.message?.role || m.role;
    }

    // Escape pipe characters in content to prevent format corruption
    const safeContent = this.extractContent(m).split('|').join('\\|');
    const session = m.session_id || '';
    const when = this.normalizeTimestamp(m);
    const id = m.uuid || '';

    return `${safeContent} | ${session} | ${role} | ${when} | ${id}`;
  });

  log.debug(`Field filtering complete: ${rows.length} messages processed`);

  return `<!-- TRANSCRIPT -->\n${rows.join('\n')}\n<!-- /TRANSCRIPT -->`;
}
|
|
// </Block> =======================================
|
|
|
|
// <Block> 1.6 ====================================
|
|
// Message Content Extraction - Simplified (8/10)
|
|
/**
 * Extracts the textual content of a transcript message.
 *
 * - `tool_result` messages are delegated to extractToolResultContent.
 * - `assistant` / `user` messages read `message.content` (string or item array).
 * - `summary`, `result` and `system` messages are rendered from their own fields.
 * - If nothing was produced, `message.content` or top-level `content` is used
 *   as a generic fallback.
 * - A summary of `toolUseResult`, when present, is appended.
 *
 * Array-valued content in the fallback and in non-init system messages is
 * flattened item by item (`text`, then `content`) before any string
 * conversion, so it no longer degrades to "[object Object]".
 *
 * @param m - Transcript message
 * @returns Trimmed content text, possibly empty
 */
private extractContent(m: TranscriptMessage): string {
  // Handle tool_result messages first - they have their own size filtering
  if (m.type === 'tool_result') {
    return this.extractToolResultContent(m);
  }

  // Flattens a string-or-array content value into plain text.
  const flatten = (raw: unknown): string => {
    if (Array.isArray(raw)) {
      return raw
        .map((item) => (typeof item === 'string' ? item : item?.text || item?.content || ''))
        .filter(Boolean)
        .join(' ');
    }
    return raw ? String(raw) : '';
  };

  let content = '';

  // Extract by type
  if (m.type === 'assistant' || m.type === 'user') {
    const messageContent = m.message?.content;
    if (Array.isArray(messageContent)) {
      // Properly handle content arrays without aggressive filtering
      content = messageContent
        .map((item) => this.extractContentItem(item))
        .filter(Boolean)
        .join(' ');
    } else if (messageContent) {
      content = String(messageContent).trim();
    }
  } else if (m.type === 'summary') {
    // Summary messages carry their text in a `summary` field not declared on the interface
    content = (m as any).summary || '';
  } else if (m.type === 'result') {
    if (m.subtype === 'success' && m.result) {
      content = `[Result: ${m.result}]`;
    } else if (m.subtype === 'error_max_turns') {
      content = '[Error: Maximum turns reached]';
    } else if (m.subtype === 'error_during_execution') {
      content = '[Error during execution]';
    }
  } else if (m.type === 'system') {
    if (m.subtype === 'init') {
      content = `[System initialized: ${m.model}, tools: ${m.tools?.length || 0}, MCP servers: ${m.mcp_servers?.length || 0}]`;
    } else {
      // Handle other system messages
      content = flatten(m.content).trim();
    }
  }

  // Fallback to generic content extraction
  if (!content) {
    content = flatten(m.message?.content || m.content);
  }

  // Append tool use result if present
  if (m.toolUseResult) {
    const toolSummary = this.summarizeToolResult(m.toolUseResult, content);
    if (toolSummary) {
      content = content ? `${content}\n\n${toolSummary}` : toolSummary;
    }
  }

  return String(content).trim();
}
|
|
|
|
/**
 * Extracts text from one content item (text, thinking, tool_use, tool_result, ...).
 *
 * - Non-object items are stringified and trimmed.
 * - `tool_use` items are reduced to `[Tool: <name> (<id>)]`, without their input.
 * - `tool_result` items larger than 1MB are replaced with a FILTERED placeholder.
 * - Unknown item types fall back to `text`, `content`, `thinking`, then the item's JSON.
 *
 * @param item - One element of a message content array
 * @returns Text representation of the item
 */
private extractContentItem(item: any): string {
  if (!item || typeof item !== 'object') {
    return String(item || '').trim();
  }

  switch (item.type) {
    case 'text':
      return item.text || '';

    case 'thinking':
      return item.thinking || '';

    case 'tool_use': {
      // Summarize tool use without the full input details
      const toolName = item.name || 'unknown';
      const toolId = item.id || '';
      const idSuffix = toolId ? ` (${toolId})` : '';
      return `[Tool: ${toolName}${idSuffix}]`;
    }

    case 'tool_result': {
      // Check size before extracting
      const oneMB = 1024 * 1024;
      const contentSize = this.getToolResultSize(item);
      if (contentSize > oneMB) {
        // Size in MB, rounded to one decimal place
        const sizeMB = Math.round(contentSize / (1024 * 1024) * 10) / 10;
        return `[FILTERED: Large tool result ~${sizeMB}MB - tool output to assistant]`;
      }
      return this.extractToolResultFromItem(item);
    }

    default:
      // Fallback for other content types - be more thorough
      return item.text || item.content || item.thinking || JSON.stringify(item);
  }
}
|
|
|
|
/**
 * Calculates the size of a tool_result item's content.
 *
 * - Missing content: 0.
 * - Array content: sum over items of `text_length`, else `text.length`, else `content.length`.
 * - String content: its length.
 * - Anything else: length of its JSON serialisation.
 *
 * @param item - A tool_result content item
 * @returns Size estimate in characters
 */
private getToolResultSize(item: any): number {
  const payload = item.content;
  if (!payload) return 0;

  if (Array.isArray(payload)) {
    let total = 0;
    payload.forEach((part: any) => {
      total = total + (part.text_length || part.text?.length || part.content?.length || 0);
    });
    return total;
  }

  return typeof payload === 'string' ? payload.length : JSON.stringify(payload).length;
}
|
|
|
|
/**
 * Extracts content from tool_result messages with large content filtering.
 *
 * Content whose estimated size exceeds 1MB is replaced with a FILTERED
 * placeholder. For array content the estimate sums, per item, `text_length`,
 * else `text.length`, else `content.length`. This is the same rule
 * getToolResultSize uses, so items that carry their text in `content` also
 * count towards the threshold.
 *
 * @param m - Transcript message of type `tool_result`
 * @returns The extracted text, or the placeholder when the content is too large
 */
private extractToolResultContent(m: TranscriptMessage): string {
  const LARGE_CONTENT_THRESHOLD = 1024 * 1024; // 1MB threshold

  // Check if this is a large tool_result that should be filtered
  if (m.content && Array.isArray(m.content)) {
    const totalSize = m.content.reduce((size: number, contentItem: any) => {
      return size + (contentItem.text_length || contentItem.text?.length || contentItem.content?.length || 0);
    }, 0);

    if (totalSize > LARGE_CONTENT_THRESHOLD) {
      // Size in MB, rounded to one decimal place
      const sizeMB = Math.round(totalSize / (1024 * 1024) * 10) / 10;
      return `[FILTERED: Large tool result ~${sizeMB}MB - tool output to assistant]`;
    }

    // Normal size array - extract all content items
    return m.content.map((item: any) => this.extractToolResultFromItem(item)).filter(Boolean).join(' ');
  }

  // Check if direct content property is too large
  if (m.content && typeof m.content === 'string' && m.content.length > LARGE_CONTENT_THRESHOLD) {
    const sizeMB = Math.round(m.content.length / (1024 * 1024) * 10) / 10;
    return `[FILTERED: Large tool result ~${sizeMB}MB - tool output to assistant]`;
  }

  // Content is not too large, extract normally
  return this.extractToolResultFromItem(m);
}
|
|
|
|
/**
 * Pulls the textual payload out of a tool_result item (normal size).
 *
 * Accepts either a single content element (`{ type: 'text', text }`) or a
 * whole message/item that carries a `content` property.
 *
 * @param item - Content element or message-like object.
 * @returns The element's own text when it is a non-empty text element;
 *   `'[Tool result: no content]'` when `content` is falsy; for array content
 *   the non-empty `text`/`content` values joined by a space; string content
 *   as-is; for object content its `text`, then `content`, then
 *   `'[Tool result: complex object]'`; anything else stringified.
 */
private extractToolResultFromItem(item: any): string {
  // A bare text element (e.g. one entry of a content array)
  if (item.type === 'text' && item.text) {
    return item.text;
  }

  const body = item.content;
  if (!body) {
    return '[Tool result: no content]';
  }

  if (Array.isArray(body)) {
    const pieces: any[] = [];
    body.forEach((part: any) => {
      // Prefer text, fall back to nested content; empty entries are skipped.
      const piece = part.text || part.content || '';
      if (piece) {
        pieces.push(piece);
      }
    });
    return pieces.join(' ');
  }

  switch (typeof body) {
    case 'string':
      return body;
    case 'object':
      return body.text || body.content || '[Tool result: complex object]';
    default:
      // Remaining truthy primitives (numbers, booleans, ...)
      return String(body);
  }
}
|
|
|
|
// Removed filterLargeContent method - content filtering now handled at message level in extractContent
|
|
|
|
// </Block> =======================================
|
|
|
|
/**
 * Builds a "Message N (direction)" label so the reader can tell who is
 * talking to whom.
 *
 * Any message that `messageContainsToolResult` recognises is labelled as
 * tool → assistant regardless of its declared type. Otherwise the label is
 * derived from `m.type` (and `m.subtype` for `result` messages); unknown
 * types are echoed verbatim inside the parentheses.
 *
 * @param m - The transcript message being labelled.
 * @param messageNumber - Number shown in the label.
 * @returns The formatted label.
 */
private createMessageFlowLabel(m: TranscriptMessage, messageNumber: number): string {
  const label = (flow: string): string => `Message ${messageNumber} (${flow})`;

  // Tool output takes precedence over the declared message type.
  if (this.messageContainsToolResult(m)) {
    return label('tool → assistant');
  }

  if (m.type === 'result') {
    if (m.subtype === 'error_max_turns') {
      return label('session error: max turns');
    }
    if (m.subtype === 'error_during_execution') {
      return label('session error: execution');
    }
    // 'success' and every other subtype share the generic label.
    return label('session result');
  }

  const flowByType = new Map<string, string>([
    ['user', 'user → assistant'],
    ['assistant', 'assistant → user'],
    ['system', 'system'],
    ['tool_result', 'tool → assistant'],
    ['summary', 'session summary'],
  ]);

  return label(flowByType.get(m.type) ?? m.type);
}
|
|
|
|
/**
 * Tells whether a message carries tool result content.
 *
 * Checks, in order: a `user` message whose nested `message.content` array
 * decides the answer on its own; a message typed `tool_result`; and finally
 * a top-level `content` array containing a `tool_result` item.
 *
 * @param m - The transcript message to inspect.
 * @returns `true` when tool result content is found, otherwise `false`.
 */
private messageContainsToolResult(m: TranscriptMessage): boolean {
  const isToolResult = (item: any): boolean => item.type === 'tool_result';

  // User messages with a nested content array are judged by that array alone.
  const nested = m.message?.content;
  if (m.type === 'user' && Array.isArray(nested)) {
    return nested.some(isToolResult);
  }

  // Direct tool_result message
  if (m.type === 'tool_result') {
    return true;
  }

  // Top-level content array
  return Array.isArray(m.content) && m.content.some(isToolResult);
}
|
|
|
|
/**
 * Debug helper: writes the filtered view of a transcript to disk without
 * running full compression.
 *
 * Reads the transcript as JSONL (lines that fail to parse are skipped),
 * renders each message on one line as `- <content>` with line breaks
 * escaped as a literal `\n`, and saves the result under the operating
 * system's temporary directory as `filtered-transcript-<epoch ms>.txt`.
 * When chunking is enabled and the chunk manager reports the output as too
 * large, one `-chunk-<n>.txt` file per chunk is written instead.
 *
 * @param transcriptPath - Path of the JSONL transcript to read.
 * @param enableChunking - When true (default), large output is split into
 *   chunk files.
 */
public showFilteredOutput(transcriptPath: string, enableChunking: boolean = true): void {
  const content = fs.readFileSync(transcriptPath, 'utf-8');
  const lines = content.trim().split('\n').filter(line => line.trim());
  const messages: TranscriptMessage[] = [];

  // Parse all messages; malformed lines are deliberately ignored (best effort).
  for (const line of lines) {
    try {
      messages.push(JSON.parse(line));
    } catch {
      continue;
    }
  }

  // Get first and last timestamps for the whole transcript
  const firstTimestamp = messages.length > 0 ? this.normalizeTimestamp(messages[0]) : '';
  const lastTimestamp = messages.length > 0 ? this.normalizeTimestamp(messages[messages.length - 1]) : '';

  // One output line per message; CRLF, LF and CR all become a literal "\n".
  const fullOutput = messages
    .map((m) => `- ${this.extractContent(m).replace(/\r\n|\n|\r/g, '\\n')}`)
    .join('\n');

  // Use the platform temp directory rather than assuming /tmp exists.
  const baseOutputFile = join(os.tmpdir(), `filtered-transcript-${Date.now()}`);

  // Check if chunking is needed
  if (enableChunking && this.chunkManager.needsChunking(fullOutput)) {
    console.log('\n📦 Large transcript detected, chunking output...');

    const chunks = this.chunkManager.chunkTranscript(fullOutput);
    console.log(this.chunkManager.getChunkingStats(chunks));

    // Save each chunk to a separate file with timestamps
    chunks.forEach((chunk, index) => {
      const chunkFile = `${baseOutputFile}-chunk-${index + 1}.txt`;

      // NOTE(review): assumes startIndex/endIndex are indices into `messages` — confirm against ChunkManager
      const chunkFirstIdx = chunk.metadata.startIndex;
      const chunkLastIdx = Math.min(chunk.metadata.endIndex, messages.length - 1);
      const chunkFirstTime = messages[chunkFirstIdx] ? this.normalizeTimestamp(messages[chunkFirstIdx]) : '';
      const chunkLastTime = messages[chunkLastIdx] ? this.normalizeTimestamp(messages[chunkLastIdx]) : '';

      // Add timestamps to metadata so they are available to the chunk header
      chunk.metadata.firstTimestamp = chunkFirstTime;
      chunk.metadata.lastTimestamp = chunkLastTime;

      const chunkContent = this.chunkManager.createChunkHeader(chunk.metadata) + chunk.content;
      fs.writeFileSync(chunkFile, chunkContent, 'utf-8');
      console.log(`  ✅ Chunk ${index + 1}/${chunks.length} saved to: ${chunkFile}`);
    });

    console.log(`Processed ${messages.length} messages from transcript into ${chunks.length} chunks`);
    console.log(`📅 Time range: ${firstTimestamp} to ${lastTimestamp}`);
  } else {
    // Save as single file if no chunking needed
    const outputFile = `${baseOutputFile}.txt`;

    // Add timestamps at the beginning of the file
    const outputWithTimestamps = `# ${firstTimestamp} to ${lastTimestamp}\n${fullOutput}`;

    fs.writeFileSync(outputFile, outputWithTimestamps, 'utf-8');
    console.log(`\nFiltered output saved to: ${outputFile}`);
    console.log(`Processed ${messages.length} messages from transcript`);
    console.log(`📅 Time range: ${firstTimestamp} to ${lastTimestamp}`);
  }
}
|
|
|
|
/**
 * Condenses a tool execution result into a short multi-line summary.
 *
 * Output parts, in order and joined by newlines:
 * - `Result: ...` for stdout. Output longer than 200 characters is reduced
 *   to its first three lines, plus an ellipsis and the last two lines when
 *   there are more than six lines, followed by line and character counts.
 * - `Error: ...` when stderr is non-blank.
 * - `(interrupted)` and `(image output)` when the matching flags are set.
 *
 * @param toolResult - Object with optional `stdout`, `stderr`,
 *   `interrupted` and `isImage` fields.
 * @param existingContent - Not used by this method.
 * @returns The summary text; empty when nothing applies.
 */
private summarizeToolResult(toolResult: any, existingContent: string): string {
  const parts: string[] = [];

  if (toolResult.stdout) {
    const out = String(toolResult.stdout);
    if (out.length <= 200) {
      parts.push(`Result: ${out}`);
    } else {
      const outLines = out.split('\n');
      const head = outLines.slice(0, 3).join('\n');
      const tail = outLines.length > 6 ? `\n...\n${outLines.slice(-2).join('\n')}` : '';
      parts.push(`Result: ${head}${tail} (${outLines.length} lines, ${out.length} chars)`);
    }
  }

  if (toolResult.stderr && toolResult.stderr.trim()) {
    parts.push(`Error: ${toolResult.stderr}`);
  }

  // Boolean markers are appended in a fixed order.
  const markers: Array<[string, string]> = [
    ['interrupted', '(interrupted)'],
    ['isImage', '(image output)'],
  ];
  for (const [flag, text] of markers) {
    if (toolResult[flag]) {
      parts.push(text);
    }
  }

  return parts.join('\n');
}
|
|
|
|
/**
 * Produces a uniform timestamp string for a transcript message.
 *
 * The first truthy value among `m.timestamp`, `m.message.timestamp`,
 * `m.created_at` and `m.message.created_at` is parsed with `Date` and
 * returned in ISO-8601 form with the `T` separator replaced by a space.
 *
 * @param m - The transcript message to read the timestamp from.
 * @returns The normalized timestamp, or an empty string when none is
 *   present or it cannot be parsed.
 */
private normalizeTimestamp(m: TranscriptMessage): string {
  const candidates = [m.timestamp, m.message?.timestamp, m.created_at, m.message?.created_at];
  const ts = candidates.find(Boolean);
  if (!ts) {
    return '';
  }

  try {
    const parsed = new Date(ts);
    // Unparseable input yields an invalid Date, whose time value is NaN.
    return Number.isNaN(parsed.getTime()) ? '' : parsed.toISOString().replace('T', ' ');
  } catch (e) {
    log.debug(`Invalid timestamp format: ${ts}`);
    return '';
  }
}
|
|
|
|
// <Block> 1.8 ====================================
|
|
// Archive Creation - Natural flow (9/10)
|
|
/**
 * Saves a copy of the original transcript content into the project's
 * archive directory as `<sessionId>.jsonl.archive`.
 *
 * The archive directory is ensured via `PathResolver.ensureDirectory`
 * before the file is written.
 *
 * @param transcriptPath - Not used by this method.
 * @param projectPrefix - Project identifier used to resolve the archive directory.
 * @param sessionId - Session identifier; becomes the archive file's base name.
 * @param content - Text written to the archive file.
 * @returns Full path of the archive file that was written.
 */
private createArchive(transcriptPath: string, projectPrefix: string, sessionId: string, content: string): string {
  const archiveDir = this.paths.getProjectArchiveDir(projectPrefix);
  PathResolver.ensureDirectory(archiveDir);

  const archiveFileName = `${sessionId}.jsonl.archive`;
  const archivePath = path.join(archiveDir, archiveFileName);

  fs.writeFileSync(archivePath, content);
  log.debug(`📦 Created archive: ${archivePath}`);

  return archivePath;
}
|
|
// </Block> =======================================
|
|
|
|
/**
 * Persists the outcome of a compression run through the configured storage
 * provider.
 *
 * Steps, in order: log a hint when `needsMigration()` reports a pending
 * migration; create the session record unless `hasSession` says it exists;
 * upsert the overview when one is given; then either bulk-store one memory
 * per summary or, when there are no summaries, store a diagnostic record.
 * Any error thrown along the way triggers the JSONL fallback
 * (`appendToIndexJSONL`) with the same arguments.
 *
 * @param summaries - Summary objects (`text`, `document_id`, `keywords`,
 *   optional `timestamp`) extracted from the model response.
 * @param overview - Session overview text, or null when absent.
 * @param projectPrefix - Project identifier stored on every record.
 * @param sessionId - Session identifier stored on every record.
 * @param messages - Only forwarded to the JSONL fallback.
 * @param archivePath - Path of the archive file; its size and base name are recorded.
 * @param timestamp - Creation timestamp used for records lacking their own.
 */
private async appendToIndex(summaries: any[], overview: string | null, projectPrefix: string, sessionId: string, messages: TranscriptMessage[], archivePath: string, timestamp: string): Promise<void> {
  try {
    // Migration is only suggested here, never performed.
    const migrationPending = await needsMigration();
    if (migrationPending) {
      this.debugLog('⚠️  JSONL to SQLite migration recommended. Run: claude-mem migrate-index');
    }

    const storage = await getStorageProvider();
    this.debugLog(`💾 Using ${storage.backend} storage backend`);

    // Session record (built before the existence check, so a missing archive fails early)
    const archiveBytes = fs.statSync(archivePath).size;
    const sessionInput: SessionInput = {
      session_id: sessionId,
      project: projectPrefix,
      created_at: timestamp,
      source: 'compress',
      archive_path: archivePath,
      archive_bytes: archiveBytes,
      archived_at: new Date().toISOString()
    };

    // Duplicate prevention: only create the session when it is not stored yet.
    const sessionKnown = await storage.hasSession(sessionId);
    if (sessionKnown) {
      this.debugLog(`📋 Session already exists: ${sessionId}`);
    } else {
      await storage.createSession(sessionInput);
      this.debugLog(`📋 Created session record: ${sessionId}`);
    }

    // Overview is optional
    if (overview) {
      const overviewInput: OverviewInput = {
        session_id: sessionId,
        content: overview,
        created_at: timestamp,
        project: projectPrefix,
        origin: 'claude'
      };
      await storage.upsertOverview(overviewInput);
      this.debugLog(`📝 Stored overview for session: ${sessionId}`);
    }

    if (summaries && summaries.length !== 0) {
      // One memory record per summary, stored in a single bulk call
      const archiveBasename = path.basename(archivePath);
      const memoryInputs: MemoryInput[] = summaries.map((summary) => ({
        session_id: sessionId,
        text: summary.text || '',
        document_id: summary.document_id,
        keywords: summary.keywords,
        created_at: summary.timestamp || timestamp,
        project: projectPrefix,
        archive_basename: archiveBasename,
        origin: 'transcript'
      }));

      await storage.createMemories(memoryInputs);

      log.debug(`📝 Stored ${summaries.length} summaries using ${storage.backend}`);
      this.debugLog(`💾 Stored ${summaries.length} memories for session: ${sessionId}`);
    } else {
      // Nothing was extracted: leave a diagnostic trail instead
      log.debug('📝 No summaries extracted from JSON response');

      const diagnosticInput: DiagnosticInput = {
        session_id: sessionId,
        message: "NO SUMMARIES EXTRACTED - Check logs for valid JSON response",
        severity: 'warn',
        created_at: timestamp,
        project: projectPrefix,
        origin: 'compressor'
      };

      await storage.createDiagnostic(diagnosticInput);
      this.debugLog(`⚠️ No summaries for session ${sessionId} - Check if Claude returned valid JSON in <JSONResponse> tags`);
    }
  } catch (error) {
    // Any failure above lands here; the JSONL index acts as emergency backup.
    this.debugLog(`❌ Storage failed, falling back to JSONL: ${error}`);
    log.warn('Storage provider failed, falling back to JSONL', error);

    this.appendToIndexJSONL(summaries, overview, projectPrefix, sessionId, messages, archivePath, timestamp);
  }
}
|
|
|
|
/**
 * Emergency fallback that appends a session's records to the JSONL index.
 *
 * Records are written one JSON object per line, in this order: a `session`
 * header, an `overview` (only when one is given), then either one `memory`
 * per summary or a single `diagnostic` when there are no summaries.
 *
 * All lines are assembled first and appended with a single
 * `fs.appendFileSync` call, so a failure while building records cannot
 * leave a half-written session in the index and the file is opened once
 * instead of once per record.
 *
 * @param summaries - Summary objects (`text`, `document_id`, `keywords`,
 *   optional `timestamp`); anything that is not a non-empty array is
 *   treated as "no summaries".
 * @param overview - Session overview text, or null when absent.
 * @param projectPrefix - Project identifier stored on every record.
 * @param sessionId - Session identifier stored on every record.
 * @param messages - Not used by this method.
 * @param archivePath - Path of the archive file; only its base name is recorded.
 * @param timestamp - Timestamp used for records lacking their own.
 */
private appendToIndexJSONL(summaries: any[], overview: string | null, projectPrefix: string, sessionId: string, messages: TranscriptMessage[], archivePath: string, timestamp: string): void {
  // Use PathResolver's getIndexPath() for consistency
  const indexPath = this.paths.getIndexPath();
  const indexDir = this.paths.getConfigDir();
  PathResolver.ensureDirectory(indexDir);

  const hasSummaries = Array.isArray(summaries) && summaries.length > 0;

  // Session header always comes first
  const records: Array<Record<string, unknown>> = [
    {
      type: "session",
      session_id: sessionId,
      timestamp: timestamp,
      project: projectPrefix
    }
  ];

  // Add overview as JSON object if present
  if (overview) {
    records.push({
      type: "overview",
      content: overview,
      session_id: sessionId,
      project: projectPrefix,
      timestamp: timestamp
    });
  }

  if (hasSummaries) {
    // One memory object per summary
    const archiveName = path.basename(archivePath);
    for (const summary of summaries) {
      records.push({
        type: "memory",
        text: summary.text,
        document_id: summary.document_id,
        keywords: summary.keywords,
        session_id: sessionId,
        project: projectPrefix,
        timestamp: summary.timestamp || timestamp,
        archive: archiveName
      });
    }
  } else {
    // If no summaries from Claude, write diagnostic info
    log.debug('📝 No summaries extracted from JSON response');
    records.push({
      type: "diagnostic",
      message: "NO SUMMARIES EXTRACTED - Check logs for valid JSON response",
      session_id: sessionId,
      project: projectPrefix,
      timestamp: timestamp
    });
  }

  // Single append: one open/write/close for the whole session
  const payload = records.map((record) => JSON.stringify(record) + '\n').join('');
  fs.appendFileSync(indexPath, payload);

  if (hasSummaries) {
    log.debug(`📝 Appended ${summaries.length} summaries to index as JSONL`);
  } else {
    this.debugLog(`⚠️ No summaries for session ${sessionId} - Check if Claude returned valid JSON in <JSONResponse> tags`);
  }

  log.debug(`Index path: ${indexPath}`);
}
|
|
} |