Files
claude-mem/src/core/compression/TranscriptCompressor.ts
Alex Newman b0032c1745 Release v3.7.0
Published from npm package build
Source: https://github.com/thedotmack/claude-mem-source
2025-09-17 20:19:19 -04:00

1319 lines
48 KiB
TypeScript

import { query } from '@anthropic-ai/claude-code';
import fs, { createWriteStream, WriteStream } from 'fs';
import path, { join } from 'path';
import os from 'os';
import { PathResolver } from '../../shared/paths.js';
import { PathDiscovery } from '../../services/path-discovery.js';
import { PromptOrchestrator, createAnalysisContext } from '../orchestration/PromptOrchestrator.js';
import { DEBUG_MESSAGES } from '../../constants.js';
import { log } from '../../shared/logger.js';
import { CompressionError } from '../../shared/types.js';
import { getClaudePath } from '../../shared/settings.js';
import { ChunkManager, ChunkingOptions, ChunkMetadata } from './ChunkManager.js';
import { getStorageProvider, needsMigration } from '../../shared/storage.js';
import { SessionInput, MemoryInput, OverviewInput, DiagnosticInput } from '../../services/sqlite/types.js';
/**
 * Interface for message objects in transcript
 *
 * Mirrors the JSONL records in Claude Code transcripts. Nearly all fields
 * are optional because the record shape varies with the message type
 * ('user' | 'assistant' | 'system' | 'result' | 'summary' | 'tool_result';
 * see extractContent for how each is handled).
 */
interface TranscriptMessage {
  // Message type discriminator (drives extraction logic)
  type: string;
  // Nested envelope used by user/assistant records
  message?: {
    content?: string | Array<{
      text?: string;
      content?: string;
    }>;
    role?: string;
    timestamp?: string;
    created_at?: string;
  };
  // Top-level content (used by tool_result and some system records)
  content?: string | Array<{
    text?: string;
    content?: string;
  }>;
  role?: string;
  uuid?: string;
  session_id?: string;
  parent_tool_use_id?: string;
  // ISO string or Unix epoch (seconds or ms) — normalized during compression
  timestamp?: string;
  created_at?: string;
  // e.g. 'success', 'error_max_turns', 'error_during_execution', 'init'
  subtype?: string;
  // Present on result-type records
  result?: string;
  // Present on system 'init' records
  model?: string;
  tools?: unknown[];
  mcp_servers?: unknown[];
  // Raw tool execution output attached to a message
  toolUseResult?: {
    stdout?: string;
    stderr?: string;
    interrupted?: boolean;
    isImage?: boolean;
  };
}
/**
 * Compression options for the TranscriptCompressor
 *
 * NOTE(review): none of these options are read anywhere in this file
 * (the constructor accepts them but never uses them) — semantics below
 * are inferred from the names; confirm against callers.
 */
export interface CompressionOptions {
  // Presumably an output path override — verify against callers
  output?: string;
  // Presumably analyze without writing artifacts — verify against callers
  dryRun?: boolean;
  // Presumably enables extra console logging — verify against callers
  verbose?: boolean;
}
/**
 * TranscriptCompressor handles the analysis and compression of Claude Code conversation transcripts
 * into a searchable memory database format using the Model Context Protocol.
 */
export class TranscriptCompressor {
  // Resolves claude-mem config/index/archive/log directories
  private paths: PathResolver;
  // Per-run debug log stream (opened in initializeLogging, closed in closeLogging)
  private logStream: WriteStream | null = null;
  // Absolute path of the current debug log file
  private logFile: string | null = null;
  // Builds analysis prompts for the Claude SDK
  private promptOrchestrator: PromptOrchestrator;
  // Decides whether/how to chunk large transcripts
  private chunkManager: ChunkManager;
// <Block> 1.1 ====================================
// Constructor Initialization - Natural flow (8/10)
/**
 * Creates a compressor, ensures the on-disk directory structure exists,
 * and opens a timestamped debug log file.
 *
 * NOTE(review): `options` is accepted for API compatibility but is never
 * read in this constructor or anywhere in the visible class body —
 * confirm intended use before relying on it.
 */
constructor(options: CompressionOptions = {}) {
  this.paths = new PathResolver();
  this.promptOrchestrator = new PromptOrchestrator();
  this.chunkManager = new ChunkManager();
  this.ensureClaudeMemStructure();
  this.initializeLogging();
  log.debug('🤖 TranscriptCompressor initialized');
}
// </Block> =======================================
// <Block> 1.2 ====================================
// Directory Structure Validation - Natural flow (8/10)
/**
 * Ensures that the required directory structure (config, index, archive,
 * and logs directories) exists on disk, creating anything missing.
 */
private ensureClaudeMemStructure(): void {
  PathResolver.ensureDirectories([
    this.paths.getConfigDir(),
    this.paths.getIndexDir(),
    this.paths.getArchiveDir(),
    this.paths.getLogsDir(),
  ]);
}
/**
 * Opens a timestamped, append-mode write stream for this run's debug log
 * and writes a header banner to it.
 */
private initializeLogging(): void {
  const logsDir = this.paths.getLogsDir();
  const stamp = new Date().toISOString().replace(/[:.]/g, '-');
  this.logFile = join(logsDir, `claude-mem-${stamp}.log`);
  this.logStream = createWriteStream(this.logFile, { flags: 'a' });
  this.debugLog('🚀 DEBUG LOG STARTED');
  this.debugLog(`📁 Log file: ${this.logFile}`);
  this.debugLog('═'.repeat(60));
}
private debugLog(message: string): void {
if (!this.logStream) return;
const timestamp = new Date().toISOString();
const logLine = `[${timestamp}] ${message}\n`;
this.logStream.write(logLine);
}
/**
 * Writes a footer line and closes the debug log stream.
 *
 * Fix: clear the stream reference after ending it. Previously the ended
 * stream stayed assigned, so any later debugLog() call (e.g. a second
 * compress() on the same instance — initializeLogging only runs in the
 * constructor) would raise a "write after end" error. With the reference
 * cleared, debugLog() becomes a harmless no-op instead.
 */
private closeLogging(): void {
  if (this.logStream) {
    this.debugLog('✅ DEBUG LOG ENDED');
    this.logStream.end();
    this.logStream = null; // prevent writes to an ended stream
  }
}
// </Block> =======================================
// <Block> 1.3 ====================================
// </Block> =======================================
// <Block> 1.4 ====================================
// Main Compression Flow - DEBUG GUARDS INTERRUPT FLOW (5/10)
/**
 * Main compression method that processes a transcript and creates compressed memories
 * Now supports automatic chunking for large transcripts
 * @param transcriptPath - Path to the transcript file (JSONL, one message per line)
 * @param sessionId - Optional session ID; defaults to the transcript basename
 * @param originalProjectName - Optional original project name (for imported transcripts)
 * @returns Path to the archive file created for this transcript
 * @throws Re-throws any failure after logging it and closing the debug log
 */
async compress(transcriptPath: string, sessionId?: string, originalProjectName?: string): Promise<string> {
  this.debugLog(`🚀 Starting compression for: ${transcriptPath}`);
  this.debugLog(`📋 Session ID: ${sessionId || 'auto-generated'}`);
  try {
    // Use original project name if provided (for imports), otherwise use current project
    const projectPrefix = originalProjectName || PathResolver.getCurrentProjectPrefix();
    log.debug(DEBUG_MESSAGES.PROJECT_NAME(projectPrefix));
    this.debugLog(`📝 PROJECT PREFIX: ${projectPrefix}`);
    // Read and parse transcript (malformed JSONL lines are counted and
    // skipped rather than aborting the run)
    const content = fs.readFileSync(transcriptPath, 'utf-8');
    this.debugLog(`📖 Reading transcript: ${content.length} bytes`);
    const lines = content.trim().split('\n').filter(line => line.trim());
    const messages: TranscriptMessage[] = [];
    let parseErrors = 0;
    for (let i = 0; i < lines.length; i++) {
      try {
        const parsed = JSON.parse(lines[i]);
        messages.push(parsed);
      } catch (e) {
        parseErrors++;
        log.debug(`Parse error on line ${i + 1}: ${(e as Error).message}`);
      }
    }
    log.debug(DEBUG_MESSAGES.TRANSCRIPT_STATS(content.length, messages.length));
    if (parseErrors > 0) {
      log.debug(`Parse errors: ${parseErrors}`);
    }
    this.debugLog(`📊 Transcript loaded: ${lines.length} lines, ${messages.length} messages, ${parseErrors} parse errors`);
    // Generate final session ID
    const finalSessionId = sessionId || path.basename(transcriptPath, '.jsonl');
    // Get timestamp from last message or use current time
    // Reverse search for the last message with a valid timestamp
    let timestamp = new Date().toISOString();
    for (let i = messages.length - 1; i >= 0; i--) {
      const msg = messages[i];
      if (msg.timestamp) {
        try {
          // Handle both ISO strings and Unix timestamps
          const ts = msg.timestamp;
          let parsedDate: Date;
          // Check if it's already an ISO string or a Unix timestamp
          if (typeof ts === 'string' && ts.includes('T')) {
            // It's likely an ISO string
            parsedDate = new Date(ts);
          } else {
            // It's likely a Unix timestamp (number or numeric string)
            const numTs = Number(ts);
            // Check if timestamp is in seconds (Unix) or milliseconds
            // Unix timestamps are typically 10 digits, JS timestamps are 13
            const dateValue = numTs < 10000000000 ? numTs * 1000 : numTs;
            parsedDate = new Date(dateValue);
          }
          if (!isNaN(parsedDate.getTime())) {
            timestamp = parsedDate.toISOString();
            this.debugLog(`📅 Using timestamp from last message: ${timestamp}`);
            break;
          }
        } catch (e) {
          // Continue searching for a valid timestamp
          this.debugLog(`⚠️ Invalid timestamp in message: ${msg.timestamp}, trying earlier message`);
        }
      }
    }
    // Archive filename for reference
    // NOTE(review): archiveFilename is never referenced below —
    // createArchive derives its own path. Confirm before removing.
    const archiveFilename = `${finalSessionId}.jsonl.archive`;
    // Format conversation for analysis
    const conversationText = this.formatConversationForPrompt(messages);
    // Check if we need to use chunked processing
    const needsChunking = this.chunkManager.needsChunking(conversationText);
    let summaries: any[] = [];
    let overview: string | null = null;
    if (needsChunking) {
      // Use chunked processing for large transcripts
      const chunkResult = await this.compressInChunks(messages, finalSessionId, projectPrefix);
      summaries = chunkResult.summaries;
      overview = chunkResult.overview;
    } else {
      // Use normal single-pass processing for smaller transcripts
      // Create analysis prompt using PromptOrchestrator
      const analysisContext = createAnalysisContext(
        conversationText,
        finalSessionId,
        {
          projectName: projectPrefix,
          trigger: 'manual'
        }
      );
      const analysisPrompt = this.promptOrchestrator.createAnalysisPrompt(analysisContext);
      log.debug('📤 Analysis prompt created');
      log.debug(`📊 Prompt length: ${analysisPrompt.prompt.length} characters`);
      // LOG THE FULL PROMPT TO DEBUG FILE
      const promptDebugPath = path.join(this.paths.getLogsDir(), `claude-prompt-${Date.now()}.txt`);
      fs.writeFileSync(promptDebugPath, `=== CLAUDE ANALYSIS PROMPT ===\n${analysisPrompt.prompt}\n`);
      this.debugLog(`📝 Full prompt saved to: ${promptDebugPath}`);
      // Find MCP config and get Claude path from settings
      const claudePath = getClaudePath();
      const mcpConfigPath = this.findMCPConfig();
      log.debug(DEBUG_MESSAGES.CLAUDE_PATH_FOUND(claudePath));
      if (mcpConfigPath) {
        log.debug(DEBUG_MESSAGES.MCP_CONFIG_USED(mcpConfigPath));
      }
      // Call Claude SDK for analysis; the chroma MCP tools let the model
      // persist memories directly while it analyzes
      this.debugLog('🤖 Calling Claude SDK with MCP tools...');
      const response = await query({
        prompt: analysisPrompt.prompt,
        options: {
          allowedTools: [
            'mcp__claude-mem__chroma_list_collections',
            'mcp__claude-mem__chroma_create_collection',
            'mcp__claude-mem__chroma_peek_collection',
            'mcp__claude-mem__chroma_get_collection_info',
            'mcp__claude-mem__chroma_get_collection_count',
            'mcp__claude-mem__chroma_modify_collection',
            'mcp__claude-mem__chroma_fork_collection',
            'mcp__claude-mem__chroma_delete_collection',
            'mcp__claude-mem__chroma_add_documents',
            'mcp__claude-mem__chroma_query_documents',
            'mcp__claude-mem__chroma_get_documents',
            'mcp__claude-mem__chroma_update_documents',
            'mcp__claude-mem__chroma_delete_documents',
          ],
          pathToClaudeCodeExecutable: getClaudePath(),
          model: 'sonnet'
        },
      });
      this.debugLog('✅ Claude SDK response received');
      // Process response and extract summaries from JSON
      this.debugLog('🔄 Processing Claude JSON response...');
      const extractResult = await this.processClaudeResponse(response);
      this.debugLog(`📋 Extracted ${extractResult.summaries.length} summaries from JSON`);
      if (extractResult.overview) {
        this.debugLog(`📝 Overview: ${extractResult.overview}`);
      }
      summaries = extractResult.summaries;
      overview = extractResult.overview;
    }
    log.debug(DEBUG_MESSAGES.COMPRESSION_COMPLETE(summaries.length));
    // Continue processing even with zero summaries - let the natural flow handle empty results
    // Create archive and update index
    const archivePath = this.createArchive(transcriptPath, projectPrefix, finalSessionId, content);
    this.debugLog(`📦 Archive created: ${archivePath}`);
    // Write to index - same method for both chunked and non-chunked
    await this.appendToIndex(summaries, overview, projectPrefix, finalSessionId, messages, archivePath, timestamp);
    this.debugLog(`📥 Written ${summaries.length} summaries to index`);
    log.debug(`✅ SUCCESS`);
    log.debug(`Archive created: ${archivePath}`);
    log.debug(`Summaries created: ${summaries.length}`);
    this.debugLog('✅ Compression completed successfully');
    this.closeLogging();
    return archivePath;
  } catch (error) {
    log.error('COMPRESSION FAILED', error, {
      transcriptPath,
      sessionId
    });
    this.debugLog(`❌ ERROR: ${error instanceof Error ? error.message : String(error)}`);
    this.closeLogging();
    throw error;
  }
}
// </Block> =======================================
/**
 * Locates an MCP configuration file, preferring the project-level config,
 * then the user-level config, then a `.mcp.json` inside the Claude config
 * directory. Falls back to the default MCP config path when none of the
 * candidates exist on disk.
 */
private findMCPConfig(): string {
  const discovery = PathDiscovery.getInstance();
  const candidates = [
    discovery.getProjectMcpConfigPath(),
    discovery.getMcpConfigPath(),
    join(discovery.getClaudeConfigDirectory(), '.mcp.json'),
  ];
  for (const candidate of candidates) {
    if (fs.existsSync(candidate)) {
      return candidate;
    }
  }
  return discovery.getMcpConfigPath();
}
// <Block> 1.5 ====================================
// Claude Response Processing - JSON extraction with pipe-separated output (9/10)
/**
 * Processes Claude response to extract summaries from JSON.
 * The raw response text is also persisted to a debug file so failures
 * can be diagnosed after the fact.
 */
private async processClaudeResponse(response: any): Promise<{ overview: string | null; summaries: string[] }> {
  // Extract content using polymorphic handlers
  const fullContent = await this.extractResponseContent(response, []);
  this.debugLog(`🔍 Claude response content length: ${fullContent.length}`);
  // Write raw response to debug file for troubleshooting
  const debugPath = path.join(this.paths.getLogsDir(), `claude-response-${Date.now()}.txt`);
  fs.writeFileSync(debugPath, `=== CLAUDE RAW RESPONSE ===\n${fullContent}\n`);
  this.debugLog(`📝 Raw response saved to: ${debugPath}`);
  // Pull the structured payload out of the <JSONResponse> tags
  const extractResult = this.extractJSONResponse(fullContent);
  this.debugLog(`📊 Extracted ${extractResult.summaries.length} summaries from JSON`);
  if (extractResult.summaries.length === 0) {
    this.debugLog(`⚠️ No summaries found in JSON response`);
  }
  return extractResult;
}
/**
 * Extracts text from any response shape the SDK may hand back: an
 * async-iterable stream, a plain string, an array of items, or a single
 * object. While streaming, Claude's visible "thinking" is echoed to
 * stdout, but anything inside <JSONResponse> tags is suppressed.
 *
 * NOTE(review): the `summaries` parameter is never read here; it is kept
 * for call-site compatibility — confirm before removing.
 */
private async extractResponseContent(response: any, summaries: any[]): Promise<string> {
  // Streaming (async-iterable) response
  if (response && typeof response === 'object' && Symbol.asyncIterator in response) {
    let accumulated = '';
    let insideJsonTags = false;
    for await (const message of response) {
      const piece = this.extractMessageContent(message);
      accumulated += piece;
      // Track whether the stream has entered/exited the JSON payload
      if (piece.includes('<JSONResponse>')) {
        insideJsonTags = true;
      }
      if (piece.includes('</JSONResponse>')) {
        insideJsonTags = false;
        continue; // Skip printing the closing tag
      }
      // Echo only Claude's thinking, never the JSON payload
      if (piece && !insideJsonTags) {
        process.stdout.write(piece);
      }
      // A terminal 'result' message supersedes everything accumulated
      if (message?.type === 'result' && message?.result) {
        accumulated = message.result;
      }
    }
    return accumulated;
  }
  // Plain string response
  if (typeof response === 'string') {
    return response;
  }
  // Array response: take the first truthy text-ish field of each element
  if (Array.isArray(response)) {
    const parts: string[] = [];
    for (const item of response) {
      if (typeof item === 'string') {
        if (item) parts.push(item);
      } else if (item?.text) {
        parts.push(item.text);
      } else if (item?.content) {
        parts.push(item.content);
      }
    }
    return parts.join('\n');
  }
  // Single-object response
  if (typeof response === 'object' && response !== null) {
    return response.text || response.content || response.message || '';
  }
  return '';
}
/**
 * Pulls displayable text out of a single streamed SDK message by
 * concatenating its top-level content/text/data fields plus any 'text'
 * items in a nested message.content array.
 */
private extractMessageContent(message: any): string {
  const pieces: any[] = [];
  if (message?.content) pieces.push(message.content);
  if (message?.text) pieces.push(message.text);
  if (message?.data) pieces.push(message.data);
  const nested = message?.message?.content;
  if (Array.isArray(nested)) {
    for (const item of nested) {
      if (item.type === 'text' && item.text) {
        pieces.push(item.text);
      }
    }
  }
  return pieces.join('');
}
/**
 * Extracts JSON response and returns raw JSON objects
 *
 * Looks for a <JSONResponse>…</JSONResponse> block, parses it, and keeps
 * only summaries carrying the mandatory text and document_id fields
 * (filling defaults for the rest). If parsing fails at any point, falls
 * back to scanning the raw content for legacy pipe-separated lines and
 * converts them into the same summary shape.
 */
private extractJSONResponse(content: string): { overview: string | null; summaries: any[] } {
  try {
    const tagged = content.match(/<JSONResponse>([\s\S]*?)<\/JSONResponse>/);
    if (!tagged) {
      this.debugLog(`⚠️ No <JSONResponse> tags found in response`);
      return { overview: null, summaries: [] };
    }
    const payload = tagged[1].trim();
    this.debugLog(`✅ Found JSON response: ${payload.length} chars`);
    const parsed = JSON.parse(payload);
    if (!parsed.summaries || !Array.isArray(parsed.summaries)) {
      this.debugLog(`⚠️ Invalid JSON structure: missing summaries array`);
      return { overview: null, summaries: [] };
    }
    const validSummaries: any[] = [];
    parsed.summaries.forEach((summary: any, index: number) => {
      // text and document_id are mandatory; drop entries missing either
      if (!summary.text || !summary.document_id) {
        this.debugLog(`⚠️ Skipping invalid summary at index ${index}`);
        return;
      }
      validSummaries.push({
        text: summary.text,
        document_id: summary.document_id,
        keywords: summary.keywords || '',
        timestamp: summary.timestamp || new Date().toISOString(),
        archive: summary.archive || `${summary.document_id}.jsonl.archive`,
      });
      this.debugLog(`✅ Valid summary ${index + 1}: ${summary.document_id}`);
    });
    if (parsed.overview) {
      this.debugLog(`📝 Session overview: ${parsed.overview}`);
    }
    return { overview: parsed.overview || null, summaries: validSummaries };
  } catch (error) {
    this.debugLog(`❌ Failed to parse JSON response: ${error}`);
    this.debugLog(`🔄 Attempting fallback to pipe-separated format...`);
    const legacyLines = this.extractLegacyPipeSeparatedLines(content);
    // Convert legacy lines to JSON format for consistency
    const legacySummaries = legacyLines.map((line, index) => {
      const parts = line.split(' | ');
      return {
        text: parts[0] || '',
        document_id: parts[1] || `legacy_${Date.now()}_${index}`,
        keywords: parts[2] || '',
        timestamp: parts[3] || new Date().toISOString(),
        archive: parts[4] || `legacy_${Date.now()}_${index}.jsonl.archive`,
      };
    });
    return { overview: null, summaries: legacySummaries };
  }
}
/**
 * Legacy fallback for pipe-separated format
 * Collects every trimmed, non-empty line that contains at least three
 * " | "-separated fields.
 */
private extractLegacyPipeSeparatedLines(content: string): string[] {
  const pipeLines = content
    .split('\n')
    .map(line => line.trim())
    .filter(line => line && line.includes(' | ') && line.split(' | ').length >= 3);
  this.debugLog(`📊 Fallback extracted ${pipeLines.length} pipe-separated lines`);
  return pipeLines;
}
// </Block> =======================================
// <Block> 1.7 ====================================
// Conversation Formatting - LONG BUT MOSTLY NATURAL (6/10)
/**
 * Processes a transcript in chunks when it's too large for single processing
 * @param messages - Parsed transcript messages
 * @param sessionId - Session identifier passed through to each chunk prompt
 * @param projectPrefix - Project name used in the analysis context
 * @returns All summaries collected across chunks, plus one overview
 *          generated afterwards from those summaries
 */
private async compressInChunks(
  messages: TranscriptMessage[],
  sessionId: string,
  projectPrefix: string
): Promise<{ summaries: any[]; overview: string | null }> {
  this.debugLog('📦 Large transcript detected, processing in chunks...');
  // Create filtered output for chunking: one "- content" line per message,
  // with real line breaks collapsed to literal "\n" markers
  const outputLines: string[] = [];
  messages.forEach((m, index) => {
    const filteredContent = this.extractContent(m);
    const singleLine = filteredContent.replace(/\r\n/g, '\\n').replace(/\n/g, '\\n').replace(/\r/g, '\\n');
    outputLines.push(`- ${singleLine}`);
  });
  const fullOutput = outputLines.join('\n');
  const chunks = this.chunkManager.chunkTranscript(fullOutput);
  this.debugLog(this.chunkManager.getChunkingStats(chunks));
  console.log(`\n📊 Processing ${chunks.length} chunks...`);
  const allSummaries: any[] = [];
  // NOTE(review): chunkOverviews is never populated — per-chunk overviews
  // are deliberately ignored in favor of the post-pass overview below.
  // Confirm before removing.
  const chunkOverviews: string[] = [];
  // Process each chunk and collect overviews
  for (let i = 0; i < chunks.length; i++) {
    const chunk = chunks[i];
    console.log(`\n🔄 Processing chunk ${i + 1}/${chunks.length}...`);
    // Create analysis prompt for this chunk
    const chunkPrompt = `Analyze this chunk (${i + 1}/${chunks.length}) of a larger conversation transcript.
${chunk.metadata.hasOverlap ? `Note: This chunk includes ${chunk.metadata.overlapMessages || 2} messages from the previous chunk for context continuity.` : ''}
Chunk contains messages ${chunk.metadata.startIndex + 1} to ${chunk.metadata.endIndex + 1}.
${chunk.content}`;
    const analysisPrompt = this.promptOrchestrator.createAnalysisPrompt({
      transcriptContent: chunkPrompt,
      sessionId: sessionId,
      projectName: projectPrefix,
      trigger: 'manual',
      originalTokens: chunk.metadata.estimatedTokens
    });
    // Find MCP config and get Claude path
    const claudePath = getClaudePath();
    const mcpConfigPath = this.findMCPConfig();
    if (mcpConfigPath) {
      log.debug(DEBUG_MESSAGES.MCP_CONFIG_USED(mcpConfigPath));
    }
    // Call Claude SDK for this chunk
    const response = await query({
      prompt: analysisPrompt.prompt,
      options: {
        allowedTools: [
          'mcp__claude-mem__chroma_list_collections',
          'mcp__claude-mem__chroma_create_collection',
          'mcp__claude-mem__chroma_peek_collection',
          'mcp__claude-mem__chroma_get_collection_info',
          'mcp__claude-mem__chroma_get_collection_count',
          'mcp__claude-mem__chroma_modify_collection',
          'mcp__claude-mem__chroma_fork_collection',
          'mcp__claude-mem__chroma_delete_collection',
          'mcp__claude-mem__chroma_add_documents',
          'mcp__claude-mem__chroma_query_documents',
          'mcp__claude-mem__chroma_get_documents',
          'mcp__claude-mem__chroma_update_documents',
          'mcp__claude-mem__chroma_delete_documents',
        ],
        pathToClaudeCodeExecutable: getClaudePath(),
        model: 'sonnet'
      },
    });
    // Extract summaries from this chunk's response (ignoring overview from chunks)
    const responseContent = await this.extractResponseContent(response, []);
    const extractResult = this.extractJSONResponse(responseContent);
    if (extractResult.summaries.length > 0) {
      console.log(` ✅ Extracted ${extractResult.summaries.length} memories from chunk ${i + 1}`);
      allSummaries.push(...extractResult.summaries);
    } else {
      console.log(` ⚠️ No memories extracted from chunk ${i + 1}`);
    }
  }
  // After all chunks are processed, generate a single overview from the saved memories
  console.log(`\n📝 Generating overview from ${allSummaries.length} extracted memories...`);
  const overview = await this.generateOverviewFromMemories(projectPrefix, sessionId, allSummaries);
  return { summaries: allSummaries, overview };
}
/**
 * Generates a single overview from the memories that were saved to Chroma
 * @param projectPrefix - Project name included in the overview prompt
 * @param sessionId - Session identifier included in the overview prompt
 * @param summaries - Summary objects (or plain strings) gathered from all chunks
 * @returns The trimmed overview text, or null when there is nothing to
 *          summarize, no text came back, or the SDK call failed
 */
private async generateOverviewFromMemories(
  projectPrefix: string,
  sessionId: string,
  summaries: any[]
): Promise<string | null> {
  try {
    // Extract memory texts from the summaries for the overview prompt
    // (accepts either {text: ...} objects or bare strings)
    const memoryTexts = summaries.map(s => s.text || s).filter(Boolean);
    if (memoryTexts.length === 0) {
      console.log(' ⚠️ No memories available to generate overview');
      return null;
    }
    // Create a focused prompt for overview generation
    const overviewPrompt = `You have just analyzed a long conversation and extracted ${memoryTexts.length} key memories.
Based on these memories, create a comprehensive overview of the entire session.
MEMORIES EXTRACTED:
${memoryTexts.map((text, i) => `${i + 1}. ${text}`).join('\n')}
PROJECT: ${projectPrefix}
SESSION: ${sessionId}
Create a 2-3 sentence overview that:
1. Summarizes the main themes and accomplishments across ALL the memories
2. Highlights the most significant technical work or decisions
3. Written for any developer to understand (define jargon organically)
Return ONLY the overview text, nothing else.`;
    // Call Claude for overview generation
    const response = await query({
      prompt: overviewPrompt,
      options: {
        allowedTools: [], // No tools needed for overview generation
        pathToClaudeCodeExecutable: getClaudePath(),
        model: 'sonnet'
      },
    });
    // Extract the overview from response
    // (mirrors extractResponseContent's shape handling, minus stdout echo)
    let overview = '';
    if (response && typeof response === 'object' && Symbol.asyncIterator in response) {
      for await (const message of response) {
        const chunk = this.extractMessageContent(message);
        overview += chunk;
        // A terminal 'result' message supersedes the accumulated text
        if (message?.type === 'result' && message?.result) {
          overview = message.result;
        }
      }
    } else if (typeof response === 'string') {
      overview = response;
    } else if (response?.text) {
      overview = response.text;
    } else if (response?.content) {
      overview = response.content;
    }
    const cleanedOverview = overview.trim();
    if (cleanedOverview) {
      console.log(` ✅ Overview generated successfully`);
      return cleanedOverview;
    } else {
      console.log(` ⚠️ No overview generated`);
      return null;
    }
  } catch (error) {
    console.error(` ❌ Failed to generate overview: ${error}`);
    this.debugLog(`❌ Overview generation error: ${error}`);
    return null;
  }
}
/**
 * Formats conversation messages for analysis prompt
 *
 * Emits one pipe-delimited line per message:
 *   content | session_id | role | timestamp | message_uuid
 * wrapped in TRANSCRIPT markers. Pipe characters inside content are
 * escaped so they cannot corrupt the field separators.
 */
private formatConversationForPrompt(messages: TranscriptMessage[]): string {
  const pipeLines = messages.map((m) => {
    const role =
      m.type === 'assistant' ? 'assistant'
      : m.type === 'user' ? 'user'
      : (m.type === 'result' || m.type === 'system' || m.type === 'summary') ? 'system'
      : m.message?.role || m.role;
    // Escape pipe characters in content to prevent format corruption
    const escapedContent = this.extractContent(m).replace(/\|/g, '\\|');
    return `${escapedContent} | ${m.session_id || ''} | ${role} | ${this.normalizeTimestamp(m)} | ${m.uuid || ''}`;
  });
  log.debug(`Field filtering complete: ${pipeLines.length} messages processed`);
  return `<!-- TRANSCRIPT -->\n${pipeLines.join('\n')}\n<!-- /TRANSCRIPT -->`;
}
// </Block> =======================================
// <Block> 1.6 ====================================
// Message Content Extraction - Simplified (8/10)
/**
 * Extracts content from message object
 *
 * Dispatches on message type (tool_result / assistant / user / summary /
 * result / system), then falls back to generic extraction, and finally
 * appends a summarized tool result when one is attached.
 */
private extractContent(m: TranscriptMessage): string {
  let content = '';
  // Handle tool_result messages first - check for large content
  if (m.type === 'tool_result') {
    return this.extractToolResultContent(m);
  }
  // Extract by type
  if (m.type === 'assistant' || m.type === 'user') {
    const messageContent = m.message?.content;
    if (Array.isArray(messageContent)) {
      // Properly handle content arrays without aggressive filtering
      content = messageContent
        .map((item) => this.extractContentItem(item))
        .filter(Boolean)
        .join(' ');
    } else if (messageContent) {
      content = String(messageContent).trim();
    }
  } else if (m.type === 'summary') {
    // Handle summary messages that have a different structure
    content = (m as any).summary || '';
  } else if (m.type === 'result') {
    if (m.subtype === 'success' && m.result) {
      content = `[Result: ${m.result}]`;
    } else if (m.subtype === 'error_max_turns') {
      content = '[Error: Maximum turns reached]';
    } else if (m.subtype === 'error_during_execution') {
      content = '[Error during execution]';
    }
  } else if (m.type === 'system') {
    if (m.subtype === 'init') {
      content = `[System initialized: ${m.model}, tools: ${m.tools?.length || 0}, MCP servers: ${m.mcp_servers?.length || 0}]`;
    } else {
      // Handle other system messages
      content = String(m.content || '').trim();
    }
  }
  // Fallback to generic content extraction.
  // BUG FIX: previously the value was passed through String() BEFORE the
  // Array.isArray check, so the array branch was unreachable and array
  // content was comma-joined (objects became "[object Object]"). Check
  // the raw value for array-ness first, then stringify scalars.
  if (!content) {
    const fallback = m.message?.content || m.content || '';
    if (Array.isArray(fallback)) {
      content = fallback
        .map((item: any) => item.text || item.content || '')
        .filter(Boolean)
        .join(' ');
    } else {
      content = String(fallback);
    }
  }
  // Append tool use result if present
  if (m.toolUseResult) {
    const toolSummary = this.summarizeToolResult(m.toolUseResult, content);
    if (toolSummary) {
      content = content ? `${content}\n\n${toolSummary}` : toolSummary;
    }
  }
  return String(content).trim();
}
/**
 * Extracts content from individual content items (text, tool_use, etc.)
 */
private extractContentItem(item: any): string {
  if (!item || typeof item !== 'object') {
    return String(item || '').trim();
  }
  switch (item.type) {
    case 'text':
      return item.text || '';
    case 'thinking':
      // Surface the model's thinking content as-is
      return item.thinking || '';
    case 'tool_use': {
      // Summarize tool use without the full input details
      const toolName = item.name || 'unknown';
      const toolId = item.id || '';
      return `[Tool: ${toolName}${toolId ? ` (${toolId})` : ''}]`;
    }
    case 'tool_result': {
      // Replace very large tool results with a placeholder before extraction
      const contentSize = this.getToolResultSize(item);
      if (contentSize > 1024 * 1024) { // 1MB threshold
        const sizeMB = Math.round(contentSize / (1024 * 1024) * 10) / 10;
        return `[FILTERED: Large tool result ~${sizeMB}MB - tool output to assistant]`;
      }
      return this.extractToolResultFromItem(item);
    }
    default:
      // Fallback for other content types - be more thorough
      return item.text || item.content || item.thinking || JSON.stringify(item);
  }
}
/**
 * Calculate the size of tool_result content
 * Sums per-item text lengths for array content, uses string length for
 * string content, and falls back to the JSON-serialized length otherwise.
 */
private getToolResultSize(item: any): number {
  const body = item.content;
  if (!body) return 0;
  if (Array.isArray(body)) {
    let total = 0;
    for (const entry of body) {
      total += entry.text_length || entry.text?.length || entry.content?.length || 0;
    }
    return total;
  }
  if (typeof body === 'string') return body.length;
  return JSON.stringify(body).length;
}
/**
 * Extracts content from tool_result messages with large content filtering
 * Content above ~1MB is replaced with a size placeholder rather than
 * being inlined into the transcript.
 */
private extractToolResultContent(m: TranscriptMessage): string {
  const LARGE_CONTENT_THRESHOLD = 1024 * 1024; // 1MB threshold
  const body = m.content;
  // Array content: sum item sizes, filter if the total is too large
  if (body && Array.isArray(body)) {
    let totalSize = 0;
    for (const entry of body as any[]) {
      totalSize += entry.text_length || entry.text?.length || 0;
    }
    if (totalSize > LARGE_CONTENT_THRESHOLD) {
      const sizeMB = Math.round(totalSize / (1024 * 1024) * 10) / 10;
      return `[FILTERED: Large tool result ~${sizeMB}MB - tool output to assistant]`;
    }
    // Normal size array - extract all content items
    return body.map((item: any) => this.extractToolResultFromItem(item)).filter(Boolean).join(' ');
  }
  // Direct string content that is too large
  if (typeof body === 'string' && body.length > LARGE_CONTENT_THRESHOLD) {
    const sizeMB = Math.round(body.length / (1024 * 1024) * 10) / 10;
    return `[FILTERED: Large tool result ~${sizeMB}MB - tool output to assistant]`;
  }
  // Content is not too large, extract normally
  return this.extractToolResultFromItem(m);
}
/**
 * Extracts content from tool_result item (normal size)
 * Accepts either an individual content element or a full item carrying a
 * `content` property (string, array, or object).
 */
private extractToolResultFromItem(item: any): string {
  // Direct text content element (individual array elements)
  if (item.type === 'text' && item.text) {
    return item.text;
  }
  const body = item.content;
  if (!body) {
    return '[Tool result: no content]';
  }
  // Array content: collect text from each element
  if (Array.isArray(body)) {
    const parts: string[] = [];
    for (const entry of body) {
      const text = (entry.type === 'text' && entry.text)
        ? entry.text
        : (entry.text || entry.content || '');
      if (text) parts.push(text);
    }
    return parts.join(' ');
  }
  // String content
  if (typeof body === 'string') {
    return body;
  }
  // Object content
  if (typeof body === 'object') {
    return body.text || body.content || '[Tool result: complex object]';
  }
  return String(body || '');
}
// Removed filterLargeContent method - content filtering now handled at message level in extractContent
// </Block> =======================================
/**
 * Creates a clear message flow label that eliminates confusion about content direction
 */
private createMessageFlowLabel(m: TranscriptMessage, messageNumber: number): string {
  const prefix = `Message ${messageNumber}`;
  // Embedded tool results always mean content flowing tool → assistant,
  // regardless of the outer message type
  if (this.messageContainsToolResult(m)) {
    return `${prefix} (tool → assistant)`;
  }
  if (m.type === 'user') return `${prefix} (user → assistant)`;
  if (m.type === 'assistant') return `${prefix} (assistant → user)`;
  if (m.type === 'system') return `${prefix} (system)`;
  if (m.type === 'tool_result') return `${prefix} (tool → assistant)`;
  if (m.type === 'summary') return `${prefix} (session summary)`;
  if (m.type === 'result') {
    if (m.subtype === 'error_max_turns') return `${prefix} (session error: max turns)`;
    if (m.subtype === 'error_during_execution') return `${prefix} (session error: execution)`;
    // 'success' and any other subtype collapse to the generic label
    return `${prefix} (session result)`;
  }
  return `${prefix} (${m.type})`;
}
/**
 * Checks if a message contains tool result content
 *
 * For user messages with an array payload, the answer is decided solely
 * by that payload (the top-level content array is intentionally not
 * consulted, matching the original short-circuit behavior).
 */
private messageContainsToolResult(m: TranscriptMessage): boolean {
  const nested = m.message?.content;
  if (m.type === 'user' && Array.isArray(nested)) {
    return nested.some((item: any) => item.type === 'tool_result');
  }
  if (m.type === 'tool_result') {
    return true;
  }
  if (Array.isArray(m.content)) {
    return m.content.some((item: any) => item.type === 'tool_result');
  }
  return false;
}
/**
* Debug method to show filtered output without full compression
* Now supports automatic chunking for large outputs
*/
public showFilteredOutput(transcriptPath: string, enableChunking: boolean = true): void {
  const content = fs.readFileSync(transcriptPath, 'utf-8');
  const lines = content.trim().split('\n').filter(line => line.trim());

  // Parse every JSONL line, silently skipping malformed entries (best-effort
  // by design: a single corrupt line must not abort the debug dump).
  const messages: TranscriptMessage[] = [];
  for (const line of lines) {
    try {
      messages.push(JSON.parse(line));
    } catch {
      continue;
    }
  }

  // Time range for the whole transcript ('' when empty or unparseable).
  const firstTimestamp = messages.length > 0 ? this.normalizeTimestamp(messages[0]) : '';
  const lastTimestamp = messages.length > 0 ? this.normalizeTimestamp(messages[messages.length - 1]) : '';

  // One "- content" bullet per message; embedded line breaks are collapsed
  // to literal "\n" so each message stays on a single physical line.
  const outputLines = messages.map(m => {
    const filteredContent = this.extractContent(m);
    const singleLine = filteredContent.replace(/\r\n/g, '\\n').replace(/\n/g, '\\n').replace(/\r/g, '\\n');
    return `- ${singleLine}`;
  });
  const fullOutput = outputLines.join('\n');
  const baseOutputFile = `/tmp/filtered-transcript-${Date.now()}`;

  // Check if chunking is needed
  if (enableChunking && this.chunkManager.needsChunking(fullOutput)) {
    console.log('\n📦 Large transcript detected, chunking output...');
    const chunks = this.chunkManager.chunkTranscript(fullOutput);
    console.log(this.chunkManager.getChunkingStats(chunks));

    // Save each chunk to a separate file, annotating its own time range.
    chunks.forEach((chunk, index) => {
      const chunkFile = `${baseOutputFile}-chunk-${index + 1}.txt`;

      // Clamp the end index: the final chunk's endIndex may exceed the
      // last message index.
      const chunkFirstIdx = chunk.metadata.startIndex;
      const chunkLastIdx = Math.min(chunk.metadata.endIndex, messages.length - 1);
      const chunkFirstTime = messages[chunkFirstIdx] ? this.normalizeTimestamp(messages[chunkFirstIdx]) : '';
      const chunkLastTime = messages[chunkLastIdx] ? this.normalizeTimestamp(messages[chunkLastIdx]) : '';

      // Add timestamps to metadata so the chunk header can display them.
      chunk.metadata.firstTimestamp = chunkFirstTime;
      chunk.metadata.lastTimestamp = chunkLastTime;

      const chunkContent = this.chunkManager.createChunkHeader(chunk.metadata) + chunk.content;
      fs.writeFileSync(chunkFile, chunkContent, 'utf-8');
      console.log(`  ✅ Chunk ${index + 1}/${chunks.length} saved to: ${chunkFile}`);
    });

    console.log(`Processed ${messages.length} messages from transcript into ${chunks.length} chunks`);
    console.log(`📅 Time range: ${firstTimestamp} to ${lastTimestamp}`);
  } else {
    // Small enough for a single file; prepend the transcript time range.
    const outputFile = `${baseOutputFile}.txt`;
    const outputWithTimestamps = `# ${firstTimestamp} to ${lastTimestamp}\n${fullOutput}`;
    fs.writeFileSync(outputFile, outputWithTimestamps, 'utf-8');
    console.log(`\nFiltered output saved to: ${outputFile}`);
    console.log(`Processed ${messages.length} messages from transcript`);
    console.log(`📅 Time range: ${firstTimestamp} to ${lastTimestamp}`);
  }
}
/**
* Summarizes tool use results
*/
/**
 * Summarizes tool use results into a compact multi-line string.
 *
 * Long stdout (> 200 chars) is truncated to a head preview (first 3 lines);
 * outputs longer than 6 lines also get a 2-line tail. A bare ellipsis now
 * marks truncation for 4–6 line outputs, which previously lost their middle
 * lines with no indicator.
 *
 * @param toolResult - Raw tool result object (stdout/stderr/interrupted/isImage)
 * @param existingContent - NOTE(review): currently unused; kept for interface
 *   compatibility with callers — confirm before removing.
 * @returns Newline-joined summary parts (empty string when nothing to report)
 */
private summarizeToolResult(toolResult: any, existingContent: string): string {
  const summaryParts: string[] = [];

  if (toolResult.stdout) {
    const stdout = String(toolResult.stdout);
    if (stdout.length > 200) {
      const stdoutLines = stdout.split('\n');
      const lineCount = stdoutLines.length;
      const charCount = stdout.length;
      const preview = stdoutLines.slice(0, 3).join('\n');
      // Head + tail for very long outputs; a lone ellipsis when only the
      // middle is elided; nothing when the preview already covers all lines.
      let suffix = '';
      if (stdoutLines.length > 6) {
        suffix = `\n...\n${stdoutLines.slice(-2).join('\n')}`;
      } else if (stdoutLines.length > 3) {
        suffix = '\n...';
      }
      summaryParts.push(`Result: ${preview}${suffix} (${lineCount} lines, ${charCount} chars)`);
    } else {
      summaryParts.push(`Result: ${stdout}`);
    }
  }

  if (toolResult.stderr && toolResult.stderr.trim()) {
    summaryParts.push(`Error: ${toolResult.stderr}`);
  }
  if (toolResult.interrupted) {
    summaryParts.push('(interrupted)');
  }
  if (toolResult.isImage) {
    summaryParts.push('(image output)');
  }

  return summaryParts.join('\n');
}
/**
* Normalizes timestamp formats
*/
private normalizeTimestamp(m: TranscriptMessage): string {
  // Accept whichever timestamp field is present, envelope-level or nested.
  const raw = m.timestamp || m.message?.timestamp || m.created_at || m.message?.created_at;
  if (!raw) {
    return '';
  }
  try {
    const parsed = new Date(raw);
    // Unparseable date strings produce a NaN epoch value.
    if (isNaN(parsed.getTime())) {
      return '';
    }
    // ISO form with a space separating date and time.
    return parsed.toISOString().replace('T', ' ');
  } catch (err) {
    log.debug(`Invalid timestamp format: ${raw}`);
    return '';
  }
}
// <Block> 1.8 ====================================
// Archive Creation - Natural flow (9/10)
/**
* Creates an archive file of the original transcript
*/
private createArchive(transcriptPath: string, projectPrefix: string, sessionId: string, content: string): string {
  // Ensure the per-project archive directory exists before writing.
  const archiveDir = this.paths.getProjectArchiveDir(projectPrefix);
  PathResolver.ensureDirectory(archiveDir);

  // Archive is keyed by session id alongside other sessions of this project.
  const archivePath = join(archiveDir, `${sessionId}.jsonl.archive`);
  fs.writeFileSync(archivePath, content);

  log.debug(`📦 Created archive: ${archivePath}`);
  return archivePath;
}
// </Block> =======================================
/**
 * Stores summaries using the configured storage provider (SQLite or JSONL fallback)
 * Each record is stored with proper type information for easy querying
 *
 * Write order matters: the session row is created (or confirmed) first so
 * that overview/diagnostic/memory records always reference an existing
 * session. Any storage failure falls back to the emergency JSONL path so
 * compression output is never silently lost.
 *
 * @param summaries - Memory summaries extracted from Claude's response (may be empty/null)
 * @param overview - Optional session overview text (skipped when null)
 * @param projectPrefix - Project identifier used to scope all records
 * @param sessionId - Unique session identifier
 * @param messages - Parsed transcript messages (passed through to the JSONL fallback)
 * @param archivePath - Path to the archived transcript; must exist (its size is read)
 * @param timestamp - Session creation timestamp string
 */
private async appendToIndex(summaries: any[], overview: string | null, projectPrefix: string, sessionId: string, messages: TranscriptMessage[], archivePath: string, timestamp: string): Promise<void> {
  try {
    // Check if migration is needed and log warning (non-fatal; storage still works)
    if (await needsMigration()) {
      this.debugLog('⚠️ JSONL to SQLite migration recommended. Run: claude-mem migrate-index');
    }
    const storage = await getStorageProvider();
    this.debugLog(`💾 Using ${storage.backend} storage backend`);
    // Create or ensure session exists; archive metadata is captured here.
    const sessionInput: SessionInput = {
      session_id: sessionId,
      project: projectPrefix,
      created_at: timestamp,
      source: 'compress',
      archive_path: archivePath,
      archive_bytes: fs.statSync(archivePath).size,
      archived_at: new Date().toISOString()
    };
    // Check if session already exists (for duplicate prevention)
    if (!await storage.hasSession(sessionId)) {
      await storage.createSession(sessionInput);
      this.debugLog(`📋 Created session record: ${sessionId}`);
    } else {
      this.debugLog(`📋 Session already exists: ${sessionId}`);
    }
    // Add overview if present (upsert: re-compressing a session replaces it)
    if (overview) {
      const overviewInput: OverviewInput = {
        session_id: sessionId,
        content: overview,
        created_at: timestamp,
        project: projectPrefix,
        origin: 'claude'
      };
      await storage.upsertOverview(overviewInput);
      this.debugLog(`📝 Stored overview for session: ${sessionId}`);
    }
    // If no summaries from Claude, write diagnostic info so the empty
    // session is explainable later instead of looking like data loss.
    if (!summaries || summaries.length === 0) {
      log.debug('📝 No summaries extracted from JSON response');
      const diagnosticInput: DiagnosticInput = {
        session_id: sessionId,
        message: "NO SUMMARIES EXTRACTED - Check logs for valid JSON response",
        severity: 'warn',
        created_at: timestamp,
        project: projectPrefix,
        origin: 'compressor'
      };
      await storage.createDiagnostic(diagnosticInput);
      this.debugLog(`⚠️ No summaries for session ${sessionId} - Check if Claude returned valid JSON in <JSONResponse> tags`);
    } else {
      // Prepare memory records for bulk insertion; each memory prefers its
      // own timestamp and falls back to the session timestamp.
      const memoryInputs: MemoryInput[] = summaries.map((summary) => ({
        session_id: sessionId,
        text: summary.text || '',
        document_id: summary.document_id,
        keywords: summary.keywords,
        created_at: summary.timestamp || timestamp,
        project: projectPrefix,
        archive_basename: path.basename(archivePath),
        origin: 'transcript'
      }));
      // Store memories using bulk operation if available, otherwise one by one
      await storage.createMemories(memoryInputs);
      log.debug(`📝 Stored ${summaries.length} summaries using ${storage.backend}`);
      this.debugLog(`💾 Stored ${summaries.length} memories for session: ${sessionId}`);
    }
  } catch (error) {
    // If storage fails, fall back to JSONL as emergency backup
    this.debugLog(`❌ Storage failed, falling back to JSONL: ${error}`);
    log.warn('Storage provider failed, falling back to JSONL', error);
    // Emergency JSONL fallback (synchronous; cannot itself reject)
    this.appendToIndexJSONL(summaries, overview, projectPrefix, sessionId, messages, archivePath, timestamp);
  }
}
/**
* Emergency fallback method using original JSONL approach
*/
private appendToIndexJSONL(summaries: any[], overview: string | null, projectPrefix: string, sessionId: string, messages: TranscriptMessage[], archivePath: string, timestamp: string): void {
// Use PathResolver's getIndexPath() for consistency
const indexPath = this.paths.getIndexPath();
const indexDir = this.paths.getConfigDir();
PathResolver.ensureDirectory(indexDir);
// Write session header as JSON object
const sessionHeader = {
type: "session",
session_id: sessionId,
timestamp: timestamp,
project: projectPrefix
};
fs.appendFileSync(indexPath, JSON.stringify(sessionHeader) + '\n');
// Add overview as JSON object if present
if (overview) {
const overviewObj = {
type: "overview",
content: overview,
session_id: sessionId,
project: projectPrefix,
timestamp: timestamp
};
fs.appendFileSync(indexPath, JSON.stringify(overviewObj) + '\n');
}
// If no summaries from Claude, write diagnostic info
if (!summaries || summaries.length === 0) {
log.debug('📝 No summaries extracted from JSON response');
const diagnosticObj = {
type: "diagnostic",
message: "NO SUMMARIES EXTRACTED - Check logs for valid JSON response",
session_id: sessionId,
project: projectPrefix,
timestamp: timestamp
};
fs.appendFileSync(indexPath, JSON.stringify(diagnosticObj) + '\n');
this.debugLog(`⚠️ No summaries for session ${sessionId} - Check if Claude returned valid JSON in <JSONResponse> tags`);
} else {
// Write each summary as JSONL memory object
summaries.forEach((summary) => {
const memoryObj = {
type: "memory",
text: summary.text,
document_id: summary.document_id,
keywords: summary.keywords,
session_id: sessionId,
project: projectPrefix,
timestamp: summary.timestamp || timestamp,
archive: path.basename(archivePath)
};
fs.appendFileSync(indexPath, JSON.stringify(memoryObj) + '\n');
});
log.debug(`📝 Appended ${summaries.length} summaries to index as JSONL`);
}
log.debug(`Index path: ${indexPath}`);
}
}