mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
* fix: export/import scripts now use API instead of direct DB access Export script fix: - Add format=json parameter to SearchManager for raw data output - Add getSdkSessionsBySessionIds method to SessionStore - Add POST /api/sdk-sessions/batch endpoint to DataRoutes - Refactor export-memories.ts to use HTTP API Import script fix: - Add import methods to SessionStore with duplicate detection - Add POST /api/import endpoint to DataRoutes - Refactor import-memories.ts to use HTTP API 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * fix: update analyze-transformations-smart.js to use bun:sqlite Replace better-sqlite3 import with bun:sqlite to align with v7.1.0 migration. * chore: remove all better-sqlite3 references from codebase - Updated scripts/analyze-transformations-smart.js to use bun:sqlite - Merged PR #332: Refactored import/export scripts to use worker API instead of direct DB access - Updated PM2-to-Bun migration documentation All better-sqlite3 references have been removed from source code. Documentation references remain as appropriate historical context. * build: update plugin artifacts with merged changes Include built artifacts from PR #332 merge and analyze-transformations-smart.js update. --------- Co-authored-by: lee <loyalpartner@163.com> Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
411 lines
14 KiB
JavaScript
411 lines
14 KiB
JavaScript
#!/usr/bin/env node
|
|
|
|
import fs from 'fs';
|
|
import { Database } from 'bun:sqlite';
|
|
import readline from 'readline';
|
|
import path from 'path';
|
|
import { homedir } from 'os';
|
|
import { globSync } from 'glob';
|
|
|
|
// =============================================================================
|
|
// TOOL REPLACEMENT DECISION TABLE
|
|
// =============================================================================
|
|
//
|
|
// KEY INSIGHT: Observations are the SEMANTIC SYNTHESIS of tool results.
|
|
// They contain what Claude LEARNED, which is what future Claude needs.
|
|
//
|
|
// Tool | Replace OUTPUT? | Reason
|
|
// ------------------|-----------------|----------------------------------------
|
|
// Read | ✅ YES | Observation = what was learned from file
|
|
// Bash | ✅ YES | Observation = what command revealed
|
|
// Grep | ✅ YES | Observation = what search found
|
|
// Task | ✅ YES | Observation = what agent discovered
|
|
// WebFetch | ✅ YES | Observation = what page contained
|
|
// Glob | ⚠️ MAYBE | File lists are often small already
|
|
// WebSearch | ⚠️ MAYBE | Results are moderate size
|
|
// Edit | ❌ NO | OUTPUT is tiny ("success"), INPUT is ground truth
|
|
// Write | ❌ NO | OUTPUT is tiny, INPUT is the file content
|
|
// NotebookEdit | ❌ NO | OUTPUT is tiny, INPUT is the code
|
|
// TodoWrite | ❌ NO | Both tiny
|
|
// AskUserQuestion | ❌ NO | Both small, user input matters
|
|
// mcp__* | ⚠️ MAYBE | Varies by tool
|
|
//
|
|
// NEVER REPLACE INPUT - it contains the action (diff, command, query, path)
|
|
// ONLY REPLACE OUTPUT - swap raw results for semantic synthesis (observation)
|
|
//
|
|
// REPLACEMENT FORMAT:
|
|
// Original output gets replaced with:
|
|
// "[Strategically Omitted by Claude-Mem to save tokens]
|
|
//
|
|
// [Observation: Title here]
|
|
// Facts: ...
|
|
// Concepts: ..."
|
|
// =============================================================================
|
|
|
|
// Configuration
|
|
const DB_PATH = path.join(homedir(), '.claude-mem', 'claude-mem.db');
|
|
const MAX_TRANSCRIPTS = parseInt(process.env.MAX_TRANSCRIPTS || '500', 10);
|
|
|
|
// Find transcript files (most recent first)
|
|
const TRANSCRIPT_DIR = path.join(homedir(), '.claude/projects/-Users-alexnewman-Scripts-claude-mem');
|
|
const allTranscriptFiles = globSync(path.join(TRANSCRIPT_DIR, '*.jsonl'));
|
|
|
|
// Sort by modification time (most recent first), take MAX_TRANSCRIPTS
|
|
const transcriptFiles = allTranscriptFiles
|
|
.map(f => ({ path: f, mtime: fs.statSync(f).mtime }))
|
|
.sort((a, b) => b.mtime - a.mtime)
|
|
.slice(0, MAX_TRANSCRIPTS)
|
|
.map(f => f.path);
|
|
|
|
console.log(`Config: MAX_TRANSCRIPTS=${MAX_TRANSCRIPTS}`);
|
|
console.log(`Using ${transcriptFiles.length} most recent transcript files (of ${allTranscriptFiles.length} total)\n`);
|
|
|
|
// Map to store original content from transcript (both inputs and outputs)
|
|
const originalContent = new Map();
|
|
|
|
// Track contaminated (already transformed) transcripts
|
|
let skippedTranscripts = 0;
|
|
|
|
// Marker for already-transformed content (endless mode replacement format)
|
|
const TRANSFORMATION_MARKER = '**Key Facts:**';
|
|
|
|
// Auto-discover agent transcripts linked to main session
|
|
async function discoverAgentFiles(mainTranscriptPath) {
|
|
console.log('Discovering linked agent transcripts...');
|
|
|
|
const agentIds = new Set();
|
|
const fileStream = fs.createReadStream(mainTranscriptPath);
|
|
const rl = readline.createInterface({
|
|
input: fileStream,
|
|
crlfDelay: Infinity
|
|
});
|
|
|
|
for await (const line of rl) {
|
|
if (!line.includes('agentId')) continue;
|
|
|
|
try {
|
|
const obj = JSON.parse(line);
|
|
|
|
// Check for agentId in toolUseResult
|
|
if (obj.toolUseResult?.agentId) {
|
|
agentIds.add(obj.toolUseResult.agentId);
|
|
}
|
|
} catch (e) {
|
|
// Skip malformed lines
|
|
}
|
|
}
|
|
|
|
// Build agent file paths
|
|
const directory = path.dirname(mainTranscriptPath);
|
|
const agentFiles = Array.from(agentIds).map(id =>
|
|
path.join(directory, `agent-${id}.jsonl`)
|
|
).filter(filePath => fs.existsSync(filePath));
|
|
|
|
console.log(` → Found ${agentIds.size} agent IDs`);
|
|
console.log(` → ${agentFiles.length} agent files exist on disk\n`);
|
|
|
|
return agentFiles;
|
|
}
|
|
|
|
// Parse transcript to get BOTH tool_use (inputs) and tool_result (outputs) content
|
|
// Returns true if transcript is clean, false if contaminated (already transformed)
|
|
async function loadOriginalContentFromFile(filePath, fileLabel) {
|
|
const fileStream = fs.createReadStream(filePath);
|
|
const rl = readline.createInterface({
|
|
input: fileStream,
|
|
crlfDelay: Infinity
|
|
});
|
|
|
|
let count = 0;
|
|
let isContaminated = false;
|
|
const toolUseIdsFromThisFile = new Set();
|
|
|
|
for await (const line of rl) {
|
|
if (!line.includes('toolu_')) continue;
|
|
|
|
try {
|
|
const obj = JSON.parse(line);
|
|
|
|
if (obj.message?.content) {
|
|
for (const item of obj.message.content) {
|
|
// Capture tool_use (inputs)
|
|
if (item.type === 'tool_use' && item.id) {
|
|
const existing = originalContent.get(item.id) || { input: '', output: '', name: '' };
|
|
existing.input = JSON.stringify(item.input || {});
|
|
existing.name = item.name;
|
|
originalContent.set(item.id, existing);
|
|
toolUseIdsFromThisFile.add(item.id);
|
|
count++;
|
|
}
|
|
|
|
// Capture tool_result (outputs)
|
|
if (item.type === 'tool_result' && item.tool_use_id) {
|
|
const content = typeof item.content === 'string' ? item.content : JSON.stringify(item.content);
|
|
|
|
// Check for transformation marker - if found, transcript is contaminated
|
|
if (content.includes(TRANSFORMATION_MARKER)) {
|
|
isContaminated = true;
|
|
}
|
|
|
|
const existing = originalContent.get(item.tool_use_id) || { input: '', output: '', name: '' };
|
|
existing.output = content;
|
|
originalContent.set(item.tool_use_id, existing);
|
|
toolUseIdsFromThisFile.add(item.tool_use_id);
|
|
}
|
|
}
|
|
}
|
|
} catch (e) {
|
|
// Skip malformed lines
|
|
}
|
|
}
|
|
|
|
// If contaminated, remove all data from this file and report
|
|
if (isContaminated) {
|
|
for (const id of toolUseIdsFromThisFile) {
|
|
originalContent.delete(id);
|
|
}
|
|
console.log(` ⚠️ Skipped ${fileLabel} (already transformed)`);
|
|
return false;
|
|
}
|
|
|
|
if (count > 0) {
|
|
console.log(` → Found ${count} tool uses in ${fileLabel}`);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
async function loadOriginalContent() {
|
|
console.log('Loading original content from transcripts...');
|
|
console.log(` → Scanning ${transcriptFiles.length} transcript files...\n`);
|
|
|
|
let cleanTranscripts = 0;
|
|
|
|
// Load from all transcript files
|
|
for (const transcriptFile of transcriptFiles) {
|
|
const filename = path.basename(transcriptFile);
|
|
const isClean = await loadOriginalContentFromFile(transcriptFile, filename);
|
|
if (isClean) {
|
|
cleanTranscripts++;
|
|
} else {
|
|
skippedTranscripts++;
|
|
}
|
|
}
|
|
|
|
// Also check for any agent files not already included
|
|
for (const transcriptFile of transcriptFiles) {
|
|
if (transcriptFile.includes('agent-')) continue; // Already an agent file
|
|
const agentFiles = await discoverAgentFiles(transcriptFile);
|
|
for (const agentFile of agentFiles) {
|
|
if (transcriptFiles.includes(agentFile)) continue; // Already processed
|
|
const filename = path.basename(agentFile);
|
|
const isClean = await loadOriginalContentFromFile(agentFile, `agent transcript (${filename})`);
|
|
if (!isClean) {
|
|
skippedTranscripts++;
|
|
}
|
|
}
|
|
}
|
|
|
|
console.log(`\nTotal: Loaded original content for ${originalContent.size} tool uses (inputs + outputs)`);
|
|
if (skippedTranscripts > 0) {
|
|
console.log(`⚠️ Skipped ${skippedTranscripts} transcripts (already transformed with endless mode)`);
|
|
}
|
|
console.log();
|
|
}
|
|
|
|
// Strip __N suffix from tool_use_id to get base ID
|
|
function getBaseToolUseId(id) {
|
|
return id ? id.replace(/__\d+$/, '') : id;
|
|
}
|
|
|
|
// Query observations from database using tool_use_ids found in transcripts
|
|
// Handles suffixed IDs like toolu_abc__1, toolu_abc__2 matching transcript's toolu_abc
|
|
function queryObservations() {
|
|
// Get tool_use_ids from the loaded transcript content
|
|
const toolUseIds = Array.from(originalContent.keys());
|
|
|
|
if (toolUseIds.length === 0) {
|
|
console.log('No tool use IDs found in transcripts\n');
|
|
return [];
|
|
}
|
|
|
|
console.log(`Querying observations for ${toolUseIds.length} tool use IDs from transcripts...`);
|
|
|
|
const db = new Database(DB_PATH, { readonly: true });
|
|
|
|
// Build LIKE clauses to match both exact IDs and suffixed variants (toolu_abc, toolu_abc__1, etc)
|
|
const likeConditions = toolUseIds.map(() => 'tool_use_id LIKE ?').join(' OR ');
|
|
const likeParams = toolUseIds.map(id => `${id}%`);
|
|
|
|
const query = `
|
|
SELECT
|
|
id,
|
|
tool_use_id,
|
|
type,
|
|
narrative,
|
|
title,
|
|
facts,
|
|
concepts,
|
|
LENGTH(COALESCE(facts,'')) as facts_len,
|
|
LENGTH(COALESCE(title,'')) + LENGTH(COALESCE(facts,'')) as title_facts_len,
|
|
LENGTH(COALESCE(title,'')) + LENGTH(COALESCE(facts,'')) + LENGTH(COALESCE(concepts,'')) as compact_len,
|
|
LENGTH(COALESCE(narrative,'')) as narrative_len,
|
|
LENGTH(COALESCE(title,'')) + LENGTH(COALESCE(narrative,'')) + LENGTH(COALESCE(facts,'')) + LENGTH(COALESCE(concepts,'')) as full_obs_len
|
|
FROM observations
|
|
WHERE ${likeConditions}
|
|
ORDER BY created_at DESC
|
|
`;
|
|
|
|
const observations = db.prepare(query).all(...likeParams);
|
|
db.close();
|
|
|
|
console.log(`Found ${observations.length} observations matching tool use IDs (including suffixed variants)\n`);
|
|
|
|
return observations;
|
|
}
|
|
|
|
// Tools eligible for OUTPUT replacement (observation = semantic synthesis of result)
|
|
const REPLACEABLE_TOOLS = new Set(['Read', 'Bash', 'Grep', 'Task', 'WebFetch', 'Glob', 'WebSearch']);
|
|
|
|
// Analyze OUTPUT-only replacement for eligible tools
|
|
function analyzeTransformations(observations) {
|
|
console.log('='.repeat(110));
|
|
console.log('OUTPUT REPLACEMENT ANALYSIS (Eligible Tools Only)');
|
|
console.log('='.repeat(110));
|
|
console.log();
|
|
console.log('Eligible tools:', Array.from(REPLACEABLE_TOOLS).join(', '));
|
|
console.log();
|
|
|
|
// Group observations by BASE tool_use_id (strip __N suffix)
|
|
// This groups toolu_abc, toolu_abc__1, toolu_abc__2 together
|
|
const obsByToolId = new Map();
|
|
observations.forEach(obs => {
|
|
const baseId = getBaseToolUseId(obs.tool_use_id);
|
|
if (!obsByToolId.has(baseId)) {
|
|
obsByToolId.set(baseId, []);
|
|
}
|
|
obsByToolId.get(baseId).push(obs);
|
|
});
|
|
|
|
// Define strategies to test
|
|
const strategies = [
|
|
{ name: 'facts_only', field: 'facts_len', desc: 'Facts only (~400 chars)' },
|
|
{ name: 'title_facts', field: 'title_facts_len', desc: 'Title + Facts (~450 chars)' },
|
|
{ name: 'compact', field: 'compact_len', desc: 'Title + Facts + Concepts (~500 chars)' },
|
|
{ name: 'narrative', field: 'narrative_len', desc: 'Narrative only (~700 chars)' },
|
|
{ name: 'full', field: 'full_obs_len', desc: 'Full observation (~1200 chars)' }
|
|
];
|
|
|
|
// Track results per strategy
|
|
const results = {};
|
|
strategies.forEach(s => {
|
|
results[s.name] = {
|
|
transforms: 0,
|
|
noTransform: 0,
|
|
saved: 0,
|
|
totalOriginal: 0
|
|
};
|
|
});
|
|
|
|
// Track stats
|
|
let eligible = 0;
|
|
let ineligible = 0;
|
|
let noTranscript = 0;
|
|
const toolCounts = {};
|
|
|
|
// Analyze each tool use
|
|
obsByToolId.forEach((obsArray, toolUseId) => {
|
|
const original = originalContent.get(toolUseId);
|
|
const toolName = original?.name || 'unknown';
|
|
const outputLen = original?.output?.length || 0;
|
|
|
|
// Skip if no transcript data
|
|
if (!original || outputLen === 0) {
|
|
noTranscript++;
|
|
return;
|
|
}
|
|
|
|
// Skip if tool not eligible for replacement
|
|
if (!REPLACEABLE_TOOLS.has(toolName)) {
|
|
ineligible++;
|
|
return;
|
|
}
|
|
|
|
eligible++;
|
|
toolCounts[toolName] = (toolCounts[toolName] || 0) + 1;
|
|
|
|
// Sum lengths across ALL observations for this tool use (handles multiple obs per tool_use_id)
|
|
// Test each strategy - OUTPUT replacement only
|
|
strategies.forEach(strategy => {
|
|
const obsLen = obsArray.reduce((sum, obs) => sum + (obs[strategy.field] || 0), 0);
|
|
const r = results[strategy.name];
|
|
|
|
r.totalOriginal += outputLen;
|
|
|
|
if (obsLen > 0 && obsLen < outputLen) {
|
|
r.transforms++;
|
|
r.saved += (outputLen - obsLen);
|
|
} else {
|
|
r.noTransform++;
|
|
}
|
|
});
|
|
});
|
|
|
|
// Print results
|
|
console.log('TOOL BREAKDOWN:');
|
|
Object.entries(toolCounts).sort((a, b) => b[1] - a[1]).forEach(([tool, count]) => {
|
|
console.log(` ${tool}: ${count}`);
|
|
});
|
|
console.log();
|
|
console.log('-'.repeat(100));
|
|
console.log(`Eligible tool uses: ${eligible}`);
|
|
console.log(`Ineligible (Edit/Write/etc): ${ineligible}`);
|
|
console.log(`No transcript data: ${noTranscript}`);
|
|
console.log('-'.repeat(100));
|
|
console.log();
|
|
console.log('Strategy Transforms No Transform Chars Saved Original Size Savings %');
|
|
console.log('-'.repeat(100));
|
|
|
|
strategies.forEach(strategy => {
|
|
const r = results[strategy.name];
|
|
const pct = r.totalOriginal > 0 ? ((r.saved / r.totalOriginal) * 100).toFixed(1) : '0.0';
|
|
console.log(
|
|
`${strategy.desc.padEnd(35)} ${String(r.transforms).padStart(10)} ${String(r.noTransform).padStart(12)} ${String(r.saved.toLocaleString()).padStart(13)} ${String(r.totalOriginal.toLocaleString()).padStart(15)} ${pct.padStart(8)}%`
|
|
);
|
|
});
|
|
|
|
console.log('-'.repeat(100));
|
|
console.log();
|
|
|
|
// Find best strategy
|
|
let bestStrategy = null;
|
|
let bestSavings = 0;
|
|
strategies.forEach(strategy => {
|
|
if (results[strategy.name].saved > bestSavings) {
|
|
bestSavings = results[strategy.name].saved;
|
|
bestStrategy = strategy;
|
|
}
|
|
});
|
|
|
|
if (bestStrategy) {
|
|
const r = results[bestStrategy.name];
|
|
const pct = ((r.saved / r.totalOriginal) * 100).toFixed(1);
|
|
console.log(`BEST STRATEGY: ${bestStrategy.desc}`);
|
|
console.log(` - Transforms ${r.transforms} of ${eligible} eligible tool uses (${((r.transforms/eligible)*100).toFixed(1)}%)`);
|
|
console.log(` - Saves ${r.saved.toLocaleString()} of ${r.totalOriginal.toLocaleString()} chars (${pct}% reduction)`);
|
|
}
|
|
|
|
console.log();
|
|
}
|
|
|
|
// Main execution
|
|
async function main() {
|
|
await loadOriginalContent();
|
|
const observations = queryObservations();
|
|
analyzeTransformations(observations);
|
|
}
|
|
|
|
main().catch(error => {
|
|
console.error('Fatal error:', error);
|
|
process.exit(1);
|
|
});
|