mirror of
https://github.com/thedotmack/claude-mem
synced 2026-04-25 17:15:04 +02:00
Merge branch 'pr-1368' into integration/validation-batch
# Conflicts: # plugin/scripts/context-generator.cjs # plugin/scripts/mcp-server.cjs # plugin/scripts/worker-service.cjs # plugin/ui/viewer-bundle.js
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -18,6 +18,8 @@ import { logger } from '../../utils/logger.js';
|
||||
import { buildInitPrompt, buildObservationPrompt, buildSummaryPrompt, buildContinuationPrompt } from '../../sdk/prompts.js';
|
||||
import { SettingsDefaultsManager } from '../../shared/SettingsDefaultsManager.js';
|
||||
import { getCredential } from '../../shared/EnvManager.js';
|
||||
import { USER_SETTINGS_PATH } from '../../shared/paths.js';
|
||||
import { estimateTokens } from '../../shared/timeline-formatting.js';
|
||||
import type { ActiveSession, ConversationMessage } from '../worker-types.js';
|
||||
import { ModeManager } from '../domain/ModeManager.js';
|
||||
import {
|
||||
@@ -56,6 +58,10 @@ const GEMINI_RPM_LIMITS: Record<GeminiModel, number> = {
|
||||
// Track last request time for rate limiting (epoch ms of the previous Gemini request;
// module-level so the limit is shared across all agent instances in this process)
let lastRequestTime = 0;

// Context window limits (prevents O(N²) token cost growth as conversations lengthen).
// These are fallback defaults — user settings CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES /
// CLAUDE_MEM_GEMINI_MAX_TOKENS override them at runtime.
const DEFAULT_MAX_CONTEXT_MESSAGES = 20; // Maximum messages to keep in conversation history
const DEFAULT_MAX_ESTIMATED_TOKENS = 100000; // ~100k tokens max context (safety limit)
|
||||
|
||||
/**
|
||||
* Enforce RPM rate limit for Gemini free tier.
|
||||
* Waits the required time between requests based on model's RPM limit + 100ms safety buffer.
|
||||
@@ -346,6 +352,54 @@ export class GeminiAgent {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncate conversation history to prevent runaway context costs.
|
||||
* Keeps most recent messages within both message count and token budget.
|
||||
* Returns a new array — never mutates the original history.
|
||||
*/
|
||||
private truncateHistory(history: ConversationMessage[]): ConversationMessage[] {
|
||||
const settings = SettingsDefaultsManager.loadFromFile(USER_SETTINGS_PATH);
|
||||
|
||||
const MAX_CONTEXT_MESSAGES = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES) || DEFAULT_MAX_CONTEXT_MESSAGES;
|
||||
const MAX_ESTIMATED_TOKENS = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS) || DEFAULT_MAX_ESTIMATED_TOKENS;
|
||||
|
||||
if (history.length <= MAX_CONTEXT_MESSAGES) {
|
||||
// Check token count even if message count is ok
|
||||
const totalTokens = history.reduce((sum, m) => sum + estimateTokens(m.content), 0);
|
||||
if (totalTokens <= MAX_ESTIMATED_TOKENS) {
|
||||
return history;
|
||||
}
|
||||
}
|
||||
|
||||
// Sliding window: keep most recent messages within limits
|
||||
const truncated: ConversationMessage[] = [];
|
||||
let tokenCount = 0;
|
||||
|
||||
// Process messages in reverse (most recent first)
|
||||
for (let i = history.length - 1; i >= 0; i--) {
|
||||
const msg = history[i];
|
||||
const msgTokens = estimateTokens(msg.content);
|
||||
|
||||
// Always include at least the newest message — an empty contents array
|
||||
// would cause a hard Gemini API error, which is worse than an oversized request.
|
||||
if (truncated.length > 0 && (truncated.length >= MAX_CONTEXT_MESSAGES || tokenCount + msgTokens > MAX_ESTIMATED_TOKENS)) {
|
||||
logger.warn('SDK', 'Context window truncated to prevent runaway costs', {
|
||||
originalMessages: history.length,
|
||||
keptMessages: truncated.length,
|
||||
droppedMessages: i + 1,
|
||||
estimatedTokens: tokenCount,
|
||||
tokenLimit: MAX_ESTIMATED_TOKENS
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
truncated.unshift(msg); // Add to beginning
|
||||
tokenCount += msgTokens;
|
||||
}
|
||||
|
||||
return truncated;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert shared ConversationMessage array to Gemini's contents format
|
||||
* Maps 'assistant' role to 'model' for Gemini API compatibility
|
||||
@@ -358,8 +412,8 @@ export class GeminiAgent {
|
||||
}
|
||||
|
||||
/**
|
||||
* Query Gemini via REST API with full conversation history (multi-turn)
|
||||
* Sends the entire conversation context for coherent responses
|
||||
* Query Gemini via REST API with truncated conversation history (multi-turn)
|
||||
* Truncates history to prevent O(N²) token cost growth, then sends for coherent responses
|
||||
*/
|
||||
private async queryGeminiMultiTurn(
|
||||
history: ConversationMessage[],
|
||||
@@ -367,11 +421,13 @@ export class GeminiAgent {
|
||||
model: GeminiModel,
|
||||
rateLimitingEnabled: boolean
|
||||
): Promise<{ content: string; tokensUsed?: number }> {
|
||||
const contents = this.conversationToGeminiContents(history);
|
||||
const totalChars = history.reduce((sum, m) => sum + m.content.length, 0);
|
||||
const truncatedHistory = this.truncateHistory(history);
|
||||
const contents = this.conversationToGeminiContents(truncatedHistory);
|
||||
const totalChars = truncatedHistory.reduce((sum, m) => sum + m.content.length, 0);
|
||||
|
||||
logger.debug('SDK', `Querying Gemini multi-turn (${model})`, {
|
||||
turns: history.length,
|
||||
turns: truncatedHistory.length,
|
||||
totalTurns: history.length,
|
||||
totalChars
|
||||
});
|
||||
|
||||
|
||||
@@ -94,6 +94,8 @@ export class SettingsRoutes extends BaseRouteHandler {
|
||||
'CLAUDE_MEM_GEMINI_API_KEY',
|
||||
'CLAUDE_MEM_GEMINI_MODEL',
|
||||
'CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED',
|
||||
'CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES',
|
||||
'CLAUDE_MEM_GEMINI_MAX_TOKENS',
|
||||
// OpenRouter Configuration
|
||||
'CLAUDE_MEM_OPENROUTER_API_KEY',
|
||||
'CLAUDE_MEM_OPENROUTER_MODEL',
|
||||
@@ -248,6 +250,22 @@ export class SettingsRoutes extends BaseRouteHandler {
|
||||
}
|
||||
}
|
||||
|
||||
// Validate CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES
|
||||
if (settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES) {
|
||||
const count = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES, 10);
|
||||
if (isNaN(count) || count < 1 || count > 100) {
|
||||
return { valid: false, error: 'CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES must be between 1 and 100' };
|
||||
}
|
||||
}
|
||||
|
||||
// Validate CLAUDE_MEM_GEMINI_MAX_TOKENS
|
||||
if (settings.CLAUDE_MEM_GEMINI_MAX_TOKENS) {
|
||||
const tokens = parseInt(settings.CLAUDE_MEM_GEMINI_MAX_TOKENS, 10);
|
||||
if (isNaN(tokens) || tokens < 1000 || tokens > 1000000) {
|
||||
return { valid: false, error: 'CLAUDE_MEM_GEMINI_MAX_TOKENS must be between 1000 and 1000000' };
|
||||
}
|
||||
}
|
||||
|
||||
// Validate CLAUDE_MEM_CONTEXT_OBSERVATIONS
|
||||
if (settings.CLAUDE_MEM_CONTEXT_OBSERVATIONS) {
|
||||
const obsCount = parseInt(settings.CLAUDE_MEM_CONTEXT_OBSERVATIONS, 10);
|
||||
|
||||
@@ -23,6 +23,8 @@ export interface SettingsDefaults {
|
||||
CLAUDE_MEM_GEMINI_API_KEY: string;
|
||||
CLAUDE_MEM_GEMINI_MODEL: string; // 'gemini-2.5-flash-lite' | 'gemini-2.5-flash' | 'gemini-3-flash-preview'
|
||||
CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: string; // 'true' | 'false' - enable rate limiting for free tier
|
||||
CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: string; // Max messages in Gemini context window (prevents O(N²) cost growth)
|
||||
CLAUDE_MEM_GEMINI_MAX_TOKENS: string; // Max estimated tokens for Gemini context (~100k safety limit)
|
||||
CLAUDE_MEM_OPENROUTER_API_KEY: string;
|
||||
CLAUDE_MEM_OPENROUTER_MODEL: string;
|
||||
CLAUDE_MEM_OPENROUTER_SITE_URL: string;
|
||||
@@ -90,6 +92,8 @@ export class SettingsDefaultsManager {
|
||||
CLAUDE_MEM_GEMINI_API_KEY: '', // Empty by default, can be set via UI or env
|
||||
CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite', // Default Gemini model (highest free tier RPM)
|
||||
CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: 'true', // Rate limiting ON by default for free tier users
|
||||
CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '20', // Max messages in Gemini context window
|
||||
CLAUDE_MEM_GEMINI_MAX_TOKENS: '100000', // Max estimated tokens (~100k safety limit)
|
||||
CLAUDE_MEM_OPENROUTER_API_KEY: '', // Empty by default, can be set via UI or env
|
||||
CLAUDE_MEM_OPENROUTER_MODEL: 'xiaomi/mimo-v2-flash:free', // Default OpenRouter model (free tier)
|
||||
CLAUDE_MEM_OPENROUTER_SITE_URL: '', // Optional: for OpenRouter analytics
|
||||
|
||||
@@ -358,6 +358,90 @@ describe('GeminiAgent', () => {
|
||||
}
|
||||
});
|
||||
|
||||
  // Exercises GeminiAgent's context-window truncation end-to-end: sessions are
  // started against a mocked global.fetch and the captured request body is
  // inspected for the truncated `contents` array.
  describe('conversation history truncation', () => {
    it('should truncate history when message count exceeds limit', async () => {
      // Build a history with 25 small messages (limit is 20)
      const history: any[] = [];
      for (let i = 0; i < 25; i++) {
        history.push({ role: i % 2 === 0 ? 'user' : 'assistant', content: `message ${i}` });
      }

      // Minimal ActiveSession shape; `as any` skips fields irrelevant to truncation.
      const session = {
        sessionDbId: 1,
        contentSessionId: 'test-session',
        memorySessionId: 'mem-session-123',
        project: 'test-project',
        userPrompt: 'test prompt',
        conversationHistory: history,
        lastPromptNumber: 2,
        cumulativeInputTokens: 0,
        cumulativeOutputTokens: 0,
        pendingMessages: [],
        abortController: new AbortController(),
        generatorPromise: null,
        earliestPendingTimestamp: null,
        currentProvider: null,
        startTime: Date.now(),
        processingMessageIds: []
      } as any;

      // Stub fetch with a minimal successful Gemini generateContent response.
      global.fetch = mock(() => Promise.resolve(new Response(JSON.stringify({
        candidates: [{ content: { parts: [{ text: 'response' }] } }]
      }))));

      await agent.startSession(session);

      // The request body should have truncated contents (init adds 1 more, so 26 total → truncated to 20)
      const body = JSON.parse((global.fetch as any).mock.calls[0][1].body);
      expect(body.contents.length).toBeLessThanOrEqual(20);
    });

    it('should always keep at least the newest message even if it exceeds token limit', async () => {
      // Override settings to have a very low token limit
      loadFromFileSpy.mockImplementation(() => ({
        ...SettingsDefaultsManager.getAllDefaults(),
        CLAUDE_MEM_GEMINI_API_KEY: 'test-api-key',
        CLAUDE_MEM_GEMINI_MODEL: 'gemini-2.5-flash-lite',
        CLAUDE_MEM_GEMINI_RATE_LIMITING_ENABLED: 'false',
        CLAUDE_MEM_GEMINI_MAX_CONTEXT_MESSAGES: '20',
        CLAUDE_MEM_GEMINI_MAX_TOKENS: '1000', // Very low: ~250 chars
        CLAUDE_MEM_DATA_DIR: '/tmp/claude-mem-test',
      }));

      // Create a single large message that exceeds the token limit
      const largeContent = 'x'.repeat(8000); // ~2000 tokens, well above 1000 limit

      // Empty history: the oversized user prompt becomes the sole (newest) message.
      const session = {
        sessionDbId: 1,
        contentSessionId: 'test-session',
        memorySessionId: 'mem-session-123',
        project: 'test-project',
        userPrompt: largeContent,
        conversationHistory: [],
        lastPromptNumber: 1,
        cumulativeInputTokens: 0,
        cumulativeOutputTokens: 0,
        pendingMessages: [],
        abortController: new AbortController(),
        generatorPromise: null,
        earliestPendingTimestamp: null,
        currentProvider: null,
        startTime: Date.now(),
        processingMessageIds: []
      } as any;

      global.fetch = mock(() => Promise.resolve(new Response(JSON.stringify({
        candidates: [{ content: { parts: [{ text: 'response' }] } }]
      }))));

      await agent.startSession(session);

      // Should still send at least 1 message (the newest), not empty contents
      const body = JSON.parse((global.fetch as any).mock.calls[0][1].body);
      expect(body.contents.length).toBeGreaterThanOrEqual(1);
    });
  });
|
||||
|
||||
describe('gemini-3-flash-preview model support', () => {
|
||||
it('should accept gemini-3-flash-preview as a valid model', async () => {
|
||||
// The GeminiModel type includes gemini-3-flash-preview - compile-time check
|
||||
|
||||
Reference in New Issue
Block a user