This commit is contained in:
timothycarambat
2025-02-19 12:41:27 -08:00
parent f5186b8eb9
commit 357bf04c19
2 changed files with 12 additions and 14 deletions

View File

@@ -77,9 +77,8 @@ async function messageArrayCompressor(llm, messages = [], rawHistory = []) {
compressedContext = context;
}
system.content = `${compressedPrompt}${
compressedContext ? `\nContext: ${compressedContext}` : ""
}`;
system.content = `${compressedPrompt}${compressedContext ? `\nContext: ${compressedContext}` : ""
}`;
resolve(system);
});
@@ -285,23 +284,22 @@ function truncateContent({
const initialInputSize = tokenManager.countFromString(input);
if (initialInputSize < targetTokenSize) return input;
console.log("input", input.length);
if (input.length > TokenManager.MAX_STRING_LENGTH) {
console.log(
"[truncateContent] input is very large - truncating end of input by estimate"
);
const charsToTruncate = targetTokenSize * TokenManager.TOKEN_CHAR_ESTIMATE; // approx number of chars to truncate
return input.slice(0, input.length - charsToTruncate);
const charsToTruncate = input.length - (targetTokenSize * TokenManager.TOKEN_CHAR_ESTIMATE); // approx number of chars to truncate
const truncatedInput = input.slice(0, (charsToTruncate * -1)) + truncText;
console.log(`[Content Truncated (estimated)] ${initialInputSize} input tokens, target: ${targetTokenSize} => ${tokenManager.countFromString(truncatedInput)} tokens.`);
return truncatedInput;
}
// if the delta is the token difference between where our prompt is in size
// and where we ideally need to land.
console.log("Truncating input via encoder method");
const delta = initialInputSize - targetTokenSize;
const tokenChunks = tokenManager.tokensFromString(input);
const allowedTokens = tokenChunks.slice(0, delta * -1);
const truncatedText = tokenManager.bytesFromTokens(allowedTokens) + truncText;
console.log(
`[Content Truncated] ${initialInputSize} => ${allowedTokens.length} tokens.`
);
console.log(`[Content Truncated (encoder)] ${initialInputSize} tokens, target: ${targetTokenSize} => ${allowedTokens.length} tokens.`);
return truncatedText;
}

View File

@@ -12,8 +12,8 @@ const { getEncodingNameForModel, getEncoding } = require("js-tiktoken");
class TokenManager {
static instance = null;
static currentModel = null;
static MAX_STRING_LENGTH = 400_000; // 400k chars as a sanity limit for low-end devices
static TOKEN_CHAR_ESTIMATE = 6;
static MAX_STRING_LENGTH = 400_000; // 400k chars as a sanity limit for low-end devices
static TOKEN_CHAR_ESTIMATE = 3;
constructor(model = "gpt-3.5-turbo") {
if (TokenManager.instance && TokenManager.currentModel === model) {
@@ -75,7 +75,7 @@ class TokenManager {
*/
countFromString(input = "") {
if (input.length > TokenManager.MAX_STRING_LENGTH) {
this.log("input is very large - estimating tokens for performance");
this.log("estimating token count for performance...");
return Math.ceil(input.length / TokenManager.TOKEN_CHAR_ESTIMATE);
}