mirror of https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-25 17:15:37 +02:00
wip
@@ -77,9 +77,8 @@ async function messageArrayCompressor(llm, messages = [], rawHistory = []) {
       compressedContext = context;
     }

-    system.content = `${compressedPrompt}${
-      compressedContext ? `\nContext: ${compressedContext}` : ""
-    }`;
+    system.content = `${compressedPrompt}${compressedContext ? `\nContext: ${compressedContext}` : ""
+      }`;
     resolve(system);
   });

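Editor's note: the old multi-line template and the new collapsed one build the same string; only the literal's layout changes. A minimal standalone sketch, with hypothetical values standing in for the compressor's output:

    // Hypothetical values; the real ones come from messageArrayCompressor
    const compressedPrompt = "You are a helpful assistant.";
    const compressedContext = "User prefers terse answers.";
    const content = `${compressedPrompt}${compressedContext ? `\nContext: ${compressedContext}` : ""}`;
    console.log(content);
    // => "You are a helpful assistant.\nContext: User prefers terse answers."
    // With compressedContext = "" (falsy), the "\nContext: ..." suffix is dropped entirely.
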
@@ -285,23 +284,22 @@ function truncateContent({
   const initialInputSize = tokenManager.countFromString(input);
   if (initialInputSize < targetTokenSize) return input;

   console.log("input", input.length);
   if (input.length > TokenManager.MAX_STRING_LENGTH) {
     console.log(
       "[truncateContent] input is very large - truncating end of input by estimate"
     );
-    const charsToTruncate = targetTokenSize * TokenManager.TOKEN_CHAR_ESTIMATE; // approx number of chars to truncate
-    return input.slice(0, input.length - charsToTruncate);
+    const charsToTruncate = input.length - (targetTokenSize * TokenManager.TOKEN_CHAR_ESTIMATE); // approx number of chars to truncate
+    const truncatedInput = input.slice(0, (charsToTruncate * -1)) + truncText;
+    console.log(`[Content Truncated (estimated)] ${initialInputSize} input tokens, target: ${targetTokenSize} => ${tokenManager.countFromString(truncatedInput)} tokens.`);
+    return truncatedInput;
   }

-  // if the delta is the token difference between where our prompt is in size
-  // and where we ideally need to land.
+  console.log("Truncating input via encoder method");
   const delta = initialInputSize - targetTokenSize;
   const tokenChunks = tokenManager.tokensFromString(input);
   const allowedTokens = tokenChunks.slice(0, delta * -1);
   const truncatedText = tokenManager.bytesFromTokens(allowedTokens) + truncText;
-  console.log(
-    `[Content Truncated] ${initialInputSize} => ${allowedTokens.length} tokens.`
-  );
+  console.log(`[Content Truncated (encoder)] ${initialInputSize} tokens, target: ${targetTokenSize} => ${allowedTokens.length} tokens.`);
   return truncatedText;
 }

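Editor's note: the old estimate branch computed how many characters to KEEP (targetTokenSize * TOKEN_CHAR_ESTIMATE) but then sliced that many off, keeping the wrong portion; the fix computes how many characters to REMOVE and slices those off the end. A minimal sketch of the corrected arithmetic, with hypothetical sizes:

    // Hypothetical sizes chosen only to illustrate the slice arithmetic
    const TOKEN_CHAR_ESTIMATE = 3;
    const inputLength = 500_000;   // chars in the oversized input
    const targetTokenSize = 4_000; // token budget we want to land under
    const charsToTruncate = inputLength - targetTokenSize * TOKEN_CHAR_ESTIMATE;
    console.log(charsToTruncate);                 // 488000 chars removed
    console.log(inputLength - charsToTruncate);   // 12000 chars kept
    // input.slice(0, charsToTruncate * -1) keeps the first 12_000 chars,
    // i.e. roughly targetTokenSize tokens at ~3 chars per token.
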
@@ -12,8 +12,8 @@ const { getEncodingNameForModel, getEncoding } = require("js-tiktoken");
 class TokenManager {
   static instance = null;
   static currentModel = null;
-  static MAX_STRING_LENGTH = 400_000; // 400k chars as a sanity limit for low-end devices
-  static TOKEN_CHAR_ESTIMATE = 6;
+  static MAX_STRING_LENGTH = 400_000; // 1M chars as a sanity limit for low-end devices
+  static TOKEN_CHAR_ESTIMATE = 3;

   constructor(model = "gpt-3.5-turbo") {
     if (TokenManager.instance && TokenManager.currentModel === model) {
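Editor's note: halving TOKEN_CHAR_ESTIMATE from 6 to 3 chars per token doubles the estimated token count for the same string, presumably to err on the side of over-counting so truncation triggers sooner rather than later. A quick illustration of the effect:

    // Same 400k-char string under both estimates
    const chars = 400_000;
    console.log(Math.ceil(chars / 6)); // 66667 tokens (old estimate)
    console.log(Math.ceil(chars / 3)); // 133334 tokens (new estimate)
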
@@ -75,7 +75,7 @@ class TokenManager {
    */
   countFromString(input = "") {
     if (input.length > TokenManager.MAX_STRING_LENGTH) {
-      this.log("input is very large - estimating tokens for performance");
+      this.log("estimating token count for performance...");
       return Math.ceil(input.length / TokenManager.TOKEN_CHAR_ESTIMATE);
     }

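Editor's note: for very large inputs, countFromString skips the tiktoken encoder entirely and estimates from character length. A minimal standalone sketch of that fast path only; the names are trimmed and the encoder branch is stubbed out, since the real class wraps js-tiktoken encoders:

    // Sketch of the fast path; the full class falls through to a
    // js-tiktoken encoder for strings under MAX_STRING_LENGTH.
    class TokenManagerSketch {
      static MAX_STRING_LENGTH = 400_000;
      static TOKEN_CHAR_ESTIMATE = 3;
      countFromString(input = "") {
        if (input.length > TokenManagerSketch.MAX_STRING_LENGTH) {
          // Avoids an expensive exact encode on low-end devices
          return Math.ceil(input.length / TokenManagerSketch.TOKEN_CHAR_ESTIMATE);
        }
        return 0; // placeholder for the exact, encoder-based count
      }
    }
    console.log(new TokenManagerSketch().countFromString("x".repeat(500_000))); // 166667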