mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-26 01:25:15 +02:00
Add tokenizer improvements via Singleton class and estimation (#3072)
* Add tokenizer improvements via Singleton class linting * dev build * Estimation fallback when string exceeds a fixed byte size * Add notice to tiktoken on backend
This commit is contained in:
@@ -66,7 +66,7 @@ async function loadGithubRepo(args, response) {
|
||||
published: new Date().toLocaleString(),
|
||||
wordCount: doc.pageContent.split(" ").length,
|
||||
pageContent: doc.pageContent,
|
||||
token_count_estimate: tokenizeString(doc.pageContent).length,
|
||||
token_count_estimate: tokenizeString(doc.pageContent),
|
||||
};
|
||||
console.log(
|
||||
`[Github Loader]: Saving ${doc.metadata.source} to ${outFolder}`
|
||||
|
||||
Reference in New Issue
Block a user