Files
anything-llm/server/utils/boot/eagerLoadContextWindows.js
Sean Hatfield 0b18ac6577 Model context limit auto-detection for LM Studio and Ollama LLM Providers (#4468)
* auto model context limit detection for ollama llm provider

* auto model context limit detection for lmstudio llm provider

* Patch Ollama to function and sync context windows like Foundry

* normalize how model context windows are cached from endpoint service
TODO: move this into a global utility class with MODEL_MAP
eager load models on boot to pre-cache them
add performance model improvements into ollama agent as well as apply n_ctx

* remove debug log

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
2025-10-02 11:54:19 -07:00

36 lines
1.2 KiB
JavaScript

/**
 * Eagerly load the context windows for the current provider.
 * This is done to ensure that the context windows are pre-cached when the server boots.
 *
 * This prevents us from having misreporting of the context window before a chat is ever sent.
 * eg: when viewing the attachments in the workspace - the context window would be misreported if a chat
 * has not been sent yet.
 *
 * Providers not listed below (or an unset LLM_PROVIDER) are a silent no-op.
 * @returns {Promise<void>}
 */
async function eagerLoadContextWindows() {
  // Provider key -> [module path, exported class name, display label].
  // Module paths are kept as strings so each provider module is require()'d
  // lazily - only the matched provider is ever loaded at boot.
  const CACHEABLE_PROVIDERS = {
    lmstudio: ["../AiProviders/lmStudio", "LMStudioLLM", "LMStudio"],
    ollama: ["../AiProviders/ollama", "OllamaAILLM", "Ollama"],
    foundry: ["../AiProviders/foundry", "FoundryLLM", "Foundry"],
  };

  const entry = CACHEABLE_PROVIDERS[process.env.LLM_PROVIDER];
  if (!entry) return; // Nothing to pre-cache for this provider.

  const [modulePath, className, label] = entry;
  const Provider = require(modulePath)[className];
  await Provider.cacheContextWindows(true);
  console.log(`\x1b[32mPre-cached context windows for ${label}\x1b[0m`);
}
// Single export: invoked once during server boot to warm the context-window cache.
module.exports = eagerLoadContextWindows;