mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-25 17:15:37 +02:00
* add microsoft foundry local llm and agent providers * minor change to fix early stop token + overloading of context window always use user defined window _unless_ it is larger than the model's real context window cache the context windows when we can from the API (0.7.*)+ Unload model forcefully on model change to prevent resource hogging * add back token preference since some models have very large windows and can crash a machine normalize cases --------- Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
104 lines
2.6 KiB
JavaScript
104 lines
2.6 KiB
JavaScript
const OpenAI = require("openai");
|
|
const Provider = require("./ai-provider.js");
|
|
const InheritMultiple = require("./helpers/classes.js");
|
|
const UnTooled = require("./helpers/untooled.js");
|
|
const {
|
|
parseFoundryBasePath,
|
|
FoundryLLM,
|
|
} = require("../../../AiProviders/foundry/index.js");
|
|
|
|
/**
 * The agent provider for the Microsoft Foundry Local provider.
 * Uses UnTooled helpers because Foundry Local does not support native
 * tool calling, so tool invocation is emulated via prompting.
 */
class FoundryProvider extends InheritMultiple([Provider, UnTooled]) {
  // Model identifier completions run against (defaults to env preference).
  model;

  /**
   * @param {object} [config]
   * @param {string} [config.model] - Model to use; falls back to
   *   process.env.FOUNDRY_MODEL_PREF when not provided.
   */
  constructor(config = {}) {
    super();
    const { model = process.env.FOUNDRY_MODEL_PREF } = config;
    // Foundry Local exposes an OpenAI-compatible endpoint; no API key is used.
    const client = new OpenAI({
      baseURL: parseFoundryBasePath(process.env.FOUNDRY_BASE_PATH),
      apiKey: null,
      maxRetries: 3,
    });

    this._client = client;
    this.model = model;
    this.verbose = true;
  }

  /**
   * Get the client.
   * @returns {OpenAI.OpenAI}
   */
  get client() {
    return this._client;
  }

  get supportsAgentStreaming() {
    return true;
  }

  /**
   * Run a non-streamed chat completion for emulated function calling.
   * @param {{messages?: object[]}} param0 - Chat messages to send.
   * @returns {Promise<string|null>} Assistant message content, or null on failure.
   */
  async #handleFunctionCallChat({ messages = [] }) {
    // Refresh cached context windows so promptWindowLimit() is accurate.
    await FoundryLLM.cacheContextWindows();
    return await this.client.chat.completions
      .create({
        model: this.model,
        messages,
        max_completion_tokens: FoundryLLM.promptWindowLimit(this.model),
      })
      .then((result) => {
        if (!Object.hasOwn(result, "choices"))
          throw new Error("Microsoft Foundry Local chat: No results!");
        if (result.choices.length === 0)
          throw new Error("Microsoft Foundry Local chat: No results length!");
        return result.choices[0].message.content;
      })
      .catch((error) => {
        // Best-effort: log the failure instead of swallowing it silently,
        // but still return null so callers can degrade gracefully.
        console.error(
          `FoundryProvider function-call chat failed: ${error?.message ?? error}`
        );
        return null;
      });
  }

  /**
   * Run a streamed chat completion for emulated function calling.
   * @param {{messages?: object[]}} param0 - Chat messages to send.
   * @returns {Promise<object>} The streaming completion response.
   */
  async #handleFunctionCallStream({ messages = [] }) {
    // Refresh cached context windows so promptWindowLimit() is accurate.
    await FoundryLLM.cacheContextWindows();
    return await this.client.chat.completions.create({
      model: this.model,
      stream: true,
      messages,
      max_completion_tokens: FoundryLLM.promptWindowLimit(this.model),
    });
  }

  /**
   * Stream a chat with emulated tool support via the UnTooled mixin.
   * @param {object[]} messages - Chat messages.
   * @param {object[]} [functions] - Tool definitions to emulate.
   * @param {Function|null} [eventHandler] - Stream event callback.
   */
  async stream(messages, functions = [], eventHandler = null) {
    return await UnTooled.prototype.stream.call(
      this,
      messages,
      functions,
      this.#handleFunctionCallStream.bind(this),
      eventHandler
    );
  }

  /**
   * Complete a chat with emulated tool support via the UnTooled mixin.
   * @param {object[]} messages - Chat messages.
   * @param {object[]} [functions] - Tool definitions to emulate.
   */
  async complete(messages, functions = []) {
    return await UnTooled.prototype.complete.call(
      this,
      messages,
      functions,
      this.#handleFunctionCallChat.bind(this)
    );
  }

  /**
   * Get the cost of the completion.
   * @param _usage The completion to get the cost for.
   * @returns {number} Always 0 — this provider does no cost tracking.
   */
  getCost(_usage) {
    return 0;
  }
}

module.exports = FoundryProvider;