Files
anything-llm/server/utils/agents/aibitat/providers/foundry.js
Sean Hatfield 599a3fd8b8 Microsoft Foundry Local LLM provider & agent provider (#4435)
* add microsoft foundry local llm and agent providers

* minor change to fix early stop token + overloading of context window
always use user defined window _unless_ it is larger than the model's real context window
cache the context windows when we can from the API (0.7.*)+
Unload model forcefully on model change to prevent resource hogging

* add back token preference since some models have very large windows and can crash a machine
normalize cases

---------

Co-authored-by: Timothy Carambat <rambat1010@gmail.com>
2025-10-01 20:04:13 -07:00

104 lines
2.6 KiB
JavaScript

const OpenAI = require("openai");
const Provider = require("./ai-provider.js");
const InheritMultiple = require("./helpers/classes.js");
const UnTooled = require("./helpers/untooled.js");
const {
parseFoundryBasePath,
FoundryLLM,
} = require("../../../AiProviders/foundry/index.js");
/**
* The agent provider for the Foundry provider.
* Uses untooled because it doesn't support tool calling.
*/
/**
 * The agent provider for the Microsoft Foundry Local provider.
 * Uses UnTooled because the backend does not support native tool calling,
 * so tool invocation is emulated via prompt engineering.
 */
class FoundryProvider extends InheritMultiple([Provider, UnTooled]) {
  model;

  /**
   * @param {object} [config]
   * @param {string} [config.model] - Model identifier to use. Falls back to
   * the FOUNDRY_MODEL_PREF environment variable when not provided.
   */
  constructor(config = {}) {
    const { model = process.env.FOUNDRY_MODEL_PREF } = config;
    super();
    const client = new OpenAI({
      // Foundry Local runs locally and does not require an API key.
      baseURL: parseFoundryBasePath(process.env.FOUNDRY_BASE_PATH),
      apiKey: null,
      maxRetries: 3,
    });
    this._client = client;
    this.model = model;
    this.verbose = true;
  }

  /**
   * Get the OpenAI-compatible client pointed at the local Foundry endpoint.
   * @returns {OpenAI.OpenAI}
   */
  get client() {
    return this._client;
  }

  /** @returns {boolean} Streaming responses are supported for agent flows. */
  get supportsAgentStreaming() {
    return true;
  }

  /**
   * Run a non-streaming chat completion for emulated function calling.
   * Refreshes the cached model context windows first so the token cap
   * reflects the model's real window.
   * @param {{messages?: object[]}} param0
   * @returns {Promise<string|null>} Assistant message content, or null on failure.
   */
  async #handleFunctionCallChat({ messages = [] }) {
    await FoundryLLM.cacheContextWindows();
    return await this.client.chat.completions
      .create({
        model: this.model,
        messages,
        max_completion_tokens: FoundryLLM.promptWindowLimit(this.model),
      })
      .then((result) => {
        if (!result.hasOwnProperty("choices"))
          throw new Error("Microsoft Foundry Local chat: No results!");
        if (result.choices.length === 0)
          throw new Error("Microsoft Foundry Local chat: No results length!");
        return result.choices[0].message.content;
      })
      .catch((error) => {
        // Log instead of silently swallowing so failures are diagnosable;
        // callers still receive null as the best-effort fallback.
        console.error(
          `FoundryProvider::#handleFunctionCallChat failed: ${error.message}`
        );
        return null;
      });
  }

  /**
   * Run a streaming chat completion for emulated function calling.
   * @param {{messages?: object[]}} param0
   * @returns {Promise<AsyncIterable<object>>} The completion stream.
   */
  async #handleFunctionCallStream({ messages = [] }) {
    await FoundryLLM.cacheContextWindows();
    return await this.client.chat.completions.create({
      model: this.model,
      stream: true,
      messages,
      max_completion_tokens: FoundryLLM.promptWindowLimit(this.model),
    });
  }

  /**
   * Stream a chat with tool-call emulation via UnTooled.
   * @param {object[]} messages
   * @param {object[]} [functions]
   * @param {Function|null} [eventHandler]
   */
  async stream(messages, functions = [], eventHandler = null) {
    return await UnTooled.prototype.stream.call(
      this,
      messages,
      functions,
      this.#handleFunctionCallStream.bind(this),
      eventHandler
    );
  }

  /**
   * Complete a chat with tool-call emulation via UnTooled.
   * @param {object[]} messages
   * @param {object[]} [functions]
   */
  async complete(messages, functions = []) {
    return await UnTooled.prototype.complete.call(
      this,
      messages,
      functions,
      this.#handleFunctionCallChat.bind(this)
    );
  }

  /**
   * Get the cost of the completion. Foundry Local is free to run,
   * so cost is always zero.
   * @param {object} _usage - Unused usage payload.
   * @returns {number} Always 0.
   */
  getCost(_usage) {
    return 0;
  }
}

module.exports = FoundryProvider;