mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-25 17:15:37 +02:00
Update Lemonade Integration to support v10.1.0 changes (#5378)
Update Lemonade Integration; fix ApiKey nullification check causing hard throw
This commit is contained in:
@@ -106,7 +106,7 @@ export default function LemonadeOptions({ settings }) {
|
||||
type="url"
|
||||
name="LemonadeLLMBasePath"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="http://localhost:8000"
|
||||
placeholder="http://localhost:13305"
|
||||
value={cleanBasePath(basePathValue.value)}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
@@ -150,7 +150,7 @@ export default function LemonadeOptions({ settings }) {
|
||||
type="number"
|
||||
name="LemonadeLLMModelTokenLimit"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="4096"
|
||||
placeholder="8192"
|
||||
min={1}
|
||||
value={maxTokens}
|
||||
onChange={(e) => setMaxTokens(Number(e.target.value))}
|
||||
|
||||
@@ -67,6 +67,12 @@ export const LEMONADE_COMMON_URLS = [
|
||||
"http://127.0.0.1:8000/live",
|
||||
"http://host.docker.internal:8000/live",
|
||||
"http://172.17.0.1:8000/live",
|
||||
|
||||
// In Lemonade 10.1.0 the base port is 13305
|
||||
"http://localhost:13305/live",
|
||||
"http://127.0.0.1:13305/live",
|
||||
"http://host.docker.internal:13305/live",
|
||||
"http://172.17.0.1:13305/live",
|
||||
];
|
||||
|
||||
export function fullApiUrl() {
|
||||
|
||||
@@ -22,7 +22,7 @@ class LemonadeLLM {
|
||||
process.env.LEMONADE_LLM_BASE_PATH,
|
||||
"openai"
|
||||
),
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY || null,
|
||||
});
|
||||
|
||||
this.model = modelPreference || process.env.LEMONADE_LLM_MODEL_PREF;
|
||||
@@ -202,7 +202,7 @@ class LemonadeLLM {
|
||||
process.env.LEMONADE_LLM_BASE_PATH,
|
||||
"openai"
|
||||
),
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY || null,
|
||||
});
|
||||
|
||||
const { labels = [] } = await client.models.retrieve(this.model);
|
||||
@@ -223,6 +223,41 @@ class LemonadeLLM {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the currently loaded models from the Lemonade server.
|
||||
* @returns {Promise<string[]>}
|
||||
*/
|
||||
static async getCurrentlyLoadedModels() {
|
||||
const endpoint = new URL(
|
||||
parseLemonadeServerEndpoint(process.env.LEMONADE_LLM_BASE_PATH, "openai")
|
||||
);
|
||||
endpoint.pathname += "/health";
|
||||
const loadedModels = await fetch(endpoint.toString(), {
|
||||
method: "GET",
|
||||
headers: {
|
||||
...(process.env.LEMONADE_LLM_API_KEY
|
||||
? { Authorization: `Bearer ${process.env.LEMONADE_LLM_API_KEY}` }
|
||||
: {}),
|
||||
},
|
||||
})
|
||||
.then((response) => {
|
||||
if (!response.ok)
|
||||
throw new Error(
|
||||
`Failed to get currently loaded models: ${response.statusText}`
|
||||
);
|
||||
return response.json();
|
||||
})
|
||||
.then(({ all_models_loaded = [] } = {}) => {
|
||||
return all_models_loaded.map((model) => {
|
||||
return {
|
||||
model_name: model.model_name,
|
||||
ctx_size: model?.recipe_options?.ctx_size ?? 8192,
|
||||
};
|
||||
});
|
||||
});
|
||||
return loadedModels;
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility function to load a model from the Lemonade server.
|
||||
* Does not check if the model is already loaded or unloads any models.
|
||||
@@ -230,12 +265,33 @@ class LemonadeLLM {
|
||||
*/
|
||||
static async loadModel(model, basePath = process.env.LEMONADE_LLM_BASE_PATH) {
|
||||
try {
|
||||
const desiredCtxSize = Number(this.promptWindowLimit());
|
||||
const currentlyLoadedModels =
|
||||
await LemonadeLLM.getCurrentlyLoadedModels();
|
||||
const modelAlreadyLoaded = currentlyLoadedModels.find(
|
||||
(m) => m.model_name === model
|
||||
);
|
||||
|
||||
if (modelAlreadyLoaded) {
|
||||
if (modelAlreadyLoaded.ctx_size === desiredCtxSize) {
|
||||
LemonadeLLM.slog(
|
||||
`Model ${model} already loaded with ctx size ${desiredCtxSize}`
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
||||
LemonadeLLM.slog(
|
||||
`Model ${model} needs to be reloaded again with ctx size ${desiredCtxSize}`
|
||||
);
|
||||
}
|
||||
|
||||
const endpoint = new URL(parseLemonadeServerEndpoint(basePath, "openai"));
|
||||
endpoint.pathname += "/load";
|
||||
|
||||
LemonadeLLM.slog(
|
||||
`Loading model ${model} with context size ${this.promptWindowLimit()}`
|
||||
`Loading model ${model} with context size ${desiredCtxSize}`
|
||||
);
|
||||
|
||||
await fetch(endpoint.toString(), {
|
||||
method: "POST",
|
||||
headers: {
|
||||
@@ -246,7 +302,7 @@ class LemonadeLLM {
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model_name: String(model),
|
||||
ctx_size: Number(this.promptWindowLimit()),
|
||||
ctx_size: desiredCtxSize,
|
||||
}),
|
||||
})
|
||||
.then((response) => {
|
||||
|
||||
@@ -14,7 +14,7 @@ class LemonadeEmbedder {
|
||||
process.env.EMBEDDING_BASE_PATH,
|
||||
"openai"
|
||||
),
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY || null,
|
||||
});
|
||||
this.model = process.env.EMBEDDING_MODEL_PREF;
|
||||
|
||||
|
||||
@@ -402,7 +402,7 @@ class Provider {
|
||||
configuration: {
|
||||
baseURL: process.env.LEMONADE_LLM_BASE_PATH,
|
||||
},
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY || null,
|
||||
...config,
|
||||
});
|
||||
default:
|
||||
|
||||
@@ -27,7 +27,7 @@ class LemonadeProvider extends InheritMultiple([Provider, UnTooled]) {
|
||||
process.env.LEMONADE_LLM_BASE_PATH,
|
||||
"openai"
|
||||
),
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY ?? null,
|
||||
apiKey: process.env.LEMONADE_LLM_API_KEY || null,
|
||||
maxRetries: 3,
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user