- Token context window
+ Model context window
+    options: (settings) => <DockerModelRunnerOptions settings={settings} />,
+ description: "Run LLMs using Docker Model Runner.",
+ requiredConfig: [
+ "DockerModelRunnerBasePath",
+ "DockerModelRunnerModelPref",
+ "DockerModelRunnerModelTokenLimit",
+ ],
+ },
{
name: "Local AI",
value: "localai",
@@ -371,6 +385,7 @@ export const AVAILABLE_LLM_PROVIDERS = [
},
];
+export const LLM_PREFERENCE_CHANGED_EVENT = "llm-preference-changed";
export default function GeneralLLMPreference() {
const [saving, setSaving] = useState(false);
const [hasChanges, setHasChanges] = useState(false);
@@ -428,6 +443,21 @@ export default function GeneralLLMPreference() {
fetchKeys();
}, []);
+ // Some more complex LLM options do not bubble up the change event, so we need to listen to the custom event
+ // we can emit from the LLM options component using window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT));
+ useEffect(() => {
+ function updateHasChanges() {
+ setHasChanges(true);
+ }
+ window.addEventListener(LLM_PREFERENCE_CHANGED_EVENT, updateHasChanges);
+ return () => {
+ window.removeEventListener(
+ LLM_PREFERENCE_CHANGED_EVENT,
+ updateHasChanges
+ );
+ };
+ }, []);
+
useEffect(() => {
const filtered = AVAILABLE_LLM_PROVIDERS.filter((llm) =>
llm.name.toLowerCase().includes(searchQuery.toLowerCase())
diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
index a0cf2ae8f..3f32dcbd6 100644
--- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
+++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx
@@ -31,6 +31,7 @@ import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
import CometApiLogo from "@/media/llmprovider/cometapi.png";
import GiteeAILogo from "@/media/llmprovider/giteeai.png";
+import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
@@ -63,6 +64,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions";
import GiteeAiOptions from "@/components/LLMSelection/GiteeAIOptions";
+import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import System from "@/models/system";
@@ -139,6 +141,13 @@ const LLMS = [
description:
"Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
},
+ {
+ name: "Docker Model Runner",
+ value: "docker-model-runner",
+ logo: DockerModelRunnerLogo,
+    options: (settings) => <DockerModelRunnerOptions settings={settings} />,
+ description: "Run LLMs using Docker Model Runner.",
+ },
{
name: "Local AI",
value: "localai",
diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx
index c96531e26..b2a5945b3 100644
--- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx
+++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx
@@ -37,6 +37,7 @@ const ENABLED_PROVIDERS = [
"zai",
"giteeai",
"cohere",
+ "docker-model-runner",
// TODO: More agent support.
// "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
];
@@ -46,6 +47,7 @@ const WARN_PERFORMANCE = [
"ollama",
"localai",
"textgenwebui",
+ "docker-model-runner",
];
const LLM_DEFAULT = {
diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js
index c1fae8fc3..52f710834 100644
--- a/frontend/src/utils/constants.js
+++ b/frontend/src/utils/constants.js
@@ -53,6 +53,14 @@ export const NVIDIA_NIM_COMMON_URLS = [
"http://172.17.0.1:8000/v1/version",
];
+export const DOCKER_MODEL_RUNNER_COMMON_URLS = [
+ "http://localhost:12434/engines/llama.cpp/v1",
+ "http://127.0.0.1:12434/engines/llama.cpp/v1",
+ "http://model-runner.docker.internal/engines/llama.cpp/v1",
+ "http://host.docker.internal:12434/engines/llama.cpp/v1",
+ "http://172.17.0.1:12434/engines/llama.cpp/v1",
+];
+
export function fullApiUrl() {
if (API_BASE !== "/api") return API_BASE;
return `${window.location.origin}/api`;
diff --git a/locales/README.fa-IR.md b/locales/README.fa-IR.md
index 342aed2dd..70fbf8855 100644
--- a/locales/README.fa-IR.md
+++ b/locales/README.fa-IR.md
@@ -105,6 +105,7 @@ AnythingLLM اسناد شما را به اشیایی به نام `workspaces` ت
- [Z.AI (chat models)](https://z.ai/model-api)
- [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
- [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
diff --git a/locales/README.ja-JP.md b/locales/README.ja-JP.md
index afb4e5aff..cd1798bda 100644
--- a/locales/README.ja-JP.md
+++ b/locales/README.ja-JP.md
@@ -94,6 +94,7 @@ AnythingLLMは、ドキュメントを`ワークスペース`と呼ばれるオ
- [Z.AI (チャットモデル)](https://z.ai/model-api)
- [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
- [CometAPI (チャットモデル)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
**埋め込みモデル:**
diff --git a/locales/README.tr-TR.md b/locales/README.tr-TR.md
index 1743db318..37f300180 100644
--- a/locales/README.tr-TR.md
+++ b/locales/README.tr-TR.md
@@ -102,6 +102,7 @@ AnythingLLM, belgelerinizi **"çalışma alanları" (workspaces)** adı verilen
- [Z.AI (chat models)](https://z.ai/model-api)
- [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
- [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
**Embedder modelleri:**
diff --git a/locales/README.zh-CN.md b/locales/README.zh-CN.md
index c5408dc69..4c978a9a9 100644
--- a/locales/README.zh-CN.md
+++ b/locales/README.zh-CN.md
@@ -102,6 +102,7 @@ AnythingLLM将您的文档划分为称为`workspaces` (工作区)的对象。工
- [Novita AI (聊天模型)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
- [PPIO (聊天模型)](https://ppinfra.com?utm_source=github_anything-llm)
- [CometAPI (聊天模型)](https://api.cometapi.com/)
+- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
**支持的嵌入模型:**
diff --git a/server/.env.example b/server/.env.example
index 2f7b96dbf..b408b6fa0 100644
--- a/server/.env.example
+++ b/server/.env.example
@@ -161,6 +161,11 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# GITEE_AI_MODEL_PREF=
# GITEE_AI_MODEL_TOKEN_LIMIT=
+# LLM_PROVIDER='docker-model-runner'
+# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini'
+# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096
+
###########################################
######## Embedding API SElECTION ##########
###########################################
diff --git a/server/endpoints/utils.js b/server/endpoints/utils.js
index 327b58f8d..30d2e9b7a 100644
--- a/server/endpoints/utils.js
+++ b/server/endpoints/utils.js
@@ -154,6 +154,9 @@ function getModelTag() {
case "cohere":
model = process.env.COHERE_MODEL_PREF;
break;
+ case "docker-model-runner":
+ model = process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
+ break;
default:
model = "--";
break;
diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js
index 28e44ca66..53f7dd76d 100644
--- a/server/models/systemSettings.js
+++ b/server/models/systemSettings.js
@@ -646,6 +646,13 @@ const SystemSettings = {
GiteeAIApiKey: !!process.env.GITEE_AI_API_KEY,
GiteeAIModelPref: process.env.GITEE_AI_MODEL_PREF,
GiteeAITokenLimit: process.env.GITEE_AI_MODEL_TOKEN_LIMIT || 8192,
+
+ // Docker Model Runner Keys
+ DockerModelRunnerBasePath: process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
+ DockerModelRunnerModelPref:
+ process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF,
+ DockerModelRunnerModelTokenLimit:
+ process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT || 8192,
};
},
diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore
index 6bda7b7a3..2e6b5c3e9 100644
--- a/server/storage/models/.gitignore
+++ b/server/storage/models/.gitignore
@@ -13,4 +13,5 @@ context-windows/*
MintplexLabs
cometapi
fireworks
-giteeai
\ No newline at end of file
+giteeai
+docker-model-runner
\ No newline at end of file
diff --git a/server/utils/AiProviders/dockerModelRunner/index.js b/server/utils/AiProviders/dockerModelRunner/index.js
new file mode 100644
index 000000000..81f1f36a8
--- /dev/null
+++ b/server/utils/AiProviders/dockerModelRunner/index.js
@@ -0,0 +1,434 @@
+const fs = require("fs");
+const path = require("path");
+const { NativeEmbedder } = require("../../EmbeddingEngines/native");
+const {
+ handleDefaultStreamResponseV2,
+ formatChatHistory,
+} = require("../../helpers/chat/responses");
+const {
+ LLMPerformanceMonitor,
+} = require("../../helpers/chat/LLMPerformanceMonitor");
+const { OpenAI: OpenAIApi } = require("openai");
+const { humanFileSize } = require("../../helpers");
+const { safeJsonParse } = require("../../http");
+
+class DockerModelRunnerLLM {
+  // Remote model listings fetched from Docker Hub are cached on disk for
+  // this long before being refetched.
+  static cacheTime = 1000 * 60 * 60 * 24; // 24 hours
+  static cacheFolder = path.resolve(
+    process.env.STORAGE_DIR
+      ? path.resolve(process.env.STORAGE_DIR, "models", "docker-model-runner")
+      : path.resolve(__dirname, `../../../storage/models/docker-model-runner`)
+  );
+
+  constructor(embedder = null, modelPreference = null) {
+    if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
+      throw new Error("No Docker Model Runner API Base Path was set.");
+    if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF)
+      throw new Error("No Docker Model Runner Model Pref was set.");
+
+    // DMR exposes an OpenAI-compatible API, so we reuse the OpenAI client
+    // pointed at the rewritten `engines/v1` endpoint. DMR is unauthenticated,
+    // hence apiKey: null.
+    this.dmr = new OpenAIApi({
+      baseURL: parseDockerModelRunnerEndpoint(
+        process.env.DOCKER_MODEL_RUNNER_BASE_PATH
+      ),
+      apiKey: null,
+    });
+
+    this.model =
+      modelPreference || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
+    this.embedder = embedder ?? new NativeEmbedder();
+    this.defaultTemp = 0.7;
+
+    // Token budget split across prompt sections (15% history, 15% system,
+    // 70% user) derived from the env-defined context window limit.
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+
+    this.#log(`initialized with model: ${this.model}`);
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args);
+  }
+
+  static slog(text, ...args) {
+    console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args);
+  }
+
+  // NOTE(review): the constructor always assigns `this.limits`, so the null
+  // guard below always returns early and the recompute branch is effectively
+  // dead unless something external nulls out `limits` — confirm intent.
+  async assertModelContextLimits() {
+    if (this.limits !== null) return;
+    this.limits = {
+      history: this.promptWindowLimit() * 0.15,
+      system: this.promptWindowLimit() * 0.15,
+      user: this.promptWindowLimit() * 0.7,
+    };
+  }
+
+  // Wrap retrieved context snippets in delimited [CONTEXT n] sections that
+  // get appended to the system prompt. Returns "" when there is no context.
+  #appendContext(contextTexts = []) {
+    if (!contextTexts || !contextTexts.length) return "";
+    return (
+      "\nContext:\n" +
+      contextTexts
+        .map((text, i) => {
+          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
+        })
+        .join("")
+    );
+  }
+
+  streamingEnabled() {
+    return "streamGetChatCompletion" in this;
+  }
+
+  /** DMR does not support curling the context window limit from the API, so we return the system defined limit. */
+  static promptWindowLimit(_) {
+    const systemDefinedLimit =
+      Number(process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT) || 8192;
+    return systemDefinedLimit;
+  }
+
+  promptWindowLimit() {
+    return this.constructor.promptWindowLimit(this.model);
+  }
+
+  // DMR has no model-validation endpoint; assume any configured model is valid.
+  async isValidChatCompletionModel(_ = "") {
+    return true;
+  }
+
+  /**
+   * Generates appropriate content array for a message + attachments.
+   * Returns the bare string when there are no attachments, otherwise an
+   * OpenAI-style mixed content array of text + image_url parts.
+   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
+   * @returns {string|object[]}
+   */
+  #generateContent({ userPrompt, attachments = [] }) {
+    if (!attachments.length) {
+      return userPrompt;
+    }
+
+    const content = [{ type: "text", text: userPrompt }];
+    for (let attachment of attachments) {
+      content.push({
+        type: "image_url",
+        image_url: {
+          url: attachment.contentString,
+          detail: "auto",
+        },
+      });
+    }
+    return content.flat();
+  }
+
+  /**
+   * Construct the user prompt for this model.
+   * Builds the full OpenAI message array: system (+context), history, user.
+   * @param {{attachments: import("../../helpers").Attachment[]}} param0
+   * @returns {object[]} OpenAI-format message array.
+   */
+  constructPrompt({
+    systemPrompt = "",
+    contextTexts = [],
+    chatHistory = [],
+    userPrompt = "",
+    attachments = [],
+  }) {
+    const prompt = {
+      role: "system",
+      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
+    };
+    return [
+      prompt,
+      ...formatChatHistory(chatHistory, this.#generateContent),
+      {
+        role: "user",
+        content: this.#generateContent({ userPrompt, attachments }),
+      },
+    ];
+  }
+
+  // Non-streamed chat completion. Returns null when the API responds without
+  // choices; otherwise the text plus timing/usage metrics.
+  async getChatCompletion(messages = null, { temperature = 0.7 }) {
+    if (!this.model)
+      throw new Error(
+        `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!`
+      );
+
+    const result = await LLMPerformanceMonitor.measureAsyncFunction(
+      this.dmr.chat.completions.create({
+        model: this.model,
+        messages,
+        temperature,
+      })
+    );
+
+    if (
+      !result.output.hasOwnProperty("choices") ||
+      result.output.choices.length === 0
+    )
+      return null;
+
+    return {
+      textResponse: result.output.choices[0].message.content,
+      metrics: {
+        prompt_tokens: result.output.usage?.prompt_tokens || 0,
+        completion_tokens: result.output.usage?.completion_tokens || 0,
+        total_tokens: result.output.usage?.total_tokens || 0,
+        outputTps: result.output.usage?.completion_tokens / result.duration,
+        duration: result.duration,
+        model: this.model,
+        timestamp: new Date(),
+      },
+    };
+  }
+
+  // Streamed chat completion wrapped in the performance monitor.
+  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
+    if (!this.model)
+      throw new Error(
+        `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!`
+      );
+
+    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream({
+      func: this.dmr.chat.completions.create({
+        model: this.model,
+        stream: true,
+        messages,
+        temperature,
+      }),
+      messages,
+      runPromptTokenCalculation: true,
+      modelTag: this.model,
+    });
+    return measuredStreamRequest;
+  }
+
+  handleStream(response, stream, responseProps) {
+    return handleDefaultStreamResponseV2(response, stream, responseProps);
+  }
+
+  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
+  async embedTextInput(textInput) {
+    return await this.embedder.embedTextInput(textInput);
+  }
+  async embedChunks(textChunks = []) {
+    return await this.embedder.embedChunks(textChunks);
+  }
+
+  // Compress the prompt to fit within `this.limits` before sending.
+  async compressMessages(promptArgs = {}, rawHistory = []) {
+    await this.assertModelContextLimits();
+    const { messageArrayCompressor } = require("../../helpers/chat");
+    const messageArray = this.constructPrompt(promptArgs);
+    return await messageArrayCompressor(this, messageArray, rawHistory);
+  }
+}
+
+/**
+ * Parse the base path of the Docker Model Runner endpoint and return the host and port.
+ * @param {string} basePath - The base path of the Docker Model Runner endpoint.
+ * @param {'openai' | 'dmr'} to - The provider to parse the endpoint for (internal DMR or openai-compatible)
+ * @returns {string | null}
+ */
+function parseDockerModelRunnerEndpoint(basePath = null, to = "openai") {
+  if (!basePath) return null;
+  try {
+    const endpoint = new URL(basePath);
+    // Normalize the path for the requested consumer: the OpenAI-compatible
+    // surface lives under `engines/v1`, the native DMR API sits at the root.
+    if (to === "openai") endpoint.pathname = "engines/v1";
+    else if (to === "dmr") endpoint.pathname = "";
+    return endpoint.toString();
+  } catch (e) {
+    // Not a parsable URL — hand the raw input back untouched.
+    return basePath;
+  }
+}
+
+/**
+ * @typedef {Object} DockerRunnerInstalledModel
+ * @property {string} id - The SHA256 identifier of the model layer/blob.
+ * @property {string[]} tags - List of tags or aliases associated with this model (e.g., "ai/qwen3:4B-UD-Q4_K_XL").
+ * @property {number} created - The Unix timestamp (seconds) when the model was created.
+ * @property {Object} config - The configuration of the model.
+ * @property {string} config.format - The file format (e.g., "gguf").
+ * @property {string} config.quantization - The quantization level (e.g., "MOSTLY_Q4_K_M", "Q4_0").
+ * @property {string} config.parameters - The parameter count formatted as a string (e.g., "4.02 B").
+ * @property {string} config.architecture - The base architecture of the model (e.g., "qwen3", "llama").
+ * @property {string} config.size - The physical file size formatted as a string (e.g., "2.37 GiB").
+ * @property {string} config?.gguf - Raw GGUF metadata headers containing tokenizer, architecture details, and licensing.
+ * @property {string} config?.gguf['general.base_model.0.organization'] - The tokenizer of the model.
+ * @property {string} config?.gguf['general.basename'] - The base name of the model (the real name of the model, not the tag)
+ * @property {string} config?.gguf['*.context_length'] - The context length of the model. will be something like qwen3.context_length
+ */
+
+/**
+ * Fetch the remote models from the Docker Hub and cache the results.
+ * @returns {Promise<Object.<string, {id: string, name: string, size: string, organization: string}[]>>}
+ *   mapping of model name to its available tags.
+ */
+async function fetchRemoteModels() {
+  const cachePath = path.resolve(
+    DockerModelRunnerLLM.cacheFolder,
+    "models.json"
+  );
+  const cachedAtPath = path.resolve(
+    DockerModelRunnerLLM.cacheFolder,
+    ".cached_at"
+  );
+
+  // Serve from the on-disk cache while it is still fresh (24h TTL).
+  if (fs.existsSync(cachePath) && fs.existsSync(cachedAtPath)) {
+    const cachedAt = Number(fs.readFileSync(cachedAtPath, "utf8"));
+    if (Date.now() - cachedAt < DockerModelRunnerLLM.cacheTime)
+      return safeJsonParse(fs.readFileSync(cachePath, "utf8"));
+  }
+
+  DockerModelRunnerLLM.slog(`Refreshing remote models from Docker Hub`);
+  // Now hit the Docker Hub API to get the remote model namespace and root tags
+  const availableNamespaces = []; // array of strings like ai/mistral, ai/qwen3, etc
+  let nextPage =
+    "https://hub.docker.com/v2/namespaces/ai/repositories?page_size=100&page=1";
+  while (nextPage) {
+    const response = await fetch(nextPage)
+      .then((res) => res.json())
+      .then((data) => {
+        const namespaces = data.results
+          .filter(
+            (result) =>
+              result.namespace &&
+              result.name &&
+              result.content_types.includes("model") &&
+              result.namespace === "ai"
+          )
+          .map((result) => result.namespace + "/" + result.name);
+        availableNamespaces.push(...namespaces);
+        // Return the raw payload so the pagination cursor (`next`) below can
+        // advance — without this the resolved value is undefined and only the
+        // first page of repositories would ever be fetched.
+        return data;
+      })
+      .catch((e) => {
+        DockerModelRunnerLLM.slog(
+          `Error fetching remote models from Docker Hub`,
+          e
+        );
+        return null;
+      });
+    if (!response?.next) break;
+    nextPage = response.next;
+  }
+
+  const availableRemoteModels = {};
+  const BATCH_SIZE = 10;
+
+  // Run batch requests to avoid rate limiting but also
+  // improve the speed of the total request time.
+  for (let i = 0; i < availableNamespaces.length; i += BATCH_SIZE) {
+    const batch = availableNamespaces.slice(i, i + BATCH_SIZE);
+    DockerModelRunnerLLM.slog(
+      `Fetching tags for batch ${Math.floor(i / BATCH_SIZE) + 1} of ${Math.ceil(availableNamespaces.length / BATCH_SIZE)}`
+    );
+
+    await Promise.all(
+      batch.map(async (namespace) => {
+        const [organization, model] = namespace.split("/");
+        const namespaceUrl = new URL(
+          "https://hub.docker.com/v2/namespaces/ai/repositories/" +
+            model +
+            "/tags"
+        );
+
+        DockerModelRunnerLLM.slog(
+          `Fetching tags for ${namespaceUrl.toString()}`
+        );
+        await fetch(namespaceUrl.toString())
+          .then((res) => res.json())
+          .then((data) => {
+            const tags = data.results.map((result) => {
+              return {
+                id: `${organization}/${model}:${result.name}`,
+                name: `${model}:${result.name}`,
+                size: humanFileSize(result.full_size),
+                organization: model,
+              };
+            });
+            availableRemoteModels[model] = tags;
+          })
+          .catch((e) => {
+            // A failed tag listing only drops that one model from the result.
+            DockerModelRunnerLLM.slog(
+              `Error fetching tags for ${namespaceUrl.toString()}`,
+              e
+            );
+          });
+      })
+    );
+  }
+
+  if (Object.keys(availableRemoteModels).length === 0) {
+    // Nothing fetched — do not poison the cache with an empty result.
+    DockerModelRunnerLLM.slog(
+      `No remote models found - API may be down or not available`
+    );
+    return {};
+  }
+
+  if (!fs.existsSync(DockerModelRunnerLLM.cacheFolder))
+    fs.mkdirSync(DockerModelRunnerLLM.cacheFolder, { recursive: true });
+  fs.writeFileSync(cachePath, JSON.stringify(availableRemoteModels), {
+    encoding: "utf8",
+  });
+  fs.writeFileSync(cachedAtPath, String(Date.now()), {
+    encoding: "utf8",
+  });
+  return availableRemoteModels;
+}
+
+/**
+ * This function will fetch the remote models from the Docker Hub as well
+ * as the local models installed on the system.
+ * @param {string} basePath - The base path of the Docker Model Runner endpoint.
+ * @returns {Promise<Array<{id: string, name: string, size: string, organization: string, downloaded: boolean}>>}
+ */
+async function getDockerModels(basePath = null) {
+  const availableModels = {};
+  /** @type {Object.<string, DockerRunnerInstalledModel>} keyed by fully-qualified model id */
+  const installedModels = {};
+
+  try {
+    // Grab the locally installed models from the Docker Model Runner API
+    const dmrUrl = new URL(
+      parseDockerModelRunnerEndpoint(
+        basePath ?? process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
+        "dmr"
+      )
+    );
+    dmrUrl.pathname = "/models";
+
+    await fetch(dmrUrl.toString())
+      .then((res) => res.json())
+      .then((data) => {
+        // `data` is an array of installed model records — iterate for side
+        // effects rather than mapping into a throwaway array.
+        data?.forEach((model) => {
+          const id = model.tags.at(0);
+          // eg: ai/qwen3:latest -> qwen3
+          const tag =
+            id?.split("/").pop()?.split(":")?.at(1) ??
+            id?.split(":").at(1) ??
+            "latest";
+          const organization = id?.split("/").pop()?.split(":")?.at(0) ?? id;
+          installedModels[id] = {
+            id: id,
+            name: `${organization}:${tag}`,
+            size: model.config?.size ?? "Unknown size",
+            organization: organization,
+          };
+        });
+      });
+
+    // Now hit the Docker Hub API to get the remote model namespace and root tags
+    const remoteModels = await fetchRemoteModels();
+    for (const [modelName, tags] of Object.entries(remoteModels)) {
+      availableModels[modelName] = { tags: [] };
+      for (const tag of tags) {
+        // Flag tags already pulled locally so the UI can show download state.
+        availableModels[modelName].tags.push({
+          ...tag,
+          downloaded: Boolean(installedModels[tag.id]),
+        });
+      }
+    }
+  } catch (e) {
+    DockerModelRunnerLLM.slog(`Error getting Docker models`, e);
+  } finally {
+    // Intentional return-in-finally: always yield a flat tag list, even when
+    // either fetch above failed part way through.
+    return Object.values(availableModels).flatMap((m) => m.tags);
+  }
+}
+
+module.exports = {
+ DockerModelRunnerLLM,
+ parseDockerModelRunnerEndpoint,
+ getDockerModels,
+};
diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js
index cc15c123d..0a2f6f450 100644
--- a/server/utils/agents/aibitat/index.js
+++ b/server/utils/agents/aibitat/index.js
@@ -992,6 +992,8 @@ ${this.getHistory({ to: route.to })
return new Providers.GiteeAIProvider({ model: config.model });
case "cohere":
return new Providers.CohereProvider({ model: config.model });
+ case "docker-model-runner":
+ return new Providers.DockerModelRunnerProvider({ model: config.model });
default:
throw new Error(
`Unknown provider: ${config.provider}. Please use a valid provider.`
diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js
index 3752b161a..9d00c9b37 100644
--- a/server/utils/agents/aibitat/providers/ai-provider.js
+++ b/server/utils/agents/aibitat/providers/ai-provider.js
@@ -18,6 +18,9 @@ const { ChatOllama } = require("@langchain/community/chat_models/ollama");
const { toValidNumber, safeJsonParse } = require("../../../http");
const { getLLMProviderClass } = require("../../../helpers");
const { parseLMStudioBasePath } = require("../../../AiProviders/lmStudio");
+const {
+ parseDockerModelRunnerEndpoint,
+} = require("../../../AiProviders/dockerModelRunner");
const { parseFoundryBasePath } = require("../../../AiProviders/foundry");
const {
SystemPromptVariables,
@@ -313,6 +316,16 @@ class Provider {
...config,
});
}
+ case "docker-model-runner":
+ return new ChatOpenAI({
+ configuration: {
+ baseURL: parseDockerModelRunnerEndpoint(
+ process.env.DOCKER_MODEL_RUNNER_BASE_PATH
+ ),
+ },
+ apiKey: null,
+ ...config,
+ });
default:
throw new Error(`Unsupported provider ${provider} for this task.`);
}
diff --git a/server/utils/agents/aibitat/providers/dockerModelRunner.js b/server/utils/agents/aibitat/providers/dockerModelRunner.js
new file mode 100644
index 000000000..e00f8bc50
--- /dev/null
+++ b/server/utils/agents/aibitat/providers/dockerModelRunner.js
@@ -0,0 +1,101 @@
+const OpenAI = require("openai");
+const Provider = require("./ai-provider.js");
+const InheritMultiple = require("./helpers/classes.js");
+const UnTooled = require("./helpers/untooled.js");
+const {
+ parseDockerModelRunnerEndpoint,
+} = require("../../../AiProviders/dockerModelRunner/index.js");
+
+/**
+ * The agent provider for the Docker Model Runner.
+ */
+class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
+  model;
+
+  /**
+   * @param {{model?: string}} config - Optional model override; falls back to
+   *   the DOCKER_MODEL_RUNNER_LLM_MODEL_PREF env var.
+   */
+  constructor(config = {}) {
+    super();
+    const model =
+      config?.model || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF || null;
+    // DMR is OpenAI-API compatible and unauthenticated, hence apiKey: null.
+    const client = new OpenAI({
+      baseURL: parseDockerModelRunnerEndpoint(
+        process.env.DOCKER_MODEL_RUNNER_BASE_PATH
+      ),
+      apiKey: null,
+      maxRetries: 3,
+    });
+
+    this._client = client;
+    this.model = model;
+    this.verbose = true;
+  }
+
+  get client() {
+    return this._client;
+  }
+
+  get supportsAgentStreaming() {
+    return true;
+  }
+
+  // Single non-streamed completion used for UnTooled function-call emulation.
+  // Returns the message content, or null on any API/shape error.
+  async #handleFunctionCallChat({ messages = [] }) {
+    return await this.client.chat.completions
+      .create({
+        model: this.model,
+        messages,
+      })
+      .then((result) => {
+        if (!result.hasOwnProperty("choices"))
+          throw new Error("Docker Model Runner chat: No results!");
+        if (result.choices.length === 0)
+          throw new Error("Docker Model Runner chat: No results length!");
+        return result.choices[0].message.content;
+      })
+      .catch((_) => {
+        // Best-effort: a null response is treated upstream as "no tool call".
+        return null;
+      });
+  }
+
+  // Streamed completion counterpart; returns the raw OpenAI stream object.
+  async #handleFunctionCallStream({ messages = [] }) {
+    return await this.client.chat.completions.create({
+      model: this.model,
+      stream: true,
+      messages,
+    });
+  }
+
+  // Delegate to UnTooled's stream loop, injecting our bound stream handler.
+  async stream(messages, functions = [], eventHandler = null) {
+    return await UnTooled.prototype.stream.call(
+      this,
+      messages,
+      functions,
+      this.#handleFunctionCallStream.bind(this),
+      eventHandler
+    );
+  }
+
+  // Delegate to UnTooled's complete loop, injecting our bound chat handler.
+  async complete(messages, functions = []) {
+    return await UnTooled.prototype.complete.call(
+      this,
+      messages,
+      functions,
+      this.#handleFunctionCallChat.bind(this)
+    );
+  }
+
+  /**
+   * Get the cost of the completion.
+   *
+   * @param _usage The completion to get the cost for.
+   * @returns The cost of the completion.
+   * Stubbed since Docker Model Runner has no cost basis.
+   */
+  getCost(_usage) {
+    return 0;
+  }
+}
+
+module.exports = DockerModelRunnerProvider;
diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js
index e4a11995a..c53c01c38 100644
--- a/server/utils/agents/aibitat/providers/index.js
+++ b/server/utils/agents/aibitat/providers/index.js
@@ -29,6 +29,7 @@ const CometApiProvider = require("./cometapi.js");
const FoundryProvider = require("./foundry.js");
const GiteeAIProvider = require("./giteeai.js");
const CohereProvider = require("./cohere.js");
+const DockerModelRunnerProvider = require("./dockerModelRunner.js");
module.exports = {
OpenAIProvider,
@@ -62,4 +63,5 @@ module.exports = {
FoundryProvider,
GiteeAIProvider,
CohereProvider,
+ DockerModelRunnerProvider,
};
diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js
index 038496cc9..24c496b61 100644
--- a/server/utils/agents/index.js
+++ b/server/utils/agents/index.js
@@ -217,6 +217,12 @@ class AgentHandler {
if (!process.env.COHERE_API_KEY)
throw new Error("Cohere API key must be provided to use agents.");
break;
+ case "docker-model-runner":
+ if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
+ throw new Error(
+ "Docker Model Runner base path must be provided to use agents."
+ );
+ break;
default:
throw new Error(
"No workspace agent provider set. Please set your agent provider in the workspace's settings"
@@ -297,6 +303,8 @@ class AgentHandler {
return process.env.GITEE_AI_MODEL_PREF ?? null;
case "cohere":
return process.env.COHERE_MODEL_PREF ?? "command-r-08-2024";
+ case "docker-model-runner":
+ return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF ?? null;
default:
return null;
}
diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js
index e7e094a16..649d2e566 100644
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@@ -13,6 +13,7 @@ const { fetchPPIOModels } = require("../AiProviders/ppio");
const { GeminiLLM } = require("../AiProviders/gemini");
const { fetchCometApiModels } = require("../AiProviders/cometapi");
const { parseFoundryBasePath } = require("../AiProviders/foundry");
+const { getDockerModels } = require("../AiProviders/dockerModelRunner");
const SUPPORT_CUSTOM_MODELS = [
"openai",
@@ -43,6 +44,7 @@ const SUPPORT_CUSTOM_MODELS = [
"cohere",
"zai",
"giteeai",
+ "docker-model-runner",
// Embedding Engines
"native-embedder",
"cohere-embedder",
@@ -116,6 +118,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
return await getOpenRouterEmbeddingModels();
case "giteeai":
return await getGiteeAIModels(apiKey);
+ case "docker-model-runner":
+ return await getDockerModelRunnerModels(basePath);
default:
return { models: [], error: "Invalid provider for custom models" };
}
@@ -864,6 +868,19 @@ async function getOpenRouterEmbeddingModels() {
return { models, error: null };
}
+/**
+ * List Docker Model Runner models (installed locally + remotely available).
+ * @param {string|null} basePath - Optional override for the DMR base path.
+ * @returns {Promise<{models: object[], error: string|null}>} Never throws;
+ *   failures surface as an empty list plus an error message.
+ */
+async function getDockerModelRunnerModels(basePath = null) {
+  try {
+    const models = await getDockerModels(basePath);
+    return { models, error: null };
+  } catch (e) {
+    console.error(`DockerModelRunner:getDockerModelRunnerModels`, e.message);
+    return {
+      models: [],
+      error: "Could not fetch Docker Model Runner Models",
+    };
+  }
+}
+
module.exports = {
getCustomModels,
SUPPORT_CUSTOM_MODELS,
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 9b3520155..d508f7eea 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -229,6 +229,11 @@ function getLLMProvider({ provider = null, model = null } = {}) {
case "giteeai":
const { GiteeAILLM } = require("../AiProviders/giteeai");
return new GiteeAILLM(embedder, model);
+ case "docker-model-runner":
+ const {
+ DockerModelRunnerLLM,
+ } = require("../AiProviders/dockerModelRunner");
+ return new DockerModelRunnerLLM(embedder, model);
default:
throw new Error(
`ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`
@@ -394,6 +399,11 @@ function getLLMProviderClass({ provider = null } = {}) {
case "giteeai":
const { GiteeAILLM } = require("../AiProviders/giteeai");
return GiteeAILLM;
+ case "docker-model-runner":
+ const {
+ DockerModelRunnerLLM,
+ } = require("../AiProviders/dockerModelRunner");
+ return DockerModelRunnerLLM;
default:
return null;
}
@@ -470,6 +480,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
return process.env.ZAI_MODEL_PREF;
case "giteeai":
return process.env.GITEE_AI_MODEL_PREF;
+ case "docker-model-runner":
+ return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
default:
return null;
}
@@ -495,6 +507,30 @@ function toChunks(arr, size) {
);
}
+/**
+ * Format a byte count as a human readable string (e.g. "2.4 GiB").
+ * Repeatedly divides by the unit threshold until the value fits, rounding
+ * at `dp` decimal places so e.g. 1023.95 KiB promotes to 1.0 MiB.
+ * @param {number} bytes - Raw size in bytes.
+ * @param {boolean} si - Use SI (1000-based, kB/MB) vs binary (1024-based, KiB/MiB) units.
+ * @param {number} dp - Decimal places in the formatted number.
+ * @returns {string}
+ */
+function humanFileSize(bytes, si = false, dp = 1) {
+  const thresh = si ? 1000 : 1024;
+
+  if (Math.abs(bytes) < thresh) {
+    return bytes + " B";
+  }
+
+  const units = si
+    ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
+    : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
+  let u = -1;
+  const r = 10 ** dp;
+
+  do {
+    bytes /= thresh;
+    ++u;
+  } while (
+    Math.round(Math.abs(bytes) * r) / r >= thresh &&
+    u < units.length - 1
+  );
+
+  return bytes.toFixed(dp) + " " + units[u];
+}
+
module.exports = {
getEmbeddingEngineSelection,
maximumChunkLength,
@@ -503,4 +539,5 @@ module.exports = {
getBaseLLMProviderModel,
getLLMProvider,
toChunks,
+ humanFileSize,
};
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 6603d9a77..256bba377 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -789,6 +789,20 @@ const KEY_MAPPING = {
envKey: "GITEE_AI_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
+
+ // Docker Model Runner Options
+ DockerModelRunnerBasePath: {
+ envKey: "DOCKER_MODEL_RUNNER_BASE_PATH",
+ checks: [isValidURL],
+ },
+ DockerModelRunnerModelPref: {
+ envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_PREF",
+ checks: [isNotEmpty],
+ },
+ DockerModelRunnerModelTokenLimit: {
+ envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT",
+ checks: [nonZero],
+ },
};
function isNotEmpty(input = "") {
@@ -902,6 +916,7 @@ function supportedLLM(input = "") {
"foundry",
"zai",
"giteeai",
+ "docker-model-runner",
].includes(input);
return validSelection ? null : `${input} is not a valid LLM provider.`;
}