diff --git a/.github/workflows/dev-build.yaml b/.github/workflows/dev-build.yaml index 5094c9415..62f2dd980 100644 --- a/.github/workflows/dev-build.yaml +++ b/.github/workflows/dev-build.yaml @@ -6,7 +6,7 @@ concurrency: on: push: - branches: ['4822-feat-remove-workspace-creation-onboarding-page'] # put your current branch to create a build. Core team only. + branches: ['4391-dmr-support'] # put your current branch to create a build. Core team only. paths-ignore: - '**.md' - 'cloud-deployments/*' diff --git a/README.md b/README.md index 4aabec7e1..7b98c252c 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace - [Moonshot AI](https://www.moonshot.ai/) - [Microsoft Foundry Local](https://github.com/microsoft/Foundry-Local) - [CometAPI (chat models)](https://api.cometapi.com/) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **Embedder models:** diff --git a/docker/.env.example b/docker/.env.example index c3fa55440..b17517e57 100644 --- a/docker/.env.example +++ b/docker/.env.example @@ -162,6 +162,11 @@ GID='1000' # GITEE_AI_MODEL_PREF= # GITEE_AI_MODEL_TOKEN_LIMIT= +# LLM_PROVIDER='docker-model-runner' +# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434' +# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini' +# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/frontend/src/components/LLMSelection/DPAISOptions/index.jsx b/frontend/src/components/LLMSelection/DPAISOptions/index.jsx index 00995a28d..78f09532a 100644 --- a/frontend/src/components/LLMSelection/DPAISOptions/index.jsx +++ b/frontend/src/components/LLMSelection/DPAISOptions/index.jsx @@ -33,7 +33,7 @@ export default function DellProAIStudioOptions({ />
} props.models - The models to display. + * @param {(model: string, progressCallback: (percentage: number) => void) => void} props.downloadModel - The function to download the model. + * @param {(model: string) => void} props.uninstallModel - The function to uninstall the model. + * @param {(model: string) => void} props.setActiveModel - The function to set the active model. + * @param {string} props.selectedModelId - The ID of the selected model. + * @param {object} props.ui - The UI configuration. + * @param {boolean} props.ui.showRuntime - Whether to show the runtime. + * @returns {React.ReactNode} + */ +export default function ModelTable({ + alias = "", + models = [], + downloadModel = null, + uninstallModel = null, + setActiveModel = () => {}, + selectedModelId = "", + ui = { + showRuntime: true, + }, +}) { + const [showAll, setShowAll] = useState( + models.some((model) => model.downloaded) + ); + const totalModels = models.length; + + return ( +
+ + +
+ ); +} + +/** + * @param {{deviceType: ModelDefinition["deviceType"]}} deviceType + * @returns {React.ReactNode} + */ +function DeviceTypeTag({ deviceType }) { + const Wrapper = ({ text, bgClass, textClass }) => { + return ( +
+ +

{text}

+
+ ); + }; + + switch (deviceType?.toLowerCase()) { + case "cpu": + return ( + + ); + case "gpu": + return ( + + ); + case "npu": + return ( + + ); + default: + return ( + + ); + } +} + +/** + * @param {object} props - The props of the component. + * @param {ModelDefinition} props.model - The model to display. + * @param {(model: string, progressCallback: (percentage: number) => void) => Promise} props.downloadModel - The function to download the model. + * @param {(model: string) => Promise} props.uninstallModel - The function to uninstall the model. + * @param {(model: string) => void} props.setActiveModel - The function to set the active model. + * @param {string} props.selectedModelId - The ID of the selected model. + * @param {object} props.ui - The UI configuration. + * @param {boolean} props.ui.showRuntime - Whether to show the runtime. + * @returns {React.ReactNode} + */ +function ModelRow({ + model, + downloadModel = null, + uninstallModel = null, + setActiveModel, + selectedModelId, + ui = { + showRuntime: true, + }, +}) { + const modelRowRef = useRef(null); + const [showOptions, setShowOptions] = useState(false); + const [processing, setProcessing] = useState(false); + const [_downloadPercentage, setDownloadPercentage] = useState(0); + const fileSize = + typeof model.size === "number" + ? humanFileSize(model.size * 1e6, true, 2) + : (model.size ?? 
"Unknown size"); + const [isActiveModel, setIsActiveModel] = useState( + selectedModelId === model.id + ); + + async function handleSetActiveModel() { + setDownloadPercentage(0); + if (model.downloaded) setActiveModel(model.id); + else { + try { + if (!downloadModel) return; + setProcessing(true); + await downloadModel(model.id, fileSize, (percentage) => + setDownloadPercentage(percentage) + ); + } catch { + } finally { + setProcessing(false); + } + } + } + + async function handleUninstallModel() { + if (!uninstallModel) return; + try { + setProcessing(true); + await uninstallModel(model.id); + } catch { + } finally { + setProcessing(false); + } + } + + useEffect(() => { + if (selectedModelId === model.id) { + setIsActiveModel(true); + modelRowRef.current.classList.add("!bg-gray-200/10"); + setTimeout( + () => modelRowRef.current.classList.remove("!bg-gray-200/10"), + 800 + ); + } else { + setIsActiveModel(false); + } + }, [selectedModelId]); + + return ( +
+ + +
+ {isActiveModel && ( +
+ +

Active

+
+ )} + + {!isActiveModel && model.downloaded && !uninstallModel && ( +

+ Installed +

+ )} + + {!model.downloaded && ( +

+ Not Installed +

+ )} +
+ +
+ {uninstallModel && model.downloaded ? ( + <> + + {showOptions && ( +
+ +
+ )} + + ) : null} + {!model.downloaded ? ( + + ) : null} +
+
+ ); +} diff --git a/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx b/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx new file mode 100644 index 000000000..cd38cc0f3 --- /dev/null +++ b/frontend/src/components/LLMSelection/DockerModelRunnerOptions/index.jsx @@ -0,0 +1,426 @@ +import { useState, useEffect } from "react"; +import System from "@/models/system"; +import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery"; +import { + ArrowClockwise, + CircleNotch, + MagnifyingGlass, + Info, +} from "@phosphor-icons/react"; +import strDistance from "js-levenshtein"; +import { LLM_PREFERENCE_CHANGED_EVENT } from "@/pages/GeneralSettings/LLMPreference"; +import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants"; +import { Tooltip } from "react-tooltip"; +import { Link } from "react-router-dom"; +import ModelTable from "./ModelTable"; +import * as Skeleton from "react-loading-skeleton"; +import "react-loading-skeleton/dist/skeleton.css"; + +export default function DockerModelRunnerOptions({ settings }) { + const { + autoDetecting: loading, + basePath, + basePathValue, + handleAutoDetectClick, + } = useProviderEndpointAutoDiscovery({ + provider: "docker-model-runner", + initialBasePath: settings?.DockerModelRunnerBasePath, + ENDPOINTS: DOCKER_MODEL_RUNNER_COMMON_URLS, + }); + + const [maxTokens, setMaxTokens] = useState( + settings?.DockerModelRunnerModelTokenLimit || 4096 + ); + + return ( +
+
+
+
+
+ + {loading ? ( + + ) : ( + <> + {!basePathValue.value && ( + + )} + + )} +
+ + Enter the URL where the Docker Model Runner is running. +
+
+ You must have enabled the Docker Model Runner TCP support + for this to work. +
+
+ + Learn more → + +
+
+ +
+
+ + +
+
+
+ + + The maximum number of tokens that can be used for a model context + window. +
+
+ To set the context window limit for a model, you can use the{" "} + docker run command with the{" "} + --context-window parameter. +
+
+ + docker model configure --context-size 8192 ai/qwen3:latest + +
+
+ + Learn more → + +
+
+ +
+
+ setMaxTokens(Number(e.target.value))} + onScroll={(e) => e.target.blur()} + required={true} + autoComplete="off" + /> +
+ +
+
+ ); +} + +function DockerModelRunnerModelSelection({ settings, basePath = null }) { + const [selectedModelId, setSelectedModelId] = useState( + settings?.DockerModelRunnerModelPref + ); + const [customModels, setCustomModels] = useState([]); + const [filteredModels, setFilteredModels] = useState([]); + const [loading, setLoading] = useState(true); + const [searchQuery, setSearchQuery] = useState(""); + + async function fetchModels() { + if (!basePath) { + setCustomModels([]); + setFilteredModels([]); + setLoading(false); + setSearchQuery(""); + return; + } + setLoading(true); + const { models } = await System.customModels( + "docker-model-runner", + null, + basePath + ); + setCustomModels(models || []); + setFilteredModels(models || []); + setSearchQuery(""); + setLoading(false); + } + + useEffect(() => { + fetchModels(); + }, [basePath]); + + useEffect(() => { + if (!searchQuery || !customModels.length) { + setFilteredModels(customModels || []); + return; + } + + const normalizedSearchQuery = searchQuery.toLowerCase().trim(); + const filteredModels = new Map(); + + customModels.forEach((model) => { + const modelNameNormalized = model.name.toLowerCase(); + const modelOrganizationNormalized = model.organization.toLowerCase(); + + if (modelNameNormalized.startsWith(normalizedSearchQuery)) + filteredModels.set(model.id, model); + if (modelOrganizationNormalized.startsWith(normalizedSearchQuery)) + filteredModels.set(model.id, model); + if (strDistance(modelNameNormalized, normalizedSearchQuery) <= 2) + filteredModels.set(model.id, model); + if (strDistance(modelOrganizationNormalized, normalizedSearchQuery) <= 2) + filteredModels.set(model.id, model); + }); + + setFilteredModels(Array.from(filteredModels.values())); + }, [searchQuery]); + + function downloadModel(modelId, _fileSize, progressCallback) { + const [name, tag] = modelId.split(":"); + + // Open the model in the Docker Hub (via browser since they may not be installed locally) + 
window.open(`https://hub.docker.com/layers/${name}/${tag}`, "_blank"); + progressCallback(100); + } + + function groupModelsByAlias(models) { + const mapping = new Map(); + mapping.set("installed", new Map()); + mapping.set("not installed", new Map()); + + const groupedModels = models.reduce((acc, model) => { + acc[model.organization] = acc[model.organization] || []; + acc[model.organization].push(model); + return acc; + }, {}); + + Object.entries(groupedModels).forEach(([organization, models]) => { + const hasInstalled = models.some((model) => model.downloaded); + if (hasInstalled) { + const installedModels = models.filter((model) => model.downloaded); + mapping + .get("installed") + .set("Downloaded Models", [ + ...(mapping.get("installed").get("Downloaded Models") || []), + ...installedModels, + ]); + } + const tags = models.map((model) => ({ + ...model, + name: model.name.split(":")[1], + })); + mapping.get("not installed").set(organization, tags); + }); + + const orderedMap = new Map(); + mapping + .get("installed") + .entries() + .forEach(([organization, models]) => + orderedMap.set(organization, models) + ); + mapping + .get("not installed") + .entries() + .forEach(([organization, models]) => + orderedMap.set(organization, models) + ); + return Object.fromEntries(orderedMap); + } + + function handleSetActiveModel(modelId) { + if (modelId === selectedModelId) return; + setSelectedModelId(modelId); + window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT)); + } + + const groupedModels = groupModelsByAlias(filteredModels); + return ( + + + {loading ? ( + + ) : filteredModels.length === 0 ? ( +
+

No models found!

+
+ ) : ( + Object.entries(groupedModels).map(([alias, models]) => ( + + )) + )} +
+ ); +} + +function Layout({ + children, + fetchModels = null, + searchQuery = "", + setSearchQuery = () => {}, + loading = false, +}) { + const [isRefreshing, setIsRefreshing] = useState(false); + async function refreshModels() { + setIsRefreshing(true); + try { + await fetchModels?.(); + } catch { + } finally { + setIsRefreshing(false); + } + } + + return ( +
+
+ +
+
+
+ + { + e.preventDefault(); + e.stopPropagation(); + setSearchQuery(e.target.value); + }} + /> +
+ {!!fetchModels && ( + + )} +
+ {children} +
+ ); +} + +function LoadingSkeleton() { + return ( +
+ +
+ ); +} diff --git a/frontend/src/components/LLMSelection/FoundryOptions/index.jsx b/frontend/src/components/LLMSelection/FoundryOptions/index.jsx index e03c62d69..7d5d380c7 100644 --- a/frontend/src/components/LLMSelection/FoundryOptions/index.jsx +++ b/frontend/src/components/LLMSelection/FoundryOptions/index.jsx @@ -92,7 +92,7 @@ export default function FoundryOptions({ settings }) {
, + description: "Run LLMs using Docker Model Runner.", + requiredConfig: [ + "DockerModelRunnerBasePath", + "DockerModelRunnerModelPref", + "DockerModelRunnerModelTokenLimit", + ], + }, { name: "Local AI", value: "localai", @@ -371,6 +385,7 @@ export const AVAILABLE_LLM_PROVIDERS = [ }, ]; +export const LLM_PREFERENCE_CHANGED_EVENT = "llm-preference-changed"; export default function GeneralLLMPreference() { const [saving, setSaving] = useState(false); const [hasChanges, setHasChanges] = useState(false); @@ -428,6 +443,21 @@ export default function GeneralLLMPreference() { fetchKeys(); }, []); + // Some more complex LLM options do not bubble up the change event, so we need to listen to the custom event + // we can emit from the LLM options component using window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT)); + useEffect(() => { + function updateHasChanges() { + setHasChanges(true); + } + window.addEventListener(LLM_PREFERENCE_CHANGED_EVENT, updateHasChanges); + return () => { + window.removeEventListener( + LLM_PREFERENCE_CHANGED_EVENT, + updateHasChanges + ); + }; + }, []); + useEffect(() => { const filtered = AVAILABLE_LLM_PROVIDERS.filter((llm) => llm.name.toLowerCase().includes(searchQuery.toLowerCase()) diff --git a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx index a0cf2ae8f..3f32dcbd6 100644 --- a/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx +++ b/frontend/src/pages/OnboardingFlow/Steps/LLMPreference/index.jsx @@ -31,6 +31,7 @@ import DellProAiStudioLogo from "@/media/llmprovider/dpais.png"; import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png"; import CometApiLogo from "@/media/llmprovider/cometapi.png"; import GiteeAILogo from "@/media/llmprovider/giteeai.png"; +import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png"; import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions"; import 
GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions"; @@ -63,6 +64,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions"; import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions"; import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions"; import GiteeAiOptions from "@/components/LLMSelection/GiteeAIOptions"; +import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions"; import LLMItem from "@/components/LLMSelection/LLMItem"; import System from "@/models/system"; @@ -139,6 +141,13 @@ const LLMS = [ description: "Discover, download, and run thousands of cutting edge LLMs in a few clicks.", }, + { + name: "Docker Model Runner", + value: "docker-model-runner", + logo: DockerModelRunnerLogo, + options: (settings) => , + description: "Run LLMs using Docker Model Runner.", + }, { name: "Local AI", value: "localai", diff --git a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx index c96531e26..b2a5945b3 100644 --- a/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx +++ b/frontend/src/pages/WorkspaceSettings/AgentConfig/AgentLLMSelection/index.jsx @@ -37,6 +37,7 @@ const ENABLED_PROVIDERS = [ "zai", "giteeai", "cohere", + "docker-model-runner", // TODO: More agent support. // "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested. 
]; @@ -46,6 +47,7 @@ const WARN_PERFORMANCE = [ "ollama", "localai", "textgenwebui", + "docker-model-runner", ]; const LLM_DEFAULT = { diff --git a/frontend/src/utils/constants.js b/frontend/src/utils/constants.js index c1fae8fc3..52f710834 100644 --- a/frontend/src/utils/constants.js +++ b/frontend/src/utils/constants.js @@ -53,6 +53,14 @@ export const NVIDIA_NIM_COMMON_URLS = [ "http://172.17.0.1:8000/v1/version", ]; +export const DOCKER_MODEL_RUNNER_COMMON_URLS = [ + "http://localhost:12434/engines/llama.cpp/v1", + "http://127.0.0.1:12434/engines/llama.cpp/v1", + "http://model-runner.docker.internal/engines/llama.cpp/v1", + "http://host.docker.internal:12434/engines/llama.cpp/v1", + "http://172.17.0.1:12434/engines/llama.cpp/v1", +]; + export function fullApiUrl() { if (API_BASE !== "/api") return API_BASE; return `${window.location.origin}/api`; diff --git a/locales/README.fa-IR.md b/locales/README.fa-IR.md index 342aed2dd..70fbf8855 100644 --- a/locales/README.fa-IR.md +++ b/locales/README.fa-IR.md @@ -105,6 +105,7 @@ AnythingLLM اسناد شما را به اشیایی به نام `workspaces` ت - [Z.AI (chat models)](https://z.ai/model-api) - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link) - [PPIO](https://ppinfra.com?utm_source=github_anything-llm) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
diff --git a/locales/README.ja-JP.md b/locales/README.ja-JP.md index afb4e5aff..cd1798bda 100644 --- a/locales/README.ja-JP.md +++ b/locales/README.ja-JP.md @@ -94,6 +94,7 @@ AnythingLLMは、ドキュメントを`ワークスペース`と呼ばれるオ - [Z.AI (チャットモデル)](https://z.ai/model-api) - [PPIO](https://ppinfra.com?utm_source=github_anything-llm) - [CometAPI (チャットモデル)](https://api.cometapi.com/) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **埋め込みモデル:** diff --git a/locales/README.tr-TR.md b/locales/README.tr-TR.md index 1743db318..37f300180 100644 --- a/locales/README.tr-TR.md +++ b/locales/README.tr-TR.md @@ -102,6 +102,7 @@ AnythingLLM, belgelerinizi **"çalışma alanları" (workspaces)** adı verilen - [Z.AI (chat models)](https://z.ai/model-api) - [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link) - [PPIO](https://ppinfra.com?utm_source=github_anything-llm) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **Embedder modelleri:** diff --git a/locales/README.zh-CN.md b/locales/README.zh-CN.md index c5408dc69..4c978a9a9 100644 --- a/locales/README.zh-CN.md +++ b/locales/README.zh-CN.md @@ -102,6 +102,7 @@ AnythingLLM将您的文档划分为称为`workspaces` (工作区)的对象。工 - [Novita AI (聊天模型)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link) - [PPIO (聊天模型)](https://ppinfra.com?utm_source=github_anything-llm) - [CometAPI (聊天模型)](https://api.cometapi.com/) +- [Docker Model Runner](https://docs.docker.com/ai/model-runner/) **支持的嵌入模型:** diff --git a/server/.env.example b/server/.env.example index 2f7b96dbf..b408b6fa0 100644 --- a/server/.env.example +++ b/server/.env.example @@ -161,6 +161,11 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long. 
# GITEE_AI_MODEL_PREF= # GITEE_AI_MODEL_TOKEN_LIMIT= +# LLM_PROVIDER='docker-model-runner' +# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434' +# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini' +# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096 + ########################################### ######## Embedding API SElECTION ########## ########################################### diff --git a/server/endpoints/utils.js b/server/endpoints/utils.js index 327b58f8d..30d2e9b7a 100644 --- a/server/endpoints/utils.js +++ b/server/endpoints/utils.js @@ -154,6 +154,9 @@ function getModelTag() { case "cohere": model = process.env.COHERE_MODEL_PREF; break; + case "docker-model-runner": + model = process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF; + break; default: model = "--"; break; diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 28e44ca66..53f7dd76d 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -646,6 +646,13 @@ const SystemSettings = { GiteeAIApiKey: !!process.env.GITEE_AI_API_KEY, GiteeAIModelPref: process.env.GITEE_AI_MODEL_PREF, GiteeAITokenLimit: process.env.GITEE_AI_MODEL_TOKEN_LIMIT || 8192, + + // Docker Model Runner Keys + DockerModelRunnerBasePath: process.env.DOCKER_MODEL_RUNNER_BASE_PATH, + DockerModelRunnerModelPref: + process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF, + DockerModelRunnerModelTokenLimit: + process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT || 8192, }; }, diff --git a/server/storage/models/.gitignore b/server/storage/models/.gitignore index 6bda7b7a3..2e6b5c3e9 100644 --- a/server/storage/models/.gitignore +++ b/server/storage/models/.gitignore @@ -13,4 +13,5 @@ context-windows/* MintplexLabs cometapi fireworks -giteeai \ No newline at end of file +giteeai +docker-model-runner \ No newline at end of file diff --git a/server/utils/AiProviders/dockerModelRunner/index.js b/server/utils/AiProviders/dockerModelRunner/index.js new file mode 100644 index 000000000..81f1f36a8 
--- /dev/null +++ b/server/utils/AiProviders/dockerModelRunner/index.js @@ -0,0 +1,434 @@ +const fs = require("fs"); +const path = require("path"); +const { NativeEmbedder } = require("../../EmbeddingEngines/native"); +const { + handleDefaultStreamResponseV2, + formatChatHistory, +} = require("../../helpers/chat/responses"); +const { + LLMPerformanceMonitor, +} = require("../../helpers/chat/LLMPerformanceMonitor"); +const { OpenAI: OpenAIApi } = require("openai"); +const { humanFileSize } = require("../../helpers"); +const { safeJsonParse } = require("../../http"); + +class DockerModelRunnerLLM { + static cacheTime = 1000 * 60 * 60 * 24; // 24 hours + static cacheFolder = path.resolve( + process.env.STORAGE_DIR + ? path.resolve(process.env.STORAGE_DIR, "models", "docker-model-runner") + : path.resolve(__dirname, `../../../storage/models/docker-model-runner`) + ); + + constructor(embedder = null, modelPreference = null) { + if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH) + throw new Error("No Docker Model Runner API Base Path was set."); + if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF) + throw new Error("No Docker Model Runner Model Pref was set."); + + this.dmr = new OpenAIApi({ + baseURL: parseDockerModelRunnerEndpoint( + process.env.DOCKER_MODEL_RUNNER_BASE_PATH + ), + apiKey: null, + }); + + this.model = + modelPreference || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF; + this.embedder = embedder ?? 
new NativeEmbedder(); + this.defaultTemp = 0.7; + + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + + this.#log(`initialized with model: ${this.model}`); + } + + #log(text, ...args) { + console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args); + } + + static slog(text, ...args) { + console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args); + } + + async assertModelContextLimits() { + if (this.limits !== null) return; + this.limits = { + history: this.promptWindowLimit() * 0.15, + system: this.promptWindowLimit() * 0.15, + user: this.promptWindowLimit() * 0.7, + }; + } + + #appendContext(contextTexts = []) { + if (!contextTexts || !contextTexts.length) return ""; + return ( + "\nContext:\n" + + contextTexts + .map((text, i) => { + return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`; + }) + .join("") + ); + } + + streamingEnabled() { + return "streamGetChatCompletion" in this; + } + + /** DMR does not support curling the context window limit from the API, so we return the system defined limit. */ + static promptWindowLimit(_) { + const systemDefinedLimit = + Number(process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT) || 8192; + return systemDefinedLimit; + } + + promptWindowLimit() { + return this.constructor.promptWindowLimit(this.model); + } + + async isValidChatCompletionModel(_ = "") { + return true; + } + + /** + * Generates appropriate content array for a message + attachments. 
+ * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}} + * @returns {string|object[]} + */ + #generateContent({ userPrompt, attachments = [] }) { + if (!attachments.length) { + return userPrompt; + } + + const content = [{ type: "text", text: userPrompt }]; + for (let attachment of attachments) { + content.push({ + type: "image_url", + image_url: { + url: attachment.contentString, + detail: "auto", + }, + }); + } + return content.flat(); + } + + /** + * Construct the user prompt for this model. + * @param {{attachments: import("../../helpers").Attachment[]}} param0 + * @returns + */ + constructPrompt({ + systemPrompt = "", + contextTexts = [], + chatHistory = [], + userPrompt = "", + attachments = [], + }) { + const prompt = { + role: "system", + content: `${systemPrompt}${this.#appendContext(contextTexts)}`, + }; + return [ + prompt, + ...formatChatHistory(chatHistory, this.#generateContent), + { + role: "user", + content: this.#generateContent({ userPrompt, attachments }), + }, + ]; + } + + async getChatCompletion(messages = null, { temperature = 0.7 }) { + if (!this.model) + throw new Error( + `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!` + ); + + const result = await LLMPerformanceMonitor.measureAsyncFunction( + this.dmr.chat.completions.create({ + model: this.model, + messages, + temperature, + }) + ); + + if ( + !result.output.hasOwnProperty("choices") || + result.output.choices.length === 0 + ) + return null; + + return { + textResponse: result.output.choices[0].message.content, + metrics: { + prompt_tokens: result.output.usage?.prompt_tokens || 0, + completion_tokens: result.output.usage?.completion_tokens || 0, + total_tokens: result.output.usage?.total_tokens || 0, + outputTps: result.output.usage?.completion_tokens / result.duration, + duration: result.duration, + model: this.model, + timestamp: new Date(), + }, + }; + } + + async streamGetChatCompletion(messages = null, { 
temperature = 0.7 }) { + if (!this.model) + throw new Error( + `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!` + ); + + const measuredStreamRequest = await LLMPerformanceMonitor.measureStream({ + func: this.dmr.chat.completions.create({ + model: this.model, + stream: true, + messages, + temperature, + }), + messages, + runPromptTokenCalculation: true, + modelTag: this.model, + }); + return measuredStreamRequest; + } + + handleStream(response, stream, responseProps) { + return handleDefaultStreamResponseV2(response, stream, responseProps); + } + + // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations + async embedTextInput(textInput) { + return await this.embedder.embedTextInput(textInput); + } + async embedChunks(textChunks = []) { + return await this.embedder.embedChunks(textChunks); + } + + async compressMessages(promptArgs = {}, rawHistory = []) { + await this.assertModelContextLimits(); + const { messageArrayCompressor } = require("../../helpers/chat"); + const messageArray = this.constructPrompt(promptArgs); + return await messageArrayCompressor(this, messageArray, rawHistory); + } +} + +/** + * Parse the base path of the Docker Model Runner endpoint and return the host and port. + * @param {string} basePath - The base path of the Docker Model Runner endpoint. + * @param {'openai' | 'dmr'} to - The provider to parse the endpoint for (internal DMR or openai-compatible) + * @returns {string | null} + */ +function parseDockerModelRunnerEndpoint(basePath = null, to = "openai") { + if (!basePath) return null; + try { + const url = new URL(basePath); + if (to === "openai") url.pathname = "engines/v1"; + else if (to === "dmr") url.pathname = ""; + return url.toString(); + } catch (e) { + return basePath; + } +} + +/** + * @typedef {Object} DockerRunnerInstalledModel + * @property {string} id - The SHA256 identifier of the model layer/blob. 
+ * @property {string[]} tags - List of tags or aliases associated with this model (e.g., "ai/qwen3:4B-UD-Q4_K_XL"). + * @property {number} created - The Unix timestamp (seconds) when the model was created. + * @property {string} config - The configuration of the model. + * @property {string} config.format - The file format (e.g., "gguf"). + * @property {string} config.quantization - The quantization level (e.g., "MOSTLY_Q4_K_M", "Q4_0"). + * @property {string} config.parameters - The parameter count formatted as a string (e.g., "4.02 B"). + * @property {string} config.architecture - The base architecture of the model (e.g., "qwen3", "llama"). + * @property {string} config.size - The physical file size formatted as a string (e.g., "2.37 GiB"). + * @property {string} config?.gguf - Raw GGUF metadata headers containing tokenizer, architecture details, and licensing. + * @property {string} config?.gguf['general.base_model.0.organization'] - The tokenizer of the model. + * @property {string} config?.gguf['general.basename'] - The base name of the model (the real name of the model, not the tag) + * @property {string} config?.gguf['*.context_length'] - The context length of the model. will be something like qwen3.context_length + */ + +/** + * Fetch the remote models from the Docker Hub and cache the results. 
+ * @returns {Promise>} + */ +async function fetchRemoteModels() { + const cachePath = path.resolve( + DockerModelRunnerLLM.cacheFolder, + "models.json" + ); + const cachedAtPath = path.resolve( + DockerModelRunnerLLM.cacheFolder, + ".cached_at" + ); + let cacheTime = 0; + + if (fs.existsSync(cachePath) && fs.existsSync(cachedAtPath)) { + cacheTime = Number(fs.readFileSync(cachedAtPath, "utf8")); + if (Date.now() - cacheTime < DockerModelRunnerLLM.cacheTime) + return safeJsonParse(fs.readFileSync(cachePath, "utf8")); + } + + DockerModelRunnerLLM.slog(`Refreshing remote models from Docker Hub`); + // Now hit the Docker Hub API to get the remote model namespace and root tags + const availableNamespaces = []; // array of strings like ai/mistral, ai/qwen3, etc + let nextPage = + "https://hub.docker.com/v2/namespaces/ai/repositories?page_size=100&page=1"; + while (nextPage) { + const response = await fetch(nextPage) + .then((res) => res.json()) + .then((data) => { + const namespaces = data.results + .filter( + (result) => + result.namespace && + result.name && + result.content_types.includes("model") && + result.namespace === "ai" + ) + .map((result) => result.namespace + "/" + result.name); + availableNamespaces.push(...namespaces); + }) + .catch((e) => { + DockerModelRunnerLLM.slog( + `Error fetching remote models from Docker Hub`, + e + ); + return []; + }); + if (!response) break; + if (!response || !response.next) break; + nextPage = response.next; + } + + const availableRemoteModels = {}; + const BATCH_SIZE = 10; + + // Run batch requests to avoid rate limiting but also + // improve the speed of the total request time. 
+ for (let i = 0; i < availableNamespaces.length; i += BATCH_SIZE) { + const batch = availableNamespaces.slice(i, i + BATCH_SIZE); + DockerModelRunnerLLM.slog( + `Fetching tags for batch ${Math.floor(i / BATCH_SIZE) + 1} of ${Math.ceil(availableNamespaces.length / BATCH_SIZE)}` + ); + + await Promise.all( + batch.map(async (namespace) => { + const [organization, model] = namespace.split("/"); + const namespaceUrl = new URL( + "https://hub.docker.com/v2/namespaces/ai/repositories/" + + model + + "/tags" + ); + + DockerModelRunnerLLM.slog( + `Fetching tags for ${namespaceUrl.toString()}` + ); + await fetch(namespaceUrl.toString()) + .then((res) => res.json()) + .then((data) => { + const tags = data.results.map((result) => { + return { + id: `${organization}/${model}:${result.name}`, + name: `${model}:${result.name}`, + size: humanFileSize(result.full_size), + organization: model, + }; + }); + availableRemoteModels[model] = tags; + }) + .catch((e) => { + DockerModelRunnerLLM.slog( + `Error fetching tags for ${namespaceUrl.toString()}`, + e + ); + }); + }) + ); + } + + if (Object.keys(availableRemoteModels).length === 0) { + DockerModelRunnerLLM.slog( + `No remote models found - API may be down or not available` + ); + return {}; + } + + if (!fs.existsSync(DockerModelRunnerLLM.cacheFolder)) + fs.mkdirSync(DockerModelRunnerLLM.cacheFolder, { recursive: true }); + fs.writeFileSync(cachePath, JSON.stringify(availableRemoteModels), { + encoding: "utf8", + }); + fs.writeFileSync(cachedAtPath, String(Number(new Date())), { + encoding: "utf8", + }); + return availableRemoteModels; +} + +/** + * This function will fetch the remote models from the Docker Hub as well + * as the local models installed on the system. + * @param {string} basePath - The base path of the Docker Model Runner endpoint. 
+ */ +async function getDockerModels(basePath = null) { + let availableModels = {}; + /** @type {Array} */ + let installedModels = {}; + + try { + // Grab the locally installed models from the Docker Model Runner API + const dmrUrl = new URL( + parseDockerModelRunnerEndpoint( + basePath ?? process.env.DOCKER_MODEL_RUNNER_BASE_PATH, + "dmr" + ) + ); + dmrUrl.pathname = "/models"; + + await fetch(dmrUrl.toString()) + .then((res) => res.json()) + .then((data) => { + data?.map((model) => { + const id = model.tags.at(0); + // eg: ai/qwen3:latest -> qwen3 + const tag = + id?.split("/").pop()?.split(":")?.at(1) ?? + id?.split(":").at(1) ?? + "latest"; + const organization = id?.split("/").pop()?.split(":")?.at(0) ?? id; + installedModels[id] = { + id: id, + name: `${organization}:${tag}`, + size: model.config?.size ?? "Unknown size", + organization: organization, + }; + }); + }); + + // Now hit the Docker Hub API to get the remote model namespace and root tags + const remoteModels = await fetchRemoteModels(); + for (const [modelName, tags] of Object.entries(remoteModels)) { + availableModels[modelName] = { tags: [] }; + for (const tag of tags) { + if (!installedModels[tag.id]) + availableModels[modelName].tags.push({ ...tag, downloaded: false }); + else availableModels[modelName].tags.push({ ...tag, downloaded: true }); + } + } + } catch (e) { + DockerModelRunnerLLM.slog(`Error getting Docker models`, e); + } finally { + return Object.values(availableModels).flatMap((m) => m.tags); + } +} + +module.exports = { + DockerModelRunnerLLM, + parseDockerModelRunnerEndpoint, + getDockerModels, +}; diff --git a/server/utils/agents/aibitat/index.js b/server/utils/agents/aibitat/index.js index cc15c123d..0a2f6f450 100644 --- a/server/utils/agents/aibitat/index.js +++ b/server/utils/agents/aibitat/index.js @@ -992,6 +992,8 @@ ${this.getHistory({ to: route.to }) return new Providers.GiteeAIProvider({ model: config.model }); case "cohere": return new Providers.CohereProvider({ model: 
config.model }); + case "docker-model-runner": + return new Providers.DockerModelRunnerProvider({ model: config.model }); default: throw new Error( `Unknown provider: ${config.provider}. Please use a valid provider.` diff --git a/server/utils/agents/aibitat/providers/ai-provider.js b/server/utils/agents/aibitat/providers/ai-provider.js index 3752b161a..9d00c9b37 100644 --- a/server/utils/agents/aibitat/providers/ai-provider.js +++ b/server/utils/agents/aibitat/providers/ai-provider.js @@ -18,6 +18,9 @@ const { ChatOllama } = require("@langchain/community/chat_models/ollama"); const { toValidNumber, safeJsonParse } = require("../../../http"); const { getLLMProviderClass } = require("../../../helpers"); const { parseLMStudioBasePath } = require("../../../AiProviders/lmStudio"); +const { + parseDockerModelRunnerEndpoint, +} = require("../../../AiProviders/dockerModelRunner"); const { parseFoundryBasePath } = require("../../../AiProviders/foundry"); const { SystemPromptVariables, @@ -313,6 +316,16 @@ class Provider { ...config, }); } + case "docker-model-runner": + return new ChatOpenAI({ + configuration: { + baseURL: parseDockerModelRunnerEndpoint( + process.env.DOCKER_MODEL_RUNNER_BASE_PATH + ), + }, + apiKey: null, + ...config, + }); default: throw new Error(`Unsupported provider ${provider} for this task.`); } diff --git a/server/utils/agents/aibitat/providers/dockerModelRunner.js b/server/utils/agents/aibitat/providers/dockerModelRunner.js new file mode 100644 index 000000000..e00f8bc50 --- /dev/null +++ b/server/utils/agents/aibitat/providers/dockerModelRunner.js @@ -0,0 +1,101 @@ +const OpenAI = require("openai"); +const Provider = require("./ai-provider.js"); +const InheritMultiple = require("./helpers/classes.js"); +const UnTooled = require("./helpers/untooled.js"); +const { + parseDockerModelRunnerEndpoint, +} = require("../../../AiProviders/dockerModelRunner/index.js"); + +/** + * The agent provider for the Docker Model Runner. 
+ */
+class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
+  // Model tag this provider chats with (eg: "ai/qwen3:latest").
+  model;
+
+  /**
+   * Builds an OpenAI-compatible client pointed at the local Docker Model
+   * Runner endpoint; no API key is needed for a local runner.
+   * @param {{model?: string}} config
+   */
+  constructor(config = {}) {
+    super();
+    const model =
+      config?.model || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF || null;
+    const client = new OpenAI({
+      baseURL: parseDockerModelRunnerEndpoint(
+        process.env.DOCKER_MODEL_RUNNER_BASE_PATH
+      ),
+      apiKey: null,
+      maxRetries: 3,
+    });
+
+    this._client = client;
+    this.model = model;
+    this.verbose = true;
+  }
+
+  get client() {
+    return this._client;
+  }
+
+  get supportsAgentStreaming() {
+    return true;
+  }
+
+  // Non-streaming completion - resolves to the first choice's message content.
+  async #handleFunctionCallChat({ messages = [] }) {
+    return await this.client.chat.completions
+      .create({
+        model: this.model,
+        messages,
+      })
+      .then((result) => {
+        if (!result.hasOwnProperty("choices"))
+          throw new Error("Docker Model Runner chat: No results!");
+        if (result.choices.length === 0)
+          throw new Error("Docker Model Runner chat: No results length!");
+        return result.choices[0].message.content;
+      })
+      // NOTE(review): failures are intentionally swallowed; callers see null.
+      .catch((_) => {
+        return null;
+      });
+  }
+
+  // Streaming completion - returns the raw SDK stream consumed by stream() below.
+  async #handleFunctionCallStream({ messages = [] }) {
+    return await this.client.chat.completions.create({
+      model: this.model,
+      stream: true,
+      messages,
+    });
+  }
+
+  async stream(messages, functions = [], eventHandler = null) {
+    return await UnTooled.prototype.stream.call(
+      this,
+      messages,
+      functions,
+      this.#handleFunctionCallStream.bind(this),
+      eventHandler
+    );
+  }
+
+  async complete(messages, functions = []) {
+    return await UnTooled.prototype.complete.call(
+      this,
+      messages,
+      functions,
+      this.#handleFunctionCallChat.bind(this)
+    );
+  }
+
+  /**
+   * Get the cost of the completion.
+   *
+   * @param _usage The completion to get the cost for.
+   * @returns The cost of the completion.
+   * Stubbed since Docker Model Runner has no cost basis.
+   */
+  getCost(_usage) {
+    return 0;
+  }
+}
+
+module.exports = DockerModelRunnerProvider;
diff --git a/server/utils/agents/aibitat/providers/index.js b/server/utils/agents/aibitat/providers/index.js
index e4a11995a..c53c01c38 100644
--- a/server/utils/agents/aibitat/providers/index.js
+++ b/server/utils/agents/aibitat/providers/index.js
@@ -29,6 +29,7 @@ const CometApiProvider = require("./cometapi.js");
 const FoundryProvider = require("./foundry.js");
 const GiteeAIProvider = require("./giteeai.js");
 const CohereProvider = require("./cohere.js");
+const DockerModelRunnerProvider = require("./dockerModelRunner.js");
 
 module.exports = {
   OpenAIProvider,
@@ -62,4 +63,5 @@ module.exports = {
   FoundryProvider,
   GiteeAIProvider,
   CohereProvider,
+  DockerModelRunnerProvider,
 };
diff --git a/server/utils/agents/index.js b/server/utils/agents/index.js
index 038496cc9..24c496b61 100644
--- a/server/utils/agents/index.js
+++ b/server/utils/agents/index.js
@@ -217,6 +217,12 @@ class AgentHandler {
         if (!process.env.COHERE_API_KEY)
           throw new Error("Cohere API key must be provided to use agents.");
         break;
+      case "docker-model-runner":
+        if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
+          throw new Error(
+            "Docker Model Runner base path must be provided to use agents."
+          );
+        break;
       default:
         throw new Error(
           "No workspace agent provider set. Please set your agent provider in the workspace's settings"
@@ -297,6 +303,8 @@ class AgentHandler {
         return process.env.GITEE_AI_MODEL_PREF ?? null;
       case "cohere":
         return process.env.COHERE_MODEL_PREF ?? "command-r-08-2024";
+      case "docker-model-runner":
+        return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF ?? 
null;
       default:
         return null;
     }
diff --git a/server/utils/helpers/customModels.js b/server/utils/helpers/customModels.js
index e7e094a16..649d2e566 100644
--- a/server/utils/helpers/customModels.js
+++ b/server/utils/helpers/customModels.js
@@ -13,6 +13,7 @@ const { fetchPPIOModels } = require("../AiProviders/ppio");
 const { GeminiLLM } = require("../AiProviders/gemini");
 const { fetchCometApiModels } = require("../AiProviders/cometapi");
 const { parseFoundryBasePath } = require("../AiProviders/foundry");
+const { getDockerModels } = require("../AiProviders/dockerModelRunner");
 
 const SUPPORT_CUSTOM_MODELS = [
   "openai",
@@ -43,6 +44,7 @@ const SUPPORT_CUSTOM_MODELS = [
   "cohere",
   "zai",
   "giteeai",
+  "docker-model-runner",
   // Embedding Engines
   "native-embedder",
   "cohere-embedder",
@@ -116,6 +118,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
       return await getOpenRouterEmbeddingModels();
     case "giteeai":
       return await getGiteeAIModels(apiKey);
+    case "docker-model-runner":
+      return await getDockerModelRunnerModels(basePath);
     default:
       return { models: [], error: "Invalid provider for custom models" };
   }
@@ -864,6 +868,19 @@ async function getOpenRouterEmbeddingModels() {
   return { models, error: null };
 }
 
+/**
+ * Lists the models available from a Docker Model Runner instance.
+ * @param {string|null} basePath - Optional base path override for the endpoint.
+ * @returns {Promise<{models: Array<object>, error: string|null}>}
+ */
+async function getDockerModelRunnerModels(basePath = null) {
+  try {
+    const models = await getDockerModels(basePath);
+    return { models, error: null };
+  } catch (e) {
+    console.error(`DockerModelRunner:getDockerModelRunnerModels`, e.message);
+    return {
+      models: [],
+      error: "Could not fetch Docker Model Runner Models",
+    };
+  }
+}
+
 module.exports = {
   getCustomModels,
   SUPPORT_CUSTOM_MODELS,
diff --git a/server/utils/helpers/index.js b/server/utils/helpers/index.js
index 9b3520155..d508f7eea 100644
--- a/server/utils/helpers/index.js
+++ b/server/utils/helpers/index.js
@@ -229,6 +229,11 @@ function getLLMProvider({ provider = null, model = null } = {}) {
     case "giteeai":
       const { GiteeAILLM } = require("../AiProviders/giteeai");
       return 
new GiteeAILLM(embedder, model);
+    case "docker-model-runner":
+      const {
+        DockerModelRunnerLLM,
+      } = require("../AiProviders/dockerModelRunner");
+      return new DockerModelRunnerLLM(embedder, model);
     default:
       throw new Error(
         `ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`
@@ -394,6 +399,11 @@ function getLLMProviderClass({ provider = null } = {}) {
     case "giteeai":
       const { GiteeAILLM } = require("../AiProviders/giteeai");
       return GiteeAILLM;
+    case "docker-model-runner":
+      const {
+        DockerModelRunnerLLM,
+      } = require("../AiProviders/dockerModelRunner");
+      return DockerModelRunnerLLM;
     default:
       return null;
   }
@@ -470,6 +480,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
       return process.env.ZAI_MODEL_PREF;
     case "giteeai":
       return process.env.GITEE_AI_MODEL_PREF;
+    case "docker-model-runner":
+      return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
     default:
       return null;
   }
@@ -495,6 +507,30 @@ function toChunks(arr, size) {
   );
 }
 
+/**
+ * Format a byte count as a human-readable string (eg: "1.2 GiB").
+ * @param {number} bytes - The number of bytes to format.
+ * @param {boolean} si - True for metric (SI, powers of 1000) units;
+ * false for binary (IEC, powers of 1024) units.
+ * @param {number} dp - Number of decimal places to display.
+ * @returns {string} The formatted size string.
+ */
+function humanFileSize(bytes, si = false, dp = 1) {
+  const thresh = si ? 1000 : 1024;
+
+  if (Math.abs(bytes) < thresh) {
+    return bytes + " B";
+  }
+
+  const units = si
+    ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
+    : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
+  let u = -1;
+  const r = 10 ** dp;
+
+  // Divide down one unit at a time until the rounded value fits, or we run
+  // out of unit names.
+  do {
+    bytes /= thresh;
+    ++u;
+  } while (
+    Math.round(Math.abs(bytes) * r) / r >= thresh &&
+    u < units.length - 1
+  );
+
+  return bytes.toFixed(dp) + " " + units[u];
+}
+
 module.exports = {
   getEmbeddingEngineSelection,
   maximumChunkLength,
@@ -503,4 +539,5 @@ module.exports = {
   getBaseLLMProviderModel,
   getLLMProvider,
   toChunks,
+  humanFileSize,
 };
diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js
index 6603d9a77..256bba377 100644
--- a/server/utils/helpers/updateENV.js
+++ b/server/utils/helpers/updateENV.js
@@ -789,6 +789,20 @@ const KEY_MAPPING = {
     envKey: "GITEE_AI_MODEL_TOKEN_LIMIT",
     checks: [nonZero],
   },
+
+  // Docker Model Runner Options
+  DockerModelRunnerBasePath: {
+    envKey: "DOCKER_MODEL_RUNNER_BASE_PATH",
+    checks: [isValidURL],
+  },
+  DockerModelRunnerModelPref: {
+    envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_PREF",
+    checks: [isNotEmpty],
+  },
+  DockerModelRunnerModelTokenLimit: {
+    envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT",
+    checks: [nonZero],
+  },
 };
 
 function isNotEmpty(input = "") {
@@ -902,6 +916,7 @@ function supportedLLM(input = "") {
     "foundry",
     "zai",
     "giteeai",
+    "docker-model-runner",
   ].includes(input);
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }