mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-25 17:15:37 +02:00
Improved DMR support (#4863)
* Improve DMR support - Autodetect models installed - Grab all models from hub.docker to show available - UI to handle render,search, install, and management of models - Support functionality for chat, stream, and agentic calls * forgot files * fix loader circle being too large fix tooltip width command adjust location of docker installer open for web platform * adjust imports
This commit is contained in:
2
.github/workflows/dev-build.yaml
vendored
2
.github/workflows/dev-build.yaml
vendored
@@ -6,7 +6,7 @@ concurrency:
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: ['4822-feat-remove-workspace-creation-onboarding-page'] # put your current branch to create a build. Core team only.
|
||||
branches: ['4391-dmr-support'] # put your current branch to create a build. Core team only.
|
||||
paths-ignore:
|
||||
- '**.md'
|
||||
- 'cloud-deployments/*'
|
||||
|
||||
@@ -106,6 +106,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace
|
||||
- [Moonshot AI](https://www.moonshot.ai/)
|
||||
- [Microsoft Foundry Local](https://github.com/microsoft/Foundry-Local)
|
||||
- [CometAPI (chat models)](https://api.cometapi.com/)
|
||||
- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
|
||||
|
||||
**Embedder models:**
|
||||
|
||||
|
||||
@@ -162,6 +162,11 @@ GID='1000'
|
||||
# GITEE_AI_MODEL_PREF=
|
||||
# GITEE_AI_MODEL_TOKEN_LIMIT=
|
||||
|
||||
# LLM_PROVIDER='docker-model-runner'
|
||||
# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
|
||||
# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini'
|
||||
# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096
|
||||
|
||||
###########################################
|
||||
######## Embedding API SElECTION ##########
|
||||
###########################################
|
||||
|
||||
@@ -33,7 +33,7 @@ export default function DellProAIStudioOptions({
|
||||
/>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-2">
|
||||
Token context window
|
||||
Model context window
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
|
||||
@@ -0,0 +1,314 @@
|
||||
import { useRef, useState, useEffect } from "react";
|
||||
import {
|
||||
CaretDown,
|
||||
CaretRight,
|
||||
Cpu,
|
||||
DownloadSimple,
|
||||
CircleNotch,
|
||||
CheckCircle,
|
||||
Dot,
|
||||
Circle,
|
||||
DotsThreeCircleVertical,
|
||||
DotsThreeVertical,
|
||||
CloudArrowDown,
|
||||
} from "@phosphor-icons/react";
|
||||
import pluralize from "pluralize";
|
||||
import { titleCase } from "text-case";
|
||||
import { humanFileSize } from "@/utils/numbers";
|
||||
|
||||
/**
|
||||
* @typedef {Object} ModelDefinition
|
||||
* @property {string} id - The ID of the model.
|
||||
* @property {'CPU' | 'GPU' | 'NPU'} deviceType - The device type of the model.
|
||||
* @property {number} modelSize - The size of the model in megabytes.
|
||||
* @property {boolean} downloaded - Whether the model is downloaded.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param {object} props - The props of the component.
|
||||
* @param {string} props.alias - The alias of the model.
|
||||
* @param {Array<ModelDefinition>} props.models - The models to display.
|
||||
* @param {(model: string, progressCallback: (percentage: number) => void) => void} props.downloadModel - The function to download the model.
|
||||
* @param {(model: string) => void} props.uninstallModel - The function to uninstall the model.
|
||||
* @param {(model: string) => void} props.setActiveModel - The function to set the active model.
|
||||
* @param {string} props.selectedModelId - The ID of the selected model.
|
||||
* @param {object} props.ui - The UI configuration.
|
||||
* @param {boolean} props.ui.showRuntime - Whether to show the runtime.
|
||||
* @returns {React.ReactNode}
|
||||
*/
|
||||
export default function ModelTable({
|
||||
alias = "",
|
||||
models = [],
|
||||
downloadModel = null,
|
||||
uninstallModel = null,
|
||||
setActiveModel = () => {},
|
||||
selectedModelId = "",
|
||||
ui = {
|
||||
showRuntime: true,
|
||||
},
|
||||
}) {
|
||||
const [showAll, setShowAll] = useState(
|
||||
models.some((model) => model.downloaded)
|
||||
);
|
||||
const totalModels = models.length;
|
||||
|
||||
return (
|
||||
<div className="flex flex-col w-full border-b border-theme-modal-border py-[18px]">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowAll(!showAll)}
|
||||
className="border-none text-theme-text-secondary text-sm font-medium hover:underline flex items-center gap-x-[8px]"
|
||||
>
|
||||
{showAll ? (
|
||||
<CaretDown
|
||||
size={16}
|
||||
weight="bold"
|
||||
className="text-theme-text-secondary"
|
||||
/>
|
||||
) : (
|
||||
<CaretRight
|
||||
size={16}
|
||||
weight="bold"
|
||||
className="text-theme-text-secondary"
|
||||
/>
|
||||
)}
|
||||
<h3 className="flex items-center gap-x-1 text-theme-text-primary text-base font-bold">
|
||||
{titleCase(alias)}
|
||||
<span className="text-theme-text-secondary font-normal text-sm">
|
||||
({totalModels} {pluralize("Model", totalModels)})
|
||||
</span>
|
||||
</h3>
|
||||
</button>
|
||||
<div hidden={!showAll} className="mt-[16px]">
|
||||
<div className="w-full flex flex-col gap-y-[8px]">
|
||||
{models.map((model) => (
|
||||
<ModelRow
|
||||
key={model.id}
|
||||
model={model}
|
||||
downloadModel={downloadModel}
|
||||
uninstallModel={uninstallModel}
|
||||
setActiveModel={setActiveModel}
|
||||
selectedModelId={selectedModelId}
|
||||
ui={ui}
|
||||
/>
|
||||
))}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {{deviceType: ModelDefinition["deviceType"]}} deviceType
|
||||
* @returns {React.ReactNode}
|
||||
*/
|
||||
function DeviceTypeTag({ deviceType }) {
|
||||
const Wrapper = ({ text, bgClass, textClass }) => {
|
||||
return (
|
||||
<div
|
||||
className={
|
||||
bgClass + " px-1.5 py-1 rounded-full flex items-center gap-x-1 w-fit"
|
||||
}
|
||||
>
|
||||
<Cpu size={16} weight="bold" className={textClass} />
|
||||
<p className={textClass + " text-xs"}>{text}</p>
|
||||
</div>
|
||||
);
|
||||
};
|
||||
|
||||
switch (deviceType?.toLowerCase()) {
|
||||
case "cpu":
|
||||
return (
|
||||
<Wrapper
|
||||
text="CPU"
|
||||
bgClass="bg-blue-600/20"
|
||||
textClass="text-blue-300"
|
||||
/>
|
||||
);
|
||||
case "gpu":
|
||||
return (
|
||||
<Wrapper
|
||||
text="GPU"
|
||||
bgClass="bg-green-600/20"
|
||||
textClass="text-green-300"
|
||||
/>
|
||||
);
|
||||
case "npu":
|
||||
return (
|
||||
<Wrapper
|
||||
text="NPU"
|
||||
bgClass="bg-indigo-600/20"
|
||||
textClass="text-indigo-300"
|
||||
/>
|
||||
);
|
||||
default:
|
||||
return (
|
||||
<Wrapper
|
||||
text="CPU"
|
||||
bgClass="bg-blue-600/20"
|
||||
textClass="text-blue-300"
|
||||
/>
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {object} props - The props of the component.
|
||||
* @param {ModelDefinition} props.model - The model to display.
|
||||
* @param {(model: string, progressCallback: (percentage: number) => void) => Promise<void>} props.downloadModel - The function to download the model.
|
||||
* @param {(model: string) => Promise<void>} props.uninstallModel - The function to uninstall the model.
|
||||
* @param {(model: string) => void} props.setActiveModel - The function to set the active model.
|
||||
* @param {string} props.selectedModelId - The ID of the selected model.
|
||||
* @param {object} props.ui - The UI configuration.
|
||||
* @param {boolean} props.ui.showRuntime - Whether to show the runtime.
|
||||
* @returns {React.ReactNode}
|
||||
*/
|
||||
function ModelRow({
|
||||
model,
|
||||
downloadModel = null,
|
||||
uninstallModel = null,
|
||||
setActiveModel,
|
||||
selectedModelId,
|
||||
ui = {
|
||||
showRuntime: true,
|
||||
},
|
||||
}) {
|
||||
const modelRowRef = useRef(null);
|
||||
const [showOptions, setShowOptions] = useState(false);
|
||||
const [processing, setProcessing] = useState(false);
|
||||
const [_downloadPercentage, setDownloadPercentage] = useState(0);
|
||||
const fileSize =
|
||||
typeof model.size === "number"
|
||||
? humanFileSize(model.size * 1e6, true, 2)
|
||||
: (model.size ?? "Unknown size");
|
||||
const [isActiveModel, setIsActiveModel] = useState(
|
||||
selectedModelId === model.id
|
||||
);
|
||||
|
||||
async function handleSetActiveModel() {
|
||||
setDownloadPercentage(0);
|
||||
if (model.downloaded) setActiveModel(model.id);
|
||||
else {
|
||||
try {
|
||||
if (!downloadModel) return;
|
||||
setProcessing(true);
|
||||
await downloadModel(model.id, fileSize, (percentage) =>
|
||||
setDownloadPercentage(percentage)
|
||||
);
|
||||
} catch {
|
||||
} finally {
|
||||
setProcessing(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async function handleUninstallModel() {
|
||||
if (!uninstallModel) return;
|
||||
try {
|
||||
setProcessing(true);
|
||||
await uninstallModel(model.id);
|
||||
} catch {
|
||||
} finally {
|
||||
setProcessing(false);
|
||||
}
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
if (selectedModelId === model.id) {
|
||||
setIsActiveModel(true);
|
||||
modelRowRef.current.classList.add("!bg-gray-200/10");
|
||||
setTimeout(
|
||||
() => modelRowRef.current.classList.remove("!bg-gray-200/10"),
|
||||
800
|
||||
);
|
||||
} else {
|
||||
setIsActiveModel(false);
|
||||
}
|
||||
}, [selectedModelId]);
|
||||
|
||||
return (
|
||||
<div
|
||||
ref={modelRowRef}
|
||||
className="w-full grid grid-cols-[1fr_auto_1fr] items-center gap-x-4 transition-all duration-300 rounded-lg"
|
||||
>
|
||||
<button
|
||||
type="button"
|
||||
className="border-none flex items-center gap-x-[8px] whitespace-nowrap py-[8px]"
|
||||
disabled={processing}
|
||||
onClick={handleSetActiveModel}
|
||||
>
|
||||
{ui.showRuntime && <DeviceTypeTag deviceType={model.deviceType} />}
|
||||
<p className="text-theme-text-primary text-base px-2">{model.name}</p>
|
||||
<p className="text-theme-text-secondary opacity-70 text-base">
|
||||
{fileSize}
|
||||
</p>
|
||||
</button>
|
||||
|
||||
<div className="justify-self-start">
|
||||
{isActiveModel && (
|
||||
<div className="flex items-center justify-center gap-x-[10px] whitespace-nowrap">
|
||||
<Circle size={8} weight="fill" className="text-green-500" />
|
||||
<p className="text-theme-text-primary text-sm">Active</p>
|
||||
</div>
|
||||
)}
|
||||
|
||||
{!isActiveModel && model.downloaded && !uninstallModel && (
|
||||
<p className="text-theme-text-secondary text-sm italic whitespace-nowrap">
|
||||
Installed
|
||||
</p>
|
||||
)}
|
||||
|
||||
{!model.downloaded && (
|
||||
<p className="text-theme-text-secondary text-sm italic whitespace-nowrap">
|
||||
Not Installed
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<div className="relative justify-self-end">
|
||||
{uninstallModel && model.downloaded ? (
|
||||
<>
|
||||
<button
|
||||
type="button"
|
||||
className="border-none hover:bg-white/20 rounded-lg p-1"
|
||||
onClick={() => setShowOptions(!showOptions)}
|
||||
>
|
||||
<DotsThreeVertical
|
||||
size={22}
|
||||
weight="bold"
|
||||
className="text-theme-text-primary cursor-pointer"
|
||||
/>
|
||||
</button>
|
||||
{showOptions && (
|
||||
<div className="absolute top-[20px] right-[20px] bg-theme-action-menu-bg border border-theme-modal-border rounded-lg py-2 px-4 shadow-lg">
|
||||
<button
|
||||
type="button"
|
||||
className="border-none font-medium group"
|
||||
onClick={handleUninstallModel}
|
||||
>
|
||||
<p className="text-sm text-theme-text-primary group-hover:underline group-hover:text-theme-text-secondary">
|
||||
Uninstall
|
||||
</p>
|
||||
</button>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
) : null}
|
||||
{!model.downloaded ? (
|
||||
<button
|
||||
type="button"
|
||||
className="border-none hover:bg-white/20 rounded-lg p-1 flex items-center gap-x-1"
|
||||
onClick={handleSetActiveModel}
|
||||
>
|
||||
<CloudArrowDown
|
||||
size={16}
|
||||
weight="bold"
|
||||
className="text-blue-300 cursor-pointer"
|
||||
/>
|
||||
<p className="text-sm text-blue-300">Install</p>
|
||||
</button>
|
||||
) : null}
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -0,0 +1,426 @@
|
||||
import { useState, useEffect } from "react";
|
||||
import System from "@/models/system";
|
||||
import useProviderEndpointAutoDiscovery from "@/hooks/useProviderEndpointAutoDiscovery";
|
||||
import {
|
||||
ArrowClockwise,
|
||||
CircleNotch,
|
||||
MagnifyingGlass,
|
||||
Info,
|
||||
} from "@phosphor-icons/react";
|
||||
import strDistance from "js-levenshtein";
|
||||
import { LLM_PREFERENCE_CHANGED_EVENT } from "@/pages/GeneralSettings/LLMPreference";
|
||||
import { DOCKER_MODEL_RUNNER_COMMON_URLS } from "@/utils/constants";
|
||||
import { Tooltip } from "react-tooltip";
|
||||
import { Link } from "react-router-dom";
|
||||
import ModelTable from "./ModelTable";
|
||||
import * as Skeleton from "react-loading-skeleton";
|
||||
import "react-loading-skeleton/dist/skeleton.css";
|
||||
|
||||
export default function DockerModelRunnerOptions({ settings }) {
|
||||
const {
|
||||
autoDetecting: loading,
|
||||
basePath,
|
||||
basePathValue,
|
||||
handleAutoDetectClick,
|
||||
} = useProviderEndpointAutoDiscovery({
|
||||
provider: "docker-model-runner",
|
||||
initialBasePath: settings?.DockerModelRunnerBasePath,
|
||||
ENDPOINTS: DOCKER_MODEL_RUNNER_COMMON_URLS,
|
||||
});
|
||||
|
||||
const [maxTokens, setMaxTokens] = useState(
|
||||
settings?.DockerModelRunnerModelTokenLimit || 4096
|
||||
);
|
||||
|
||||
return (
|
||||
<div className="w-full flex flex-col gap-y-7">
|
||||
<div className="flex gap-[36px] mt-1.5 flex-wrap">
|
||||
<div className="flex flex-col w-60">
|
||||
<div className="flex items-center gap-1 mb-3">
|
||||
<div className="flex justify-between items-center gap-x-2">
|
||||
<label className="text-white text-sm font-semibold">
|
||||
Base URL
|
||||
</label>
|
||||
{loading ? (
|
||||
<CircleNotch className="w-4 h-4 text-theme-text-secondary animate-spin" />
|
||||
) : (
|
||||
<>
|
||||
{!basePathValue.value && (
|
||||
<button
|
||||
onClick={handleAutoDetectClick}
|
||||
className="bg-primary-button text-xs font-medium px-2 py-1 rounded-lg hover:bg-secondary hover:text-white shadow-[0_4px_14px_rgba(0,0,0,0.25)]"
|
||||
>
|
||||
Auto-Detect
|
||||
</button>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</div>
|
||||
<Tooltip
|
||||
id="docker-model-runner-base-url"
|
||||
place="top"
|
||||
delayShow={300}
|
||||
delayHide={800}
|
||||
clickable={true}
|
||||
className="tooltip !text-xs !opacity-100 z-99"
|
||||
style={{
|
||||
maxWidth: "250px",
|
||||
whiteSpace: "normal",
|
||||
wordWrap: "break-word",
|
||||
}}
|
||||
>
|
||||
Enter the URL where the Docker Model Runner is running.
|
||||
<br />
|
||||
<br />
|
||||
You <b>must</b> have enabled the Docker Model Runner TCP support
|
||||
for this to work.
|
||||
<br />
|
||||
<br />
|
||||
<Link
|
||||
to="https://docs.docker.com/ai/model-runner/get-started/#docker-desktop"
|
||||
target="_blank"
|
||||
className="text-blue-500 hover:underline"
|
||||
>
|
||||
Learn more →
|
||||
</Link>
|
||||
</Tooltip>
|
||||
<div
|
||||
className="text-theme-text-secondary cursor-pointer hover:bg-theme-bg-primary flex items-center justify-center rounded-full"
|
||||
data-tooltip-id="docker-model-runner-base-url"
|
||||
data-tooltip-place="top"
|
||||
data-tooltip-delay-hide={800}
|
||||
>
|
||||
<Info size={18} className="text-theme-text-secondary" />
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<input
|
||||
type="url"
|
||||
name="DockerModelRunnerBasePath"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="http://localhost:12434/engines/llama.cpp/v1"
|
||||
value={basePathValue.value}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
onChange={basePath.onChange}
|
||||
onBlur={basePath.onBlur}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<div className="flex items-center gap-1 mb-3">
|
||||
<label className="text-white text-sm font-semibold block">
|
||||
Model context window
|
||||
</label>
|
||||
<Tooltip
|
||||
id="docker-model-runner-model-context-window"
|
||||
place="top"
|
||||
delayShow={300}
|
||||
delayHide={800}
|
||||
clickable={true}
|
||||
className="tooltip !text-xs !opacity-100 z-99"
|
||||
style={{
|
||||
maxWidth: "350px",
|
||||
whiteSpace: "normal",
|
||||
wordWrap: "break-word",
|
||||
}}
|
||||
>
|
||||
The maximum number of tokens that can be used for a model context
|
||||
window.
|
||||
<br />
|
||||
<br />
|
||||
To set the context window limit for a model, you can use the{" "}
|
||||
<code>docker run</code> command with the{" "}
|
||||
<code>--context-window</code> parameter.
|
||||
<br />
|
||||
<br />
|
||||
<code>
|
||||
docker model configure --context-size 8192 ai/qwen3:latest
|
||||
</code>
|
||||
<br />
|
||||
<br />
|
||||
<Link
|
||||
to="https://docs.docker.com/ai/model-runner/#context-size"
|
||||
target="_blank"
|
||||
className="text-blue-500 hover:underline"
|
||||
>
|
||||
Learn more →
|
||||
</Link>
|
||||
</Tooltip>
|
||||
<div
|
||||
className="text-theme-text-secondary cursor-pointer hover:bg-theme-bg-primary flex items-center justify-center rounded-full"
|
||||
data-tooltip-id="docker-model-runner-model-context-window"
|
||||
data-tooltip-place="top"
|
||||
data-tooltip-delay-hide={800}
|
||||
>
|
||||
<Info size={18} className="text-theme-text-secondary" />
|
||||
</div>
|
||||
</div>
|
||||
<input
|
||||
type="number"
|
||||
name="DockerModelRunnerModelTokenLimit"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="4096"
|
||||
min={1}
|
||||
value={maxTokens}
|
||||
onChange={(e) => setMaxTokens(Number(e.target.value))}
|
||||
onScroll={(e) => e.target.blur()}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
/>
|
||||
</div>
|
||||
<DockerModelRunnerModelSelection
|
||||
settings={settings}
|
||||
basePath={basePathValue.value}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function DockerModelRunnerModelSelection({ settings, basePath = null }) {
|
||||
const [selectedModelId, setSelectedModelId] = useState(
|
||||
settings?.DockerModelRunnerModelPref
|
||||
);
|
||||
const [customModels, setCustomModels] = useState([]);
|
||||
const [filteredModels, setFilteredModels] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [searchQuery, setSearchQuery] = useState("");
|
||||
|
||||
async function fetchModels() {
|
||||
if (!basePath) {
|
||||
setCustomModels([]);
|
||||
setFilteredModels([]);
|
||||
setLoading(false);
|
||||
setSearchQuery("");
|
||||
return;
|
||||
}
|
||||
setLoading(true);
|
||||
const { models } = await System.customModels(
|
||||
"docker-model-runner",
|
||||
null,
|
||||
basePath
|
||||
);
|
||||
setCustomModels(models || []);
|
||||
setFilteredModels(models || []);
|
||||
setSearchQuery("");
|
||||
setLoading(false);
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
fetchModels();
|
||||
}, [basePath]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!searchQuery || !customModels.length) {
|
||||
setFilteredModels(customModels || []);
|
||||
return;
|
||||
}
|
||||
|
||||
const normalizedSearchQuery = searchQuery.toLowerCase().trim();
|
||||
const filteredModels = new Map();
|
||||
|
||||
customModels.forEach((model) => {
|
||||
const modelNameNormalized = model.name.toLowerCase();
|
||||
const modelOrganizationNormalized = model.organization.toLowerCase();
|
||||
|
||||
if (modelNameNormalized.startsWith(normalizedSearchQuery))
|
||||
filteredModels.set(model.id, model);
|
||||
if (modelOrganizationNormalized.startsWith(normalizedSearchQuery))
|
||||
filteredModels.set(model.id, model);
|
||||
if (strDistance(modelNameNormalized, normalizedSearchQuery) <= 2)
|
||||
filteredModels.set(model.id, model);
|
||||
if (strDistance(modelOrganizationNormalized, normalizedSearchQuery) <= 2)
|
||||
filteredModels.set(model.id, model);
|
||||
});
|
||||
|
||||
setFilteredModels(Array.from(filteredModels.values()));
|
||||
}, [searchQuery]);
|
||||
|
||||
function downloadModel(modelId, _fileSize, progressCallback) {
|
||||
const [name, tag] = modelId.split(":");
|
||||
|
||||
// Open the model in the Docker Hub (via browser since they may not be installed locally)
|
||||
window.open(`https://hub.docker.com/layers/${name}/${tag}`, "_blank");
|
||||
progressCallback(100);
|
||||
}
|
||||
|
||||
function groupModelsByAlias(models) {
|
||||
const mapping = new Map();
|
||||
mapping.set("installed", new Map());
|
||||
mapping.set("not installed", new Map());
|
||||
|
||||
const groupedModels = models.reduce((acc, model) => {
|
||||
acc[model.organization] = acc[model.organization] || [];
|
||||
acc[model.organization].push(model);
|
||||
return acc;
|
||||
}, {});
|
||||
|
||||
Object.entries(groupedModels).forEach(([organization, models]) => {
|
||||
const hasInstalled = models.some((model) => model.downloaded);
|
||||
if (hasInstalled) {
|
||||
const installedModels = models.filter((model) => model.downloaded);
|
||||
mapping
|
||||
.get("installed")
|
||||
.set("Downloaded Models", [
|
||||
...(mapping.get("installed").get("Downloaded Models") || []),
|
||||
...installedModels,
|
||||
]);
|
||||
}
|
||||
const tags = models.map((model) => ({
|
||||
...model,
|
||||
name: model.name.split(":")[1],
|
||||
}));
|
||||
mapping.get("not installed").set(organization, tags);
|
||||
});
|
||||
|
||||
const orderedMap = new Map();
|
||||
mapping
|
||||
.get("installed")
|
||||
.entries()
|
||||
.forEach(([organization, models]) =>
|
||||
orderedMap.set(organization, models)
|
||||
);
|
||||
mapping
|
||||
.get("not installed")
|
||||
.entries()
|
||||
.forEach(([organization, models]) =>
|
||||
orderedMap.set(organization, models)
|
||||
);
|
||||
return Object.fromEntries(orderedMap);
|
||||
}
|
||||
|
||||
function handleSetActiveModel(modelId) {
|
||||
if (modelId === selectedModelId) return;
|
||||
setSelectedModelId(modelId);
|
||||
window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT));
|
||||
}
|
||||
|
||||
const groupedModels = groupModelsByAlias(filteredModels);
|
||||
return (
|
||||
<Layout
|
||||
fetchModels={fetchModels}
|
||||
searchQuery={searchQuery}
|
||||
setSearchQuery={setSearchQuery}
|
||||
loading={loading}
|
||||
>
|
||||
<input
|
||||
type="hidden"
|
||||
name="DockerModelRunnerModelPref"
|
||||
id="DockerModelRunnerModelPref"
|
||||
value={selectedModelId}
|
||||
/>
|
||||
{loading ? (
|
||||
<LoadingSkeleton />
|
||||
) : filteredModels.length === 0 ? (
|
||||
<div className="flex flex-col w-full gap-y-2 mt-4">
|
||||
<p className="text-theme-text-secondary text-sm">No models found!</p>
|
||||
</div>
|
||||
) : (
|
||||
Object.entries(groupedModels).map(([alias, models]) => (
|
||||
<ModelTable
|
||||
key={alias}
|
||||
alias={alias}
|
||||
models={models}
|
||||
setActiveModel={handleSetActiveModel}
|
||||
downloadModel={downloadModel}
|
||||
selectedModelId={selectedModelId}
|
||||
ui={{
|
||||
showRuntime: false,
|
||||
}}
|
||||
/>
|
||||
))
|
||||
)}
|
||||
</Layout>
|
||||
);
|
||||
}
|
||||
|
||||
function Layout({
|
||||
children,
|
||||
fetchModels = null,
|
||||
searchQuery = "",
|
||||
setSearchQuery = () => {},
|
||||
loading = false,
|
||||
}) {
|
||||
const [isRefreshing, setIsRefreshing] = useState(false);
|
||||
async function refreshModels() {
|
||||
setIsRefreshing(true);
|
||||
try {
|
||||
await fetchModels?.();
|
||||
} catch {
|
||||
} finally {
|
||||
setIsRefreshing(false);
|
||||
}
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="flex flex-col w-full">
|
||||
<div className="flex gap-x-2 items-center pb-[8px]">
|
||||
<label className="text-theme-text-primary text-base font-semibold">
|
||||
Available Models
|
||||
</label>
|
||||
</div>
|
||||
<div className="flex w-full items-center gap-x-[16px]">
|
||||
<div className="relative flex-1 max-w-[640px]">
|
||||
<MagnifyingGlass
|
||||
size={14}
|
||||
weight="bold"
|
||||
color="var(--theme-text-primary)"
|
||||
className="absolute left-[9px] top-[10px] text-theme-settings-input-placeholder peer-focus:invisible"
|
||||
/>
|
||||
<input
|
||||
type="search"
|
||||
placeholder="Search models"
|
||||
value={searchQuery}
|
||||
disabled={loading}
|
||||
className="min-h-[32px] border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5 pl-[30px] py-2 search-input disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
onChange={(e) => {
|
||||
e.preventDefault();
|
||||
e.stopPropagation();
|
||||
setSearchQuery(e.target.value);
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
{!!fetchModels && (
|
||||
<button
|
||||
type="button"
|
||||
onClick={refreshModels}
|
||||
disabled={isRefreshing || loading}
|
||||
className="border-none text-theme-text-secondary text-sm font-medium hover:underline flex items-center gap-x-1 disabled:opacity-50 disabled:cursor-not-allowed"
|
||||
>
|
||||
{isRefreshing ? (
|
||||
<CircleNotch className="w-4 h-4 text-theme-text-secondary animate-spin" />
|
||||
) : (
|
||||
<ArrowClockwise
|
||||
weight="bold"
|
||||
className="w-4 h-4 text-theme-text-secondary"
|
||||
/>
|
||||
)}
|
||||
<span
|
||||
className={`text-sm font-medium ${isRefreshing ? "hidden" : "text-theme-text-secondary"}`}
|
||||
>
|
||||
Refresh Models
|
||||
</span>
|
||||
</button>
|
||||
)}
|
||||
</div>
|
||||
{children}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function LoadingSkeleton() {
|
||||
return (
|
||||
<div className="flex flex-col w-full gap-y-4">
|
||||
<Skeleton.default
|
||||
height={100}
|
||||
width="100%"
|
||||
count={7}
|
||||
highlightColor="var(--theme-settings-input-active)"
|
||||
baseColor="var(--theme-settings-input-bg)"
|
||||
enableAnimation={true}
|
||||
containerClassName="w-fill flex gap-[8px] flex-col p-0"
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
@@ -92,7 +92,7 @@ export default function FoundryOptions({ settings }) {
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
Token Context Window
|
||||
Model context window
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
|
||||
@@ -50,7 +50,7 @@ export default function GenericOpenAiOptions({ settings }) {
|
||||
<div className="flex gap-[36px] flex-wrap">
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
Token context window
|
||||
Model context window
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
|
||||
@@ -24,7 +24,7 @@ export default function GiteeAIOptions({ settings }) {
|
||||
<GiteeAIModelSelection settings={settings} />
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-2">
|
||||
Token context window
|
||||
Model context window
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
|
||||
@@ -43,7 +43,7 @@ export default function KoboldCPPOptions({ settings }) {
|
||||
/>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-2">
|
||||
Token context window
|
||||
Model context window
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
|
||||
@@ -34,7 +34,7 @@ export default function LiteLLMOptions({ settings }) {
|
||||
/>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
Token context window
|
||||
Model context window
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
|
||||
@@ -51,7 +51,7 @@ export default function LocalAiOptions({ settings, showAlert = false }) {
|
||||
/>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-2">
|
||||
Token context window
|
||||
Model context window
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
|
||||
@@ -18,7 +18,7 @@ export default function TextGenWebUIOptions({ settings }) {
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
Token context window
|
||||
Model context window
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
|
||||
@@ -41,6 +41,7 @@ import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
|
||||
import CometApiLogo from "@/media/llmprovider/cometapi.png";
|
||||
import FoundryLogo from "@/media/llmprovider/foundry-local.png";
|
||||
import GiteeAILogo from "@/media/llmprovider/giteeai.png";
|
||||
import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
|
||||
|
||||
const LLM_PROVIDER_PRIVACY_MAP = {
|
||||
openai: {
|
||||
@@ -224,6 +225,13 @@ const LLM_PROVIDER_PRIVACY_MAP = {
|
||||
policyUrl: "https://ai.gitee.com/docs/appendix/privacy",
|
||||
logo: GiteeAILogo,
|
||||
},
|
||||
"docker-model-runner": {
|
||||
name: "Docker Model Runner",
|
||||
description: [
|
||||
"Your model and chats are only accessible on the machine running Docker Model Runner.",
|
||||
],
|
||||
logo: DockerModelRunnerLogo,
|
||||
},
|
||||
};
|
||||
|
||||
const VECTOR_DB_PROVIDER_PRIVACY_MAP = {
|
||||
|
||||
@@ -52,6 +52,7 @@ const groupedProviders = [
|
||||
"novita",
|
||||
"openrouter",
|
||||
"ppio",
|
||||
"docker-model-runner",
|
||||
];
|
||||
export default function useGetProviderModels(provider = null) {
|
||||
const [defaultModels, setDefaultModels] = useState([]);
|
||||
|
||||
BIN
frontend/src/media/llmprovider/docker-model-runner.png
Normal file
BIN
frontend/src/media/llmprovider/docker-model-runner.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 8.5 KiB |
@@ -37,6 +37,7 @@ import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
|
||||
import CometApiLogo from "@/media/llmprovider/cometapi.png";
|
||||
import FoundryLogo from "@/media/llmprovider/foundry-local.png";
|
||||
import GiteeAILogo from "@/media/llmprovider/giteeai.png";
|
||||
import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
|
||||
|
||||
import PreLoader from "@/components/Preloader";
|
||||
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
|
||||
@@ -71,6 +72,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
|
||||
import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
|
||||
import FoundryOptions from "@/components/LLMSelection/FoundryOptions";
|
||||
import GiteeAIOptions from "@/components/LLMSelection/GiteeAIOptions/index.jsx";
|
||||
import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
|
||||
|
||||
import LLMItem from "@/components/LLMSelection/LLMItem";
|
||||
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
||||
@@ -161,6 +163,18 @@ export const AVAILABLE_LLM_PROVIDERS = [
|
||||
"Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
|
||||
requiredConfig: ["LMStudioBasePath"],
|
||||
},
|
||||
{
|
||||
name: "Docker Model Runner",
|
||||
value: "docker-model-runner",
|
||||
logo: DockerModelRunnerLogo,
|
||||
options: (settings) => <DockerModelRunnerOptions settings={settings} />,
|
||||
description: "Run LLMs using Docker Model Runner.",
|
||||
requiredConfig: [
|
||||
"DockerModelRunnerBasePath",
|
||||
"DockerModelRunnerModelPref",
|
||||
"DockerModelRunnerModelTokenLimit",
|
||||
],
|
||||
},
|
||||
{
|
||||
name: "Local AI",
|
||||
value: "localai",
|
||||
@@ -371,6 +385,7 @@ export const AVAILABLE_LLM_PROVIDERS = [
|
||||
},
|
||||
];
|
||||
|
||||
export const LLM_PREFERENCE_CHANGED_EVENT = "llm-preference-changed";
|
||||
export default function GeneralLLMPreference() {
|
||||
const [saving, setSaving] = useState(false);
|
||||
const [hasChanges, setHasChanges] = useState(false);
|
||||
@@ -428,6 +443,21 @@ export default function GeneralLLMPreference() {
|
||||
fetchKeys();
|
||||
}, []);
|
||||
|
||||
// Some more complex LLM options do not bubble up the change event, so we need to listen to the custom event
|
||||
// we can emit from the LLM options component using window.dispatchEvent(new Event(LLM_PREFERENCE_CHANGED_EVENT));
|
||||
useEffect(() => {
|
||||
function updateHasChanges() {
|
||||
setHasChanges(true);
|
||||
}
|
||||
window.addEventListener(LLM_PREFERENCE_CHANGED_EVENT, updateHasChanges);
|
||||
return () => {
|
||||
window.removeEventListener(
|
||||
LLM_PREFERENCE_CHANGED_EVENT,
|
||||
updateHasChanges
|
||||
);
|
||||
};
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
const filtered = AVAILABLE_LLM_PROVIDERS.filter((llm) =>
|
||||
llm.name.toLowerCase().includes(searchQuery.toLowerCase())
|
||||
|
||||
@@ -31,6 +31,7 @@ import DellProAiStudioLogo from "@/media/llmprovider/dpais.png";
|
||||
import MoonshotAiLogo from "@/media/llmprovider/moonshotai.png";
|
||||
import CometApiLogo from "@/media/llmprovider/cometapi.png";
|
||||
import GiteeAILogo from "@/media/llmprovider/giteeai.png";
|
||||
import DockerModelRunnerLogo from "@/media/llmprovider/docker-model-runner.png";
|
||||
|
||||
import OpenAiOptions from "@/components/LLMSelection/OpenAiOptions";
|
||||
import GenericOpenAiOptions from "@/components/LLMSelection/GenericOpenAiOptions";
|
||||
@@ -63,6 +64,7 @@ import DellProAiStudioOptions from "@/components/LLMSelection/DPAISOptions";
|
||||
import MoonshotAiOptions from "@/components/LLMSelection/MoonshotAiOptions";
|
||||
import CometApiLLMOptions from "@/components/LLMSelection/CometApiLLMOptions";
|
||||
import GiteeAiOptions from "@/components/LLMSelection/GiteeAIOptions";
|
||||
import DockerModelRunnerOptions from "@/components/LLMSelection/DockerModelRunnerOptions";
|
||||
|
||||
import LLMItem from "@/components/LLMSelection/LLMItem";
|
||||
import System from "@/models/system";
|
||||
@@ -139,6 +141,13 @@ const LLMS = [
|
||||
description:
|
||||
"Discover, download, and run thousands of cutting edge LLMs in a few clicks.",
|
||||
},
|
||||
{
|
||||
name: "Docker Model Runner",
|
||||
value: "docker-model-runner",
|
||||
logo: DockerModelRunnerLogo,
|
||||
options: (settings) => <DockerModelRunnerOptions settings={settings} />,
|
||||
description: "Run LLMs using Docker Model Runner.",
|
||||
},
|
||||
{
|
||||
name: "Local AI",
|
||||
value: "localai",
|
||||
|
||||
@@ -37,6 +37,7 @@ const ENABLED_PROVIDERS = [
|
||||
"zai",
|
||||
"giteeai",
|
||||
"cohere",
|
||||
"docker-model-runner",
|
||||
// TODO: More agent support.
|
||||
// "huggingface" // Can be done but already has issues with no-chat templated. Needs to be tested.
|
||||
];
|
||||
@@ -46,6 +47,7 @@ const WARN_PERFORMANCE = [
|
||||
"ollama",
|
||||
"localai",
|
||||
"textgenwebui",
|
||||
"docker-model-runner",
|
||||
];
|
||||
|
||||
const LLM_DEFAULT = {
|
||||
|
||||
@@ -53,6 +53,14 @@ export const NVIDIA_NIM_COMMON_URLS = [
|
||||
"http://172.17.0.1:8000/v1/version",
|
||||
];
|
||||
|
||||
export const DOCKER_MODEL_RUNNER_COMMON_URLS = [
|
||||
"http://localhost:12434/engines/llama.cpp/v1",
|
||||
"http://127.0.0.1:12434/engines/llama.cpp/v1",
|
||||
"http://model-runner.docker.internal/engines/llama.cpp/v1",
|
||||
"http://host.docker.internal:12434/engines/llama.cpp/v1",
|
||||
"http://172.17.0.1:12434/engines/llama.cpp/v1",
|
||||
];
|
||||
|
||||
export function fullApiUrl() {
|
||||
if (API_BASE !== "/api") return API_BASE;
|
||||
return `${window.location.origin}/api`;
|
||||
|
||||
@@ -105,6 +105,7 @@ AnythingLLM اسناد شما را به اشیایی به نام `workspaces` ت
|
||||
- [Z.AI (chat models)](https://z.ai/model-api)
|
||||
- [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
|
||||
- [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
|
||||
- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
|
||||
|
||||
<div dir="rtl">
|
||||
|
||||
|
||||
@@ -94,6 +94,7 @@ AnythingLLMは、ドキュメントを`ワークスペース`と呼ばれるオ
|
||||
- [Z.AI (チャットモデル)](https://z.ai/model-api)
|
||||
- [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
|
||||
- [CometAPI (チャットモデル)](https://api.cometapi.com/)
|
||||
- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
|
||||
|
||||
**埋め込みモデル:**
|
||||
|
||||
|
||||
@@ -102,6 +102,7 @@ AnythingLLM, belgelerinizi **"çalışma alanları" (workspaces)** adı verilen
|
||||
- [Z.AI (chat models)](https://z.ai/model-api)
|
||||
- [Novita AI (chat models)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
|
||||
- [PPIO](https://ppinfra.com?utm_source=github_anything-llm)
|
||||
- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
|
||||
|
||||
**Embedder modelleri:**
|
||||
|
||||
|
||||
@@ -102,6 +102,7 @@ AnythingLLM将您的文档划分为称为`workspaces` (工作区)的对象。工
|
||||
- [Novita AI (聊天模型)](https://novita.ai/model-api/product/llm-api?utm_source=github_anything-llm&utm_medium=github_readme&utm_campaign=link)
|
||||
- [PPIO (聊天模型)](https://ppinfra.com?utm_source=github_anything-llm)
|
||||
- [CometAPI (聊天模型)](https://api.cometapi.com/)
|
||||
- [Docker Model Runner](https://docs.docker.com/ai/model-runner/)
|
||||
|
||||
**支持的嵌入模型:**
|
||||
|
||||
|
||||
@@ -161,6 +161,11 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
|
||||
# GITEE_AI_MODEL_PREF=
|
||||
# GITEE_AI_MODEL_TOKEN_LIMIT=
|
||||
|
||||
# LLM_PROVIDER='docker-model-runner'
|
||||
# DOCKER_MODEL_RUNNER_BASE_PATH='http://127.0.0.1:12434'
|
||||
# DOCKER_MODEL_RUNNER_LLM_MODEL_PREF='phi-3.5-mini'
|
||||
# DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT=4096
|
||||
|
||||
###########################################
|
||||
######## Embedding API SElECTION ##########
|
||||
###########################################
|
||||
|
||||
@@ -154,6 +154,9 @@ function getModelTag() {
|
||||
case "cohere":
|
||||
model = process.env.COHERE_MODEL_PREF;
|
||||
break;
|
||||
case "docker-model-runner":
|
||||
model = process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
|
||||
break;
|
||||
default:
|
||||
model = "--";
|
||||
break;
|
||||
|
||||
@@ -646,6 +646,13 @@ const SystemSettings = {
|
||||
GiteeAIApiKey: !!process.env.GITEE_AI_API_KEY,
|
||||
GiteeAIModelPref: process.env.GITEE_AI_MODEL_PREF,
|
||||
GiteeAITokenLimit: process.env.GITEE_AI_MODEL_TOKEN_LIMIT || 8192,
|
||||
|
||||
// Docker Model Runner Keys
|
||||
DockerModelRunnerBasePath: process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
|
||||
DockerModelRunnerModelPref:
|
||||
process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF,
|
||||
DockerModelRunnerModelTokenLimit:
|
||||
process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT || 8192,
|
||||
};
|
||||
},
|
||||
|
||||
|
||||
3
server/storage/models/.gitignore
vendored
3
server/storage/models/.gitignore
vendored
@@ -13,4 +13,5 @@ context-windows/*
|
||||
MintplexLabs
|
||||
cometapi
|
||||
fireworks
|
||||
giteeai
|
||||
giteeai
|
||||
docker-model-runner
|
||||
434
server/utils/AiProviders/dockerModelRunner/index.js
Normal file
434
server/utils/AiProviders/dockerModelRunner/index.js
Normal file
@@ -0,0 +1,434 @@
|
||||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { NativeEmbedder } = require("../../EmbeddingEngines/native");
|
||||
const {
|
||||
handleDefaultStreamResponseV2,
|
||||
formatChatHistory,
|
||||
} = require("../../helpers/chat/responses");
|
||||
const {
|
||||
LLMPerformanceMonitor,
|
||||
} = require("../../helpers/chat/LLMPerformanceMonitor");
|
||||
const { OpenAI: OpenAIApi } = require("openai");
|
||||
const { humanFileSize } = require("../../helpers");
|
||||
const { safeJsonParse } = require("../../http");
|
||||
|
||||
/**
 * LLM provider for Docker Model Runner (DMR) — an OpenAI-compatible
 * inference endpoint exposed by Docker Desktop/Engine. Chat requests are
 * routed through the `openai` SDK pointed at the DMR engine URL.
 */
class DockerModelRunnerLLM {
  // How long (ms) the on-disk Docker Hub model cache stays valid.
  static cacheTime = 1000 * 60 * 60 * 24; // 24 hours
  // Folder where remote-model metadata is cached (prefers STORAGE_DIR when set).
  static cacheFolder = path.resolve(
    process.env.STORAGE_DIR
      ? path.resolve(process.env.STORAGE_DIR, "models", "docker-model-runner")
      : path.resolve(__dirname, `../../../storage/models/docker-model-runner`)
  );

  /**
   * @param {object|null} embedder - Embedding engine to delegate to; falls back to NativeEmbedder.
   * @param {string|null} modelPreference - Model id override; falls back to DOCKER_MODEL_RUNNER_LLM_MODEL_PREF.
   * @throws {Error} When the DMR base path or model pref env vars are unset.
   */
  constructor(embedder = null, modelPreference = null) {
    if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
      throw new Error("No Docker Model Runner API Base Path was set.");
    if (!process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF)
      throw new Error("No Docker Model Runner Model Pref was set.");

    // DMR requires no auth, so apiKey is null; the base path is normalized
    // to the OpenAI-compatible "engines/v1" endpoint.
    this.dmr = new OpenAIApi({
      baseURL: parseDockerModelRunnerEndpoint(
        process.env.DOCKER_MODEL_RUNNER_BASE_PATH
      ),
      apiKey: null,
    });

    this.model =
      modelPreference || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
    this.embedder = embedder ?? new NativeEmbedder();
    this.defaultTemp = 0.7;

    // Token budget split (fractions of the model context window) used by
    // the message compressor.
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };

    this.#log(`initialized with model: ${this.model}`);
  }

  // Instance-level log helper (green "[Docker Model Runner]" prefix).
  #log(text, ...args) {
    console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args);
  }

  // Static log helper for module-level functions that have no instance.
  static slog(text, ...args) {
    console.log(`\x1b[32m[Docker Model Runner]\x1b[0m ${text}`, ...args);
  }

  // Lazily (re)build this.limits if it was ever cleared.
  // NOTE(review): the constructor always assigns `this.limits`, so the
  // early-return below makes the recompute unreachable in practice — confirm
  // whether some caller is expected to null it out first.
  async assertModelContextLimits() {
    if (this.limits !== null) return;
    this.limits = {
      history: this.promptWindowLimit() * 0.15,
      system: this.promptWindowLimit() * 0.15,
      user: this.promptWindowLimit() * 0.7,
    };
  }

  // Render retrieved context snippets into the delimited block appended to
  // the system prompt; empty input yields an empty string.
  #appendContext(contextTexts = []) {
    if (!contextTexts || !contextTexts.length) return "";
    return (
      "\nContext:\n" +
      contextTexts
        .map((text, i) => {
          return `[CONTEXT ${i}]:\n${text}\n[END CONTEXT ${i}]\n\n`;
        })
        .join("")
    );
  }

  // Streaming is supported whenever streamGetChatCompletion is defined.
  streamingEnabled() {
    return "streamGetChatCompletion" in this;
  }

  /** DMR does not support curling the context window limit from the API, so we return the system defined limit. */
  static promptWindowLimit(_) {
    const systemDefinedLimit =
      Number(process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT) || 8192;
    return systemDefinedLimit;
  }

  promptWindowLimit() {
    return this.constructor.promptWindowLimit(this.model);
  }

  // DMR exposes no model-validation endpoint; any configured model is accepted.
  async isValidChatCompletionModel(_ = "") {
    return true;
  }

  /**
   * Generates appropriate content array for a message + attachments.
   * @param {{userPrompt:string, attachments: import("../../helpers").Attachment[]}}
   * @returns {string|object[]} Plain string when no attachments, otherwise an
   * OpenAI-style multimodal content array (text part + image_url parts).
   */
  #generateContent({ userPrompt, attachments = [] }) {
    if (!attachments.length) {
      return userPrompt;
    }

    const content = [{ type: "text", text: userPrompt }];
    for (let attachment of attachments) {
      content.push({
        type: "image_url",
        image_url: {
          url: attachment.contentString,
          detail: "auto",
        },
      });
    }
    return content.flat();
  }

  /**
   * Construct the user prompt for this model.
   * Builds the full OpenAI-style message array: system prompt (+context),
   * formatted history, then the current user turn with any attachments.
   * @param {{attachments: import("../../helpers").Attachment[]}} param0
   * @returns {object[]}
   */
  constructPrompt({
    systemPrompt = "",
    contextTexts = [],
    chatHistory = [],
    userPrompt = "",
    attachments = [],
  }) {
    const prompt = {
      role: "system",
      content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
    };
    return [
      prompt,
      ...formatChatHistory(chatHistory, this.#generateContent),
      {
        role: "user",
        content: this.#generateContent({ userPrompt, attachments }),
      },
    ];
  }

  /**
   * Non-streaming chat completion with performance metrics attached.
   * @returns {Promise<{textResponse: string, metrics: object}|null>} null when
   * the API returned no choices.
   */
  async getChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!this.model)
      throw new Error(
        `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!`
      );

    const result = await LLMPerformanceMonitor.measureAsyncFunction(
      this.dmr.chat.completions.create({
        model: this.model,
        messages,
        temperature,
      })
    );

    if (
      !result.output.hasOwnProperty("choices") ||
      result.output.choices.length === 0
    )
      return null;

    return {
      textResponse: result.output.choices[0].message.content,
      metrics: {
        // Token counts default to 0 when the API omits usage data.
        prompt_tokens: result.output.usage?.prompt_tokens || 0,
        completion_tokens: result.output.usage?.completion_tokens || 0,
        total_tokens: result.output.usage?.total_tokens || 0,
        outputTps: result.output.usage?.completion_tokens / result.duration,
        duration: result.duration,
        model: this.model,
        timestamp: new Date(),
      },
    };
  }

  /**
   * Streaming chat completion wrapped in the performance monitor.
   * Prompt tokens are counted locally since DMR streams omit usage data.
   */
  async streamGetChatCompletion(messages = null, { temperature = 0.7 }) {
    if (!this.model)
      throw new Error(
        `Docker Model Runner chat: ${this.model} is not valid or defined model for chat completion!`
      );

    const measuredStreamRequest = await LLMPerformanceMonitor.measureStream({
      func: this.dmr.chat.completions.create({
        model: this.model,
        stream: true,
        messages,
        temperature,
      }),
      messages,
      runPromptTokenCalculation: true,
      modelTag: this.model,
    });
    return measuredStreamRequest;
  }

  // Delegates SSE handling to the shared OpenAI-compatible stream handler.
  handleStream(response, stream, responseProps) {
    return handleDefaultStreamResponseV2(response, stream, responseProps);
  }

  // Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
  async embedTextInput(textInput) {
    return await this.embedder.embedTextInput(textInput);
  }
  async embedChunks(textChunks = []) {
    return await this.embedder.embedChunks(textChunks);
  }

  // Compress the prompt/history into the model's context window budget.
  async compressMessages(promptArgs = {}, rawHistory = []) {
    await this.assertModelContextLimits();
    const { messageArrayCompressor } = require("../../helpers/chat");
    const messageArray = this.constructPrompt(promptArgs);
    return await messageArrayCompressor(this, messageArray, rawHistory);
  }
}
|
||||
|
||||
/**
 * Normalize a Docker Model Runner base path into the endpoint URL for the
 * requested consumer.
 * @param {string} basePath - The base path of the Docker Model Runner endpoint.
 * @param {'openai' | 'dmr'} to - The target API surface: the OpenAI-compatible
 * engine route ("openai") or the bare internal DMR root ("dmr").
 * @returns {string | null} Normalized URL string, the raw input when it is not
 * a parseable URL, or null when no base path was given.
 */
function parseDockerModelRunnerEndpoint(basePath = null, to = "openai") {
  if (!basePath) return null;
  try {
    const endpoint = new URL(basePath);
    switch (to) {
      case "openai":
        endpoint.pathname = "engines/v1";
        break;
      case "dmr":
        endpoint.pathname = "";
        break;
    }
    return endpoint.toString();
  } catch {
    // Not parseable as a URL — hand the raw value back unchanged.
    return basePath;
  }
}
|
||||
|
||||
/**
 * @typedef {Object} DockerRunnerInstalledModel
 * @property {string} id - The SHA256 identifier of the model layer/blob.
 * @property {string[]} tags - List of tags or aliases associated with this model (e.g., "ai/qwen3:4B-UD-Q4_K_XL").
 * @property {number} created - The Unix timestamp (seconds) when the model was created.
 * @property {Object} config - The configuration of the model.
 * @property {string} config.format - The file format (e.g., "gguf").
 * @property {string} config.quantization - The quantization level (e.g., "MOSTLY_Q4_K_M", "Q4_0").
 * @property {string} config.parameters - The parameter count formatted as a string (e.g., "4.02 B").
 * @property {string} config.architecture - The base architecture of the model (e.g., "qwen3", "llama").
 * @property {string} config.size - The physical file size formatted as a string (e.g., "2.37 GiB").
 * @property {Object} [config.gguf] - Raw GGUF metadata headers containing tokenizer, architecture details, and licensing.
 * @property {string} [config.gguf."general.base_model.0.organization"] - The organization of the base model.
 * @property {string} [config.gguf."general.basename"] - The base name of the model (the real name of the model, not the tag).
 * @property {string} [config.gguf."*.context_length"] - The context length of the model; key is prefixed by architecture (e.g., "qwen3.context_length").
 */
|
||||
|
||||
/**
 * Fetch the remote models from the Docker Hub and cache the results on disk
 * for `DockerModelRunnerLLM.cacheTime` (24 hours).
 * @returns {Promise<Record<string, {id: string, name: string, size: string, organization: string}[]>>}
 * Map of model name (e.g. "qwen3") to its list of downloadable tags; empty
 * object when Docker Hub is unreachable.
 */
async function fetchRemoteModels() {
  const cachePath = path.resolve(
    DockerModelRunnerLLM.cacheFolder,
    "models.json"
  );
  const cachedAtPath = path.resolve(
    DockerModelRunnerLLM.cacheFolder,
    ".cached_at"
  );

  // Serve from the on-disk cache while it is still fresh.
  if (fs.existsSync(cachePath) && fs.existsSync(cachedAtPath)) {
    const cacheTime = Number(fs.readFileSync(cachedAtPath, "utf8"));
    if (Date.now() - cacheTime < DockerModelRunnerLLM.cacheTime)
      return safeJsonParse(fs.readFileSync(cachePath, "utf8"));
  }

  DockerModelRunnerLLM.slog(`Refreshing remote models from Docker Hub`);
  // Walk the paginated Docker Hub API to collect every model repository under
  // the "ai" namespace (e.g. "ai/mistral", "ai/qwen3").
  const availableNamespaces = []; // array of strings like ai/mistral, ai/qwen3, etc
  let nextPage =
    "https://hub.docker.com/v2/namespaces/ai/repositories?page_size=100&page=1";
  while (nextPage) {
    // BUGFIX: the `.then` chain must return the parsed page payload — the
    // previous version returned undefined, so `page.next` was never read and
    // pagination silently stopped after the first page.
    const page = await fetch(nextPage)
      .then((res) => res.json())
      .then((data) => {
        const namespaces = data.results
          .filter(
            (result) =>
              result.namespace &&
              result.name &&
              result.content_types.includes("model") &&
              result.namespace === "ai"
          )
          .map((result) => result.namespace + "/" + result.name);
        availableNamespaces.push(...namespaces);
        return data;
      })
      .catch((e) => {
        DockerModelRunnerLLM.slog(
          `Error fetching remote models from Docker Hub`,
          e
        );
        return null;
      });
    if (!page || !page.next) break;
    nextPage = page.next;
  }

  const availableRemoteModels = {};
  const BATCH_SIZE = 10;

  // Run batch requests to avoid rate limiting but also
  // improve the speed of the total request time.
  for (let i = 0; i < availableNamespaces.length; i += BATCH_SIZE) {
    const batch = availableNamespaces.slice(i, i + BATCH_SIZE);
    DockerModelRunnerLLM.slog(
      `Fetching tags for batch ${Math.floor(i / BATCH_SIZE) + 1} of ${Math.ceil(availableNamespaces.length / BATCH_SIZE)}`
    );

    await Promise.all(
      batch.map(async (namespace) => {
        const [organization, model] = namespace.split("/");
        const namespaceUrl = new URL(
          "https://hub.docker.com/v2/namespaces/ai/repositories/" +
            model +
            "/tags"
        );

        DockerModelRunnerLLM.slog(
          `Fetching tags for ${namespaceUrl.toString()}`
        );
        await fetch(namespaceUrl.toString())
          .then((res) => res.json())
          .then((data) => {
            const tags = data.results.map((result) => {
              return {
                id: `${organization}/${model}:${result.name}`,
                name: `${model}:${result.name}`,
                size: humanFileSize(result.full_size),
                organization: model,
              };
            });
            availableRemoteModels[model] = tags;
          })
          .catch((e) => {
            DockerModelRunnerLLM.slog(
              `Error fetching tags for ${namespaceUrl.toString()}`,
              e
            );
          });
      })
    );
  }

  // Do not poison the cache with an empty result set when the API is down.
  if (Object.keys(availableRemoteModels).length === 0) {
    DockerModelRunnerLLM.slog(
      `No remote models found - API may be down or not available`
    );
    return {};
  }

  // Persist the fresh listing plus its timestamp for the next cache check.
  if (!fs.existsSync(DockerModelRunnerLLM.cacheFolder))
    fs.mkdirSync(DockerModelRunnerLLM.cacheFolder, { recursive: true });
  fs.writeFileSync(cachePath, JSON.stringify(availableRemoteModels), {
    encoding: "utf8",
  });
  fs.writeFileSync(cachedAtPath, String(Number(new Date())), {
    encoding: "utf8",
  });
  return availableRemoteModels;
}
|
||||
|
||||
/**
 * This function will fetch the remote models from the Docker Hub as well
 * as the local models installed on the system, then flags each remote tag
 * with whether it is already downloaded locally.
 * @param {string} basePath - The base path of the Docker Model Runner endpoint.
 * @returns {Promise<{id: string, name: string, size: string, organization: string, downloaded: boolean}[]>}
 * Flat list of model tags; empty (or partial) when either lookup fails.
 */
async function getDockerModels(basePath = null) {
  const availableModels = {};
  /** @type {Record<string, {id: string, name: string, size: string, organization: string}>} installed models keyed by id */
  const installedModels = {};

  try {
    // Grab the locally installed models from the Docker Model Runner API
    const dmrUrl = new URL(
      parseDockerModelRunnerEndpoint(
        basePath ?? process.env.DOCKER_MODEL_RUNNER_BASE_PATH,
        "dmr"
      )
    );
    dmrUrl.pathname = "/models";

    const data = await fetch(dmrUrl.toString()).then((res) => res.json());
    // Was a side-effecting `.map` — a plain loop states the intent.
    for (const model of data ?? []) {
      const id = model.tags.at(0);
      // eg: ai/qwen3:latest -> tag "latest", organization "qwen3"
      const tag =
        id?.split("/").pop()?.split(":")?.at(1) ??
        id?.split(":").at(1) ??
        "latest";
      const organization = id?.split("/").pop()?.split(":")?.at(0) ?? id;
      installedModels[id] = {
        id: id,
        name: `${organization}:${tag}`,
        size: model.config?.size ?? "Unknown size",
        organization: organization,
      };
    }

    // Now hit the Docker Hub API to get the remote model namespace and root tags,
    // marking each remote tag as downloaded when it was seen locally above.
    const remoteModels = await fetchRemoteModels();
    for (const [modelName, tags] of Object.entries(remoteModels)) {
      availableModels[modelName] = {
        tags: tags.map((tag) => ({
          ...tag,
          downloaded: !!installedModels[tag.id],
        })),
      };
    }
  } catch (e) {
    DockerModelRunnerLLM.slog(`Error getting Docker models`, e);
  }

  // Return after try/catch instead of from `finally` — a `return` inside
  // `finally` silently swallows exceptions and is an antipattern. Partial
  // results accumulated before a failure are still returned.
  return Object.values(availableModels).flatMap((m) => m.tags);
}
|
||||
|
||||
module.exports = {
|
||||
DockerModelRunnerLLM,
|
||||
parseDockerModelRunnerEndpoint,
|
||||
getDockerModels,
|
||||
};
|
||||
@@ -992,6 +992,8 @@ ${this.getHistory({ to: route.to })
|
||||
return new Providers.GiteeAIProvider({ model: config.model });
|
||||
case "cohere":
|
||||
return new Providers.CohereProvider({ model: config.model });
|
||||
case "docker-model-runner":
|
||||
return new Providers.DockerModelRunnerProvider({ model: config.model });
|
||||
default:
|
||||
throw new Error(
|
||||
`Unknown provider: ${config.provider}. Please use a valid provider.`
|
||||
|
||||
@@ -18,6 +18,9 @@ const { ChatOllama } = require("@langchain/community/chat_models/ollama");
|
||||
const { toValidNumber, safeJsonParse } = require("../../../http");
|
||||
const { getLLMProviderClass } = require("../../../helpers");
|
||||
const { parseLMStudioBasePath } = require("../../../AiProviders/lmStudio");
|
||||
const {
|
||||
parseDockerModelRunnerEndpoint,
|
||||
} = require("../../../AiProviders/dockerModelRunner");
|
||||
const { parseFoundryBasePath } = require("../../../AiProviders/foundry");
|
||||
const {
|
||||
SystemPromptVariables,
|
||||
@@ -313,6 +316,16 @@ class Provider {
|
||||
...config,
|
||||
});
|
||||
}
|
||||
case "docker-model-runner":
|
||||
return new ChatOpenAI({
|
||||
configuration: {
|
||||
baseURL: parseDockerModelRunnerEndpoint(
|
||||
process.env.DOCKER_MODEL_RUNNER_BASE_PATH
|
||||
),
|
||||
},
|
||||
apiKey: null,
|
||||
...config,
|
||||
});
|
||||
default:
|
||||
throw new Error(`Unsupported provider ${provider} for this task.`);
|
||||
}
|
||||
|
||||
101
server/utils/agents/aibitat/providers/dockerModelRunner.js
Normal file
101
server/utils/agents/aibitat/providers/dockerModelRunner.js
Normal file
@@ -0,0 +1,101 @@
|
||||
const OpenAI = require("openai");
|
||||
const Provider = require("./ai-provider.js");
|
||||
const InheritMultiple = require("./helpers/classes.js");
|
||||
const UnTooled = require("./helpers/untooled.js");
|
||||
const {
|
||||
parseDockerModelRunnerEndpoint,
|
||||
} = require("../../../AiProviders/dockerModelRunner/index.js");
|
||||
|
||||
/**
 * The agent provider for the Docker Model Runner.
 * Uses the OpenAI SDK against DMR's OpenAI-compatible endpoint; tool calling
 * is emulated through the UnTooled mixin since DMR models are not guaranteed
 * to support native function calling.
 */
class DockerModelRunnerProvider extends InheritMultiple([Provider, UnTooled]) {
  model;

  /**
   * @param {{model?: string}} config - Optional model override; falls back to
   * DOCKER_MODEL_RUNNER_LLM_MODEL_PREF, then null.
   */
  constructor(config = {}) {
    super();
    const model =
      config?.model || process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF || null;
    // DMR requires no auth, so apiKey is null.
    const client = new OpenAI({
      baseURL: parseDockerModelRunnerEndpoint(
        process.env.DOCKER_MODEL_RUNNER_BASE_PATH
      ),
      apiKey: null,
      maxRetries: 3,
    });

    this._client = client;
    this.model = model;
    this.verbose = true;
  }

  get client() {
    return this._client;
  }

  // Streaming agent responses are supported by this provider.
  get supportsAgentStreaming() {
    return true;
  }

  /**
   * Non-streaming completion used for emulated function-calling.
   * @returns {Promise<string|null>} The assistant message content, or null on
   * any API error or empty result (UnTooled treats null as a failed call).
   */
  async #handleFunctionCallChat({ messages = [] }) {
    return await this.client.chat.completions
      .create({
        model: this.model,
        messages,
      })
      .then((result) => {
        if (!result.hasOwnProperty("choices"))
          throw new Error("Docker Model Runner chat: No results!");
        if (result.choices.length === 0)
          throw new Error("Docker Model Runner chat: No results length!");
        return result.choices[0].message.content;
      })
      .catch((_) => {
        return null;
      });
  }

  // Streaming variant for emulated function-calling; returns the raw stream.
  async #handleFunctionCallStream({ messages = [] }) {
    return await this.client.chat.completions.create({
      model: this.model,
      stream: true,
      messages,
    });
  }

  // Delegate to UnTooled's stream loop with our streaming handler bound.
  async stream(messages, functions = [], eventHandler = null) {
    return await UnTooled.prototype.stream.call(
      this,
      messages,
      functions,
      this.#handleFunctionCallStream.bind(this),
      eventHandler
    );
  }

  // Delegate to UnTooled's completion loop with our chat handler bound.
  async complete(messages, functions = []) {
    return await UnTooled.prototype.complete.call(
      this,
      messages,
      functions,
      this.#handleFunctionCallChat.bind(this)
    );
  }

  /**
   * Get the cost of the completion.
   *
   * @param _usage The completion to get the cost for.
   * @returns The cost of the completion.
   * Stubbed since Docker Model Runner has no cost basis.
   */
  getCost(_usage) {
    return 0;
  }
}
|
||||
|
||||
module.exports = DockerModelRunnerProvider;
|
||||
@@ -29,6 +29,7 @@ const CometApiProvider = require("./cometapi.js");
|
||||
const FoundryProvider = require("./foundry.js");
|
||||
const GiteeAIProvider = require("./giteeai.js");
|
||||
const CohereProvider = require("./cohere.js");
|
||||
const DockerModelRunnerProvider = require("./dockerModelRunner.js");
|
||||
|
||||
module.exports = {
|
||||
OpenAIProvider,
|
||||
@@ -62,4 +63,5 @@ module.exports = {
|
||||
FoundryProvider,
|
||||
GiteeAIProvider,
|
||||
CohereProvider,
|
||||
DockerModelRunnerProvider,
|
||||
};
|
||||
|
||||
@@ -217,6 +217,12 @@ class AgentHandler {
|
||||
if (!process.env.COHERE_API_KEY)
|
||||
throw new Error("Cohere API key must be provided to use agents.");
|
||||
break;
|
||||
case "docker-model-runner":
|
||||
if (!process.env.DOCKER_MODEL_RUNNER_BASE_PATH)
|
||||
throw new Error(
|
||||
"Docker Model Runner base path must be provided to use agents."
|
||||
);
|
||||
break;
|
||||
default:
|
||||
throw new Error(
|
||||
"No workspace agent provider set. Please set your agent provider in the workspace's settings"
|
||||
@@ -297,6 +303,8 @@ class AgentHandler {
|
||||
return process.env.GITEE_AI_MODEL_PREF ?? null;
|
||||
case "cohere":
|
||||
return process.env.COHERE_MODEL_PREF ?? "command-r-08-2024";
|
||||
case "docker-model-runner":
|
||||
return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF ?? null;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ const { fetchPPIOModels } = require("../AiProviders/ppio");
|
||||
const { GeminiLLM } = require("../AiProviders/gemini");
|
||||
const { fetchCometApiModels } = require("../AiProviders/cometapi");
|
||||
const { parseFoundryBasePath } = require("../AiProviders/foundry");
|
||||
const { getDockerModels } = require("../AiProviders/dockerModelRunner");
|
||||
|
||||
const SUPPORT_CUSTOM_MODELS = [
|
||||
"openai",
|
||||
@@ -43,6 +44,7 @@ const SUPPORT_CUSTOM_MODELS = [
|
||||
"cohere",
|
||||
"zai",
|
||||
"giteeai",
|
||||
"docker-model-runner",
|
||||
// Embedding Engines
|
||||
"native-embedder",
|
||||
"cohere-embedder",
|
||||
@@ -116,6 +118,8 @@ async function getCustomModels(provider = "", apiKey = null, basePath = null) {
|
||||
return await getOpenRouterEmbeddingModels();
|
||||
case "giteeai":
|
||||
return await getGiteeAIModels(apiKey);
|
||||
case "docker-model-runner":
|
||||
return await getDockerModelRunnerModels(basePath);
|
||||
default:
|
||||
return { models: [], error: "Invalid provider for custom models" };
|
||||
}
|
||||
@@ -864,6 +868,19 @@ async function getOpenRouterEmbeddingModels() {
|
||||
return { models, error: null };
|
||||
}
|
||||
|
||||
/**
 * List the models known to Docker Model Runner (local + Docker Hub).
 * Failures are logged and reported via the `error` field rather than thrown.
 * @param {string|null} basePath - Optional DMR endpoint override.
 * @returns {Promise<{models: object[], error: string|null}>}
 */
async function getDockerModelRunnerModels(basePath = null) {
  try {
    return { models: await getDockerModels(basePath), error: null };
  } catch (e) {
    console.error(`DockerModelRunner:getDockerModelRunnerModels`, e.message);
    return {
      models: [],
      error: "Could not fetch Docker Model Runner Models",
    };
  }
}
|
||||
|
||||
module.exports = {
|
||||
getCustomModels,
|
||||
SUPPORT_CUSTOM_MODELS,
|
||||
|
||||
@@ -229,6 +229,11 @@ function getLLMProvider({ provider = null, model = null } = {}) {
|
||||
case "giteeai":
|
||||
const { GiteeAILLM } = require("../AiProviders/giteeai");
|
||||
return new GiteeAILLM(embedder, model);
|
||||
case "docker-model-runner":
|
||||
const {
|
||||
DockerModelRunnerLLM,
|
||||
} = require("../AiProviders/dockerModelRunner");
|
||||
return new DockerModelRunnerLLM(embedder, model);
|
||||
default:
|
||||
throw new Error(
|
||||
`ENV: No valid LLM_PROVIDER value found in environment! Using ${process.env.LLM_PROVIDER}`
|
||||
@@ -394,6 +399,11 @@ function getLLMProviderClass({ provider = null } = {}) {
|
||||
case "giteeai":
|
||||
const { GiteeAILLM } = require("../AiProviders/giteeai");
|
||||
return GiteeAILLM;
|
||||
case "docker-model-runner":
|
||||
const {
|
||||
DockerModelRunnerLLM,
|
||||
} = require("../AiProviders/dockerModelRunner");
|
||||
return DockerModelRunnerLLM;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
@@ -470,6 +480,8 @@ function getBaseLLMProviderModel({ provider = null } = {}) {
|
||||
return process.env.ZAI_MODEL_PREF;
|
||||
case "giteeai":
|
||||
return process.env.GITEE_AI_MODEL_PREF;
|
||||
case "docker-model-runner":
|
||||
return process.env.DOCKER_MODEL_RUNNER_LLM_MODEL_PREF;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
@@ -495,6 +507,30 @@ function toChunks(arr, size) {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Format a byte count as a human-readable string.
 * @param {number} bytes - Raw byte count (may be negative).
 * @param {boolean} si - Use SI (base-1000, "kB") units instead of binary
 * (base-1024, "KiB") units.
 * @param {number} dp - Decimal places to show for scaled values.
 * @returns {string} e.g. "500 B", "1.0 KiB", "2.4 GB".
 */
function humanFileSize(bytes, si = false, dp = 1) {
  const base = si ? 1000 : 1024;

  // Below one unit step, show raw bytes with no decimal places.
  if (Math.abs(bytes) < base) {
    return bytes + " B";
  }

  const suffixes = si
    ? ["kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"]
    : ["KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"];
  const rounder = 10 ** dp;

  // Divide up through the unit scale until the *rounded* value fits under
  // one more step, capping at the largest suffix.
  let value = bytes;
  let idx = -1;
  do {
    value /= base;
    idx += 1;
  } while (
    Math.round(Math.abs(value) * rounder) / rounder >= base &&
    idx < suffixes.length - 1
  );

  return value.toFixed(dp) + " " + suffixes[idx];
}
|
||||
|
||||
module.exports = {
|
||||
getEmbeddingEngineSelection,
|
||||
maximumChunkLength,
|
||||
@@ -503,4 +539,5 @@ module.exports = {
|
||||
getBaseLLMProviderModel,
|
||||
getLLMProvider,
|
||||
toChunks,
|
||||
humanFileSize,
|
||||
};
|
||||
|
||||
@@ -789,6 +789,20 @@ const KEY_MAPPING = {
|
||||
envKey: "GITEE_AI_MODEL_TOKEN_LIMIT",
|
||||
checks: [nonZero],
|
||||
},
|
||||
|
||||
// Docker Model Runner Options
|
||||
DockerModelRunnerBasePath: {
|
||||
envKey: "DOCKER_MODEL_RUNNER_BASE_PATH",
|
||||
checks: [isValidURL],
|
||||
},
|
||||
DockerModelRunnerModelPref: {
|
||||
envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_PREF",
|
||||
checks: [isNotEmpty],
|
||||
},
|
||||
DockerModelRunnerModelTokenLimit: {
|
||||
envKey: "DOCKER_MODEL_RUNNER_LLM_MODEL_TOKEN_LIMIT",
|
||||
checks: [nonZero],
|
||||
},
|
||||
};
|
||||
|
||||
function isNotEmpty(input = "") {
|
||||
@@ -902,6 +916,7 @@ function supportedLLM(input = "") {
|
||||
"foundry",
|
||||
"zai",
|
||||
"giteeai",
|
||||
"docker-model-runner",
|
||||
].includes(input);
|
||||
return validSelection ? null : `${input} is not a valid LLM provider.`;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user