mirror of
https://github.com/Mintplex-Labs/anything-llm
synced 2026-04-25 17:15:37 +02:00
add jina embedding provider
This commit is contained in:
104
frontend/src/components/EmbeddingSelection/JinaOptions/index.jsx
Normal file
104
frontend/src/components/EmbeddingSelection/JinaOptions/index.jsx
Normal file
@@ -0,0 +1,104 @@
|
||||
import React, { useState } from "react";
|
||||
import { CaretDown, CaretUp } from "@phosphor-icons/react";
|
||||
|
||||
export default function JinaOptions({ settings }) {
|
||||
const [showAdvancedControls, setShowAdvancedControls] = useState(false);
|
||||
return (
|
||||
<div className="w-full flex flex-col gap-y-7">
|
||||
<div className="w-full flex items-center gap-[36px] mt-1.5 flex-wrap">
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
API Key
|
||||
</label>
|
||||
<input
|
||||
type="password"
|
||||
name="JinaApiKey"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="Jina API Key"
|
||||
defaultValue={settings?.JinaApiKey ? "*".repeat(20) : ""}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
Model Preference
|
||||
</label>
|
||||
<select
|
||||
name="EmbeddingModelPref"
|
||||
required={true}
|
||||
defaultValue={settings?.EmbeddingModelPref}
|
||||
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||
>
|
||||
<optgroup label="Available embedding models">
|
||||
{[
|
||||
"jina-embeddings-v3",
|
||||
"jina-embeddings-v2-base-en",
|
||||
"jina-embeddings-v2-base-zh",
|
||||
"jina-embeddings-v2-base-de",
|
||||
"jina-embeddings-v2-base-es",
|
||||
"jina-embeddings-v2-base-code",
|
||||
"jina-clip-v2",
|
||||
"jina-clip-v1",
|
||||
].map((model) => {
|
||||
return (
|
||||
<option key={model} value={model}>
|
||||
{model}
|
||||
</option>
|
||||
);
|
||||
})}
|
||||
</optgroup>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
<div className="flex items-center gap-x-3">
|
||||
<button
|
||||
type="button"
|
||||
onClick={() => setShowAdvancedControls(!showAdvancedControls)}
|
||||
className="flex items-center gap-x-2 text-white text-sm font-semibold"
|
||||
>
|
||||
Advanced Settings
|
||||
{showAdvancedControls ? (
|
||||
<CaretUp size={16} weight="bold" />
|
||||
) : (
|
||||
<CaretDown size={16} weight="bold" />
|
||||
)}
|
||||
</button>
|
||||
</div>
|
||||
{showAdvancedControls && (
|
||||
<div className="flex flex-col gap-y-4">
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
Task Type
|
||||
</label>
|
||||
<input
|
||||
type="text"
|
||||
name="JinaTask"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="e.g. retrieval.document"
|
||||
defaultValue={settings?.JinaTask}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-col w-60">
|
||||
<label className="text-white text-sm font-semibold block mb-3">
|
||||
Max Chunk Length
|
||||
</label>
|
||||
<input
|
||||
type="number"
|
||||
name="EmbeddingModelMaxChunkLength"
|
||||
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||
placeholder="8192"
|
||||
defaultValue={settings?.EmbeddingModelMaxChunkLength || 8192}
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
}
|
||||
BIN
frontend/src/media/embeddingprovider/jina.png
Normal file
BIN
frontend/src/media/embeddingprovider/jina.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 5.0 KiB |
@@ -15,6 +15,7 @@ import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
|
||||
import LiteLLMLogo from "@/media/llmprovider/litellm.png";
|
||||
import GenericOpenAiLogo from "@/media/llmprovider/generic-openai.png";
|
||||
import MistralAiLogo from "@/media/llmprovider/mistral.jpeg";
|
||||
import JinaAiLogo from "@/media/embeddingprovider/jina.png";
|
||||
|
||||
import PreLoader from "@/components/Preloader";
|
||||
import ChangeWarningModal from "@/components/ChangeWarning";
|
||||
@@ -29,6 +30,7 @@ import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOption
|
||||
import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions";
|
||||
import LiteLLMOptions from "@/components/EmbeddingSelection/LiteLLMOptions";
|
||||
import GenericOpenAiEmbeddingOptions from "@/components/EmbeddingSelection/GenericOpenAiOptions";
|
||||
import JinaOptions from "@/components/EmbeddingSelection/JinaOptions";
|
||||
|
||||
import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem";
|
||||
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
|
||||
@@ -127,6 +129,13 @@ const EMBEDDERS = [
|
||||
),
|
||||
description: "Run embedding models from any OpenAI compatible API service.",
|
||||
},
|
||||
{
|
||||
name: "Jina AI",
|
||||
value: "jina",
|
||||
logo: JinaAiLogo,
|
||||
options: (settings) => <JinaOptions settings={settings} />,
|
||||
description: "Run powerful multilingual embedding models from Jina AI.",
|
||||
},
|
||||
];
|
||||
|
||||
export default function GeneralEmbeddingPreference() {
|
||||
|
||||
@@ -38,6 +38,7 @@ import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
|
||||
import PPIOLogo from "@/media/llmprovider/ppio.png";
|
||||
import PGVectorLogo from "@/media/vectordbs/pgvector.png";
|
||||
import DPAISLogo from "@/media/llmprovider/dpais.png";
|
||||
import JinaAiLogo from "@/media/embeddingprovider/jina.png";
|
||||
import React, { useState, useEffect } from "react";
|
||||
import paths from "@/utils/paths";
|
||||
import { useNavigate } from "react-router-dom";
|
||||
@@ -187,6 +188,14 @@ export const LLM_SELECTION_PRIVACY = {
|
||||
],
|
||||
logo: GenericOpenAiLogo,
|
||||
},
|
||||
jina: {
|
||||
name: "Jina AI",
|
||||
description: [
|
||||
"Your document text is sent to Jina AI's servers for processing",
|
||||
"Your data is handled according to Jina AI's terms of service and privacy policy",
|
||||
],
|
||||
logo: JinaAiLogo,
|
||||
},
|
||||
cohere: {
|
||||
name: "Cohere",
|
||||
description: [
|
||||
@@ -393,6 +402,14 @@ export const EMBEDDING_ENGINE_PRIVACY = {
|
||||
],
|
||||
logo: GenericOpenAiLogo,
|
||||
},
|
||||
jina: {
|
||||
name: "Jina AI",
|
||||
description: [
|
||||
"Your document text is sent to Jina AI's servers for processing",
|
||||
"Your data is handled according to Jina AI's terms of service and privacy policy",
|
||||
],
|
||||
logo: JinaAiLogo,
|
||||
},
|
||||
gemini: {
|
||||
name: "Google Gemini",
|
||||
description: [
|
||||
@@ -493,8 +510,10 @@ export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
|
||||
</p>
|
||||
</div>
|
||||
<ul className="flex flex-col list-disc ml-4">
|
||||
{LLMSelection.description.map((desc) => (
|
||||
<li className="text-theme-text-primary text-sm">{desc}</li>
|
||||
{LLMSelection.description.map((desc, index) => (
|
||||
<li key={index} className="text-theme-text-secondary text-sm">
|
||||
{desc}
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
@@ -513,8 +532,10 @@ export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
|
||||
</p>
|
||||
</div>
|
||||
<ul className="flex flex-col list-disc ml-4">
|
||||
{EmbeddingEngine.description.map((desc) => (
|
||||
<li className="text-theme-text-primary text-sm">{desc}</li>
|
||||
{EmbeddingEngine.description.map((desc, index) => (
|
||||
<li key={index} className="text-theme-text-secondary text-sm">
|
||||
{desc}
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
@@ -534,8 +555,10 @@ export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
|
||||
</p>
|
||||
</div>
|
||||
<ul className="flex flex-col list-disc ml-4">
|
||||
{VectorDb.description.map((desc) => (
|
||||
<li className="text-theme-text-primary text-sm">{desc}</li>
|
||||
{VectorDb.description.map((desc, index) => (
|
||||
<li key={index} className="text-theme-text-secondary text-sm">
|
||||
{desc}
|
||||
</li>
|
||||
))}
|
||||
</ul>
|
||||
</div>
|
||||
|
||||
@@ -211,6 +211,8 @@ const SystemSettings = {
|
||||
EmbeddingModelMaxChunkLength:
|
||||
process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH,
|
||||
VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
|
||||
JinaApiKey: !!process.env.JINA_API_KEY,
|
||||
JinaTask: process.env.JINA_TASK,
|
||||
GenericOpenAiEmbeddingApiKey:
|
||||
!!process.env.GENERIC_OPEN_AI_EMBEDDING_API_KEY,
|
||||
GenericOpenAiEmbeddingMaxConcurrentChunks:
|
||||
@@ -336,7 +338,7 @@ const SystemSettings = {
|
||||
const updatePromises = [];
|
||||
for (const key of Object.keys(updates)) {
|
||||
let validatedValue = updates[key];
|
||||
if (this.validations.hasOwnProperty(key)) {
|
||||
if (Object.prototype.hasOwnProperty.call(this.validations, key)) {
|
||||
if (this.validations[key].constructor.name === "AsyncFunction") {
|
||||
validatedValue = await this.validations[key](updates[key]);
|
||||
} else {
|
||||
|
||||
133
server/utils/EmbeddingEngines/jina/index.js
Normal file
133
server/utils/EmbeddingEngines/jina/index.js
Normal file
@@ -0,0 +1,133 @@
|
||||
const { toChunks, maximumChunkLength } = require("../../helpers");
|
||||
|
||||
/**
 * Embedding engine that sends text to the Jina AI `/embeddings` endpoint.
 *
 * Configuration is read from the environment:
 * - `JINA_API_KEY`: sent as a Bearer token.
 * - `EMBEDDING_MODEL_PREF`: model name, defaults to `jina-embeddings-v3`.
 * - `JINA_TASK`: optional `task` field added to the request body when set.
 */
class JinaEmbedder {
  constructor() {
    this.basePath = "https://api.jina.ai/v1";
    this.apiKey = process.env.JINA_API_KEY ?? null;
    this.model = process.env.EMBEDDING_MODEL_PREF ?? "jina-embeddings-v3";
    this.task = process.env.JINA_TASK ?? null;
    this.embeddingMaxChunkLength = maximumChunkLength();

    // `maxConcurrentChunks` is not stored on the instance; it is computed by
    // the getter below on every access.
    this.log(`Initialized ${this.model}`, {
      baseURL: this.basePath,
      maxConcurrentChunks: this.maxConcurrentChunks,
      embeddingMaxChunkLength: this.embeddingMaxChunkLength,
    });
  }

  /**
   * Writes a message to stdout with a `[JinaEmbedder]` prefix.
   * @param {string} text
   * @param {...any} args - Extra values forwarded to `console.log`.
   */
  log(text, ...args) {
    console.log(`\x1b[36m[JinaEmbedder]\x1b[0m ${text}`, ...args);
  }

  /**
   * Number of text chunks sent per request.
   *
   * NOTE: this reads the `GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS`
   * env variable; there is no Jina-specific variable for it.
   * Falls back to 500 when the variable is unset, not numeric, or not a
   * positive integer (a batch size of zero, a negative, or a fraction cannot
   * be used to split the input).
   * @returns {number}
   */
  get maxConcurrentChunks() {
    const fallback = 500;
    const raw = process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS;
    if (!raw) return fallback;

    const parsed = Number(raw);
    return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
  }

  /**
   * Embeds a single input (or the first of several).
   * @param {string|string[]} textInput
   * @returns {Promise<number[]>} The first embedding returned by
   *   `embedChunks`, or an empty array when there is none.
   */
  async embedTextInput(textInput) {
    const result = await this.embedChunks(
      Array.isArray(textInput) ? textInput : [textInput]
    );
    return result?.[0] || [];
  }

  /**
   * Builds an `{ type, message }` error description from a non-OK response.
   * Internal helper for `requestEmbeddings`.
   *
   * The body is parsed as JSON on a best-effort basis: if it is not JSON the
   * HTTP status and status text are reported instead of a JSON parse error.
   * @param {Response} response
   * @returns {Promise<{type: string|number, message: string}>}
   */
  async parseErrorResponse(response) {
    let payload = null;
    try {
      payload = await response.json();
    } catch {
      // Body was not JSON; the HTTP status below still identifies the failure.
    }

    // NOTE(review): Jina appears to report errors as `{ detail }` rather than
    // the OpenAI-style `{ error: { code, message } }` - confirm against their
    // API docs. Both shapes are handled.
    const detail = payload?.detail;
    let detailMessage = null;
    if (detail != null) {
      detailMessage =
        typeof detail === "string" ? detail : JSON.stringify(detail);
    }

    return {
      type: payload?.error?.code || response.status,
      message:
        payload?.error?.message ||
        detailMessage ||
        response.statusText ||
        `HTTP ${response.status}`,
    };
  }

  /**
   * Sends one batch of text to the embeddings endpoint. Internal helper for
   * `embedChunks`.
   *
   * Never rejects: failures are returned as `{ data: [], error }` so the
   * caller can collect the errors of every batch before deciding to abort.
   * @param {string[]} batch
   * @returns {Promise<{data: Array|undefined, error: ?{type: string|number, message: string}}>}
   */
  async requestEmbeddings(batch) {
    // Plain fetch is used here rather than an SDK client.
    try {
      const response = await fetch(`${this.basePath}/embeddings`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${this.apiKey}`,
        },
        body: JSON.stringify({
          model: this.model,
          input: batch,
          ...(this.task ? { task: this.task } : {}),
        }),
      });

      if (!response.ok) {
        return { data: [], error: await this.parseErrorResponse(response) };
      }

      const result = await response.json();
      return { data: result?.data, error: null };
    } catch (e) {
      return {
        data: [],
        error: {
          type: e?.type || "failed_to_embed",
          message: e?.message || "Failed to embed text",
        },
      };
    }
  }

  /**
   * Embeds every text chunk, splitting the input into batches of
   * `maxConcurrentChunks` that are requested concurrently.
   * @param {string[]} textChunks
   * @returns {Promise<number[][]|null>} One embedding per input chunk in
   *   batch order, or `null` when nothing came back or any returned item
   *   lacks an `embedding` property.
   * @throws {Error} When any batch fails; the message lists each distinct
   *   failure. The whole call is aborted because the result would otherwise
   *   be incomplete.
   */
  async embedChunks(textChunks = []) {
    // `Array.from` accepts whatever iterable `toChunks` returns; `Promise.all`
    // keeps the results in batch order.
    const results = await Promise.all(
      Array.from(toChunks(textChunks, this.maxConcurrentChunks), (batch) =>
        this.requestEmbeddings(batch)
      )
    );

    const errors = results.map((res) => res.error).filter(Boolean);
    if (errors.length > 0) {
      const uniqueErrors = new Set(
        errors.map(({ type, message }) => `[${type}]: ${message}`)
      );
      throw new Error(
        `Jina Failed to embed: ${Array.from(uniqueErrors).join(", ")}`
      );
    }

    const data = results.flatMap((res) => res?.data || []);
    const isComplete =
      data.length > 0 &&
      data.every(
        (embd) =>
          embd != null &&
          Object.prototype.hasOwnProperty.call(embd, "embedding")
      );
    return isComplete ? data.map((embd) => embd.embedding) : null;
  }
}
|
||||
|
||||
module.exports = {
|
||||
JinaEmbedder,
|
||||
};
|
||||
@@ -260,6 +260,9 @@ function getEmbeddingEngineSelection() {
|
||||
case "gemini":
|
||||
const { GeminiEmbedder } = require("../EmbeddingEngines/gemini");
|
||||
return new GeminiEmbedder();
|
||||
case "jina":
|
||||
const { JinaEmbedder } = require("../EmbeddingEngines/jina");
|
||||
return new JinaEmbedder();
|
||||
default:
|
||||
return new NativeEmbedder();
|
||||
}
|
||||
|
||||
@@ -301,6 +301,16 @@ const KEY_MAPPING = {
|
||||
checks: [isNotEmpty],
|
||||
},
|
||||
|
||||
// Jina Embedding Settings
|
||||
JinaApiKey: {
|
||||
envKey: "JINA_API_KEY",
|
||||
checks: [isNotEmpty],
|
||||
},
|
||||
JinaTask: {
|
||||
envKey: "JINA_TASK",
|
||||
checks: [],
|
||||
},
|
||||
|
||||
// Generic OpenAI Embedding Settings
|
||||
GenericOpenAiEmbeddingApiKey: {
|
||||
envKey: "GENERIC_OPEN_AI_EMBEDDING_API_KEY",
|
||||
@@ -817,6 +827,7 @@ function supportedEmbeddingModel(input = "") {
|
||||
"litellm",
|
||||
"generic-openai",
|
||||
"mistral",
|
||||
"jina",
|
||||
];
|
||||
return supported.includes(input)
|
||||
? null
|
||||
|
||||
Reference in New Issue
Block a user