Add Jina AI embedding provider

This commit is contained in:
shatfield4
2025-06-05 17:00:52 -07:00
parent ef0928993e
commit 2ef2419056
8 changed files with 292 additions and 7 deletions

View File

@@ -0,0 +1,104 @@
import React, { useState } from "react";
import { CaretDown, CaretUp } from "@phosphor-icons/react";
export default function JinaOptions({ settings }) {
const [showAdvancedControls, setShowAdvancedControls] = useState(false);
return (
<div className="w-full flex flex-col gap-y-7">
<div className="w-full flex items-center gap-[36px] mt-1.5 flex-wrap">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
API Key
</label>
<input
type="password"
name="JinaApiKey"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="Jina API Key"
defaultValue={settings?.JinaApiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Model Preference
</label>
<select
name="EmbeddingModelPref"
required={true}
defaultValue={settings?.EmbeddingModelPref}
className="border-none bg-theme-settings-input-bg border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<optgroup label="Available embedding models">
{[
"jina-embeddings-v3",
"jina-embeddings-v2-base-en",
"jina-embeddings-v2-base-zh",
"jina-embeddings-v2-base-de",
"jina-embeddings-v2-base-es",
"jina-embeddings-v2-base-code",
"jina-clip-v2",
"jina-clip-v1",
].map((model) => {
return (
<option key={model} value={model}>
{model}
</option>
);
})}
</optgroup>
</select>
</div>
</div>
<div className="flex items-center gap-x-3">
<button
type="button"
onClick={() => setShowAdvancedControls(!showAdvancedControls)}
className="flex items-center gap-x-2 text-white text-sm font-semibold"
>
Advanced Settings
{showAdvancedControls ? (
<CaretUp size={16} weight="bold" />
) : (
<CaretDown size={16} weight="bold" />
)}
</button>
</div>
{showAdvancedControls && (
<div className="flex flex-col gap-y-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Task Type
</label>
<input
type="text"
name="JinaTask"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="e.g. retrieval.document"
defaultValue={settings?.JinaTask}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
Max Chunk Length
</label>
<input
type="number"
name="EmbeddingModelMaxChunkLength"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="8192"
defaultValue={settings?.EmbeddingModelMaxChunkLength || 8192}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
</div>
)}
</div>
);
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.0 KiB

View File

@@ -15,6 +15,7 @@ import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
import LiteLLMLogo from "@/media/llmprovider/litellm.png";
import GenericOpenAiLogo from "@/media/llmprovider/generic-openai.png";
import MistralAiLogo from "@/media/llmprovider/mistral.jpeg";
import JinaAiLogo from "@/media/embeddingprovider/jina.png";
import PreLoader from "@/components/Preloader";
import ChangeWarningModal from "@/components/ChangeWarning";
@@ -29,6 +30,7 @@ import CohereEmbeddingOptions from "@/components/EmbeddingSelection/CohereOption
import VoyageAiOptions from "@/components/EmbeddingSelection/VoyageAiOptions";
import LiteLLMOptions from "@/components/EmbeddingSelection/LiteLLMOptions";
import GenericOpenAiEmbeddingOptions from "@/components/EmbeddingSelection/GenericOpenAiOptions";
import JinaOptions from "@/components/EmbeddingSelection/JinaOptions";
import EmbedderItem from "@/components/EmbeddingSelection/EmbedderItem";
import { CaretUpDown, MagnifyingGlass, X } from "@phosphor-icons/react";
@@ -127,6 +129,13 @@ const EMBEDDERS = [
),
description: "Run embedding models from any OpenAI compatible API service.",
},
{
name: "Jina AI",
value: "jina",
logo: JinaAiLogo,
options: (settings) => <JinaOptions settings={settings} />,
description: "Run powerful multilingual embedding models from Jina AI.",
},
];
export default function GeneralEmbeddingPreference() {

View File

@@ -38,6 +38,7 @@ import VoyageAiLogo from "@/media/embeddingprovider/voyageai.png";
import PPIOLogo from "@/media/llmprovider/ppio.png";
import PGVectorLogo from "@/media/vectordbs/pgvector.png";
import DPAISLogo from "@/media/llmprovider/dpais.png";
import JinaAiLogo from "@/media/embeddingprovider/jina.png";
import React, { useState, useEffect } from "react";
import paths from "@/utils/paths";
import { useNavigate } from "react-router-dom";
@@ -187,6 +188,14 @@ export const LLM_SELECTION_PRIVACY = {
],
logo: GenericOpenAiLogo,
},
jina: {
name: "Jina AI",
description: [
"Your document text is sent to Jina AI's servers for processing",
"Your data is handled according to Jina AI's terms of service and privacy policy",
],
logo: JinaAiLogo,
},
cohere: {
name: "Cohere",
description: [
@@ -393,6 +402,14 @@ export const EMBEDDING_ENGINE_PRIVACY = {
],
logo: GenericOpenAiLogo,
},
jina: {
name: "Jina AI",
description: [
"Your document text is sent to Jina AI's servers for processing",
"Your data is handled according to Jina AI's terms of service and privacy policy",
],
logo: JinaAiLogo,
},
gemini: {
name: "Google Gemini",
description: [
@@ -493,8 +510,10 @@ export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
</p>
</div>
<ul className="flex flex-col list-disc ml-4">
{LLMSelection.description.map((desc) => (
<li className="text-theme-text-primary text-sm">{desc}</li>
{LLMSelection.description.map((desc, index) => (
<li key={index} className="text-theme-text-secondary text-sm">
{desc}
</li>
))}
</ul>
</div>
@@ -513,8 +532,10 @@ export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
</p>
</div>
<ul className="flex flex-col list-disc ml-4">
{EmbeddingEngine.description.map((desc) => (
<li className="text-theme-text-primary text-sm">{desc}</li>
{EmbeddingEngine.description.map((desc, index) => (
<li key={index} className="text-theme-text-secondary text-sm">
{desc}
</li>
))}
</ul>
</div>
@@ -534,8 +555,10 @@ export default function DataHandling({ setHeader, setForwardBtn, setBackBtn }) {
</p>
</div>
<ul className="flex flex-col list-disc ml-4">
{VectorDb.description.map((desc) => (
<li className="text-theme-text-primary text-sm">{desc}</li>
{VectorDb.description.map((desc, index) => (
<li key={index} className="text-theme-text-secondary text-sm">
{desc}
</li>
))}
</ul>
</div>

View File

@@ -211,6 +211,8 @@ const SystemSettings = {
EmbeddingModelMaxChunkLength:
process.env.EMBEDDING_MODEL_MAX_CHUNK_LENGTH,
VoyageAiApiKey: !!process.env.VOYAGEAI_API_KEY,
JinaApiKey: !!process.env.JINA_API_KEY,
JinaTask: process.env.JINA_TASK,
GenericOpenAiEmbeddingApiKey:
!!process.env.GENERIC_OPEN_AI_EMBEDDING_API_KEY,
GenericOpenAiEmbeddingMaxConcurrentChunks:
@@ -336,7 +338,7 @@ const SystemSettings = {
const updatePromises = [];
for (const key of Object.keys(updates)) {
let validatedValue = updates[key];
if (this.validations.hasOwnProperty(key)) {
if (Object.prototype.hasOwnProperty.call(this.validations, key)) {
if (this.validations[key].constructor.name === "AsyncFunction") {
validatedValue = await this.validations[key](updates[key]);
} else {

View File

@@ -0,0 +1,133 @@
const { toChunks, maximumChunkLength } = require("../../helpers");
/**
 * Embedding engine that sends text to the Jina AI REST endpoint
 * (`POST {basePath}/embeddings`) using `fetch`.
 *
 * Configuration is read from environment variables:
 *  - `JINA_API_KEY`: bearer token sent with every request.
 *  - `EMBEDDING_MODEL_PREF`: model name, defaults to `jina-embeddings-v3`.
 *  - `JINA_TASK`: optional `task` value added to the request body when set.
 *  - `GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS`: batch size (see getter).
 */
class JinaEmbedder {
  constructor() {
    this.basePath = "https://api.jina.ai/v1";
    this.apiKey = process.env.JINA_API_KEY ?? null;
    // `||` so that an empty string in the env also falls back to the default model.
    this.model = process.env.EMBEDDING_MODEL_PREF || "jina-embeddings-v3";
    this.task = process.env.JINA_TASK ?? null;
    this.embeddingMaxChunkLength = maximumChunkLength();

    // maxConcurrentChunks is not stored on the instance; it is computed by the getter below.
    this.log(`Initialized ${this.model}`, {
      baseURL: this.basePath,
      maxConcurrentChunks: this.maxConcurrentChunks,
      embeddingMaxChunkLength: this.embeddingMaxChunkLength,
    });
  }

  /**
   * Writes a message to stdout prefixed with a cyan `[JinaEmbedder]` tag.
   * @param {string} text
   * @param {...any} args - extra values forwarded to `console.log`.
   */
  log(text, ...args) {
    console.log(`\x1b[36m[JinaEmbedder]\x1b[0m ${text}`, ...args);
  }

  /**
   * Number of text chunks sent per request.
   *
   * Reads `GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS` (the same variable
   * name the generic OpenAI settings use) and returns it as a number. Falls
   * back to 500 when the variable is unset, not numeric, or not a positive
   * integer, since zero/negative/fractional sizes are not usable batch sizes.
   * @returns {number}
   */
  get maxConcurrentChunks() {
    const configured = Number(
      process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS
    );
    return Number.isInteger(configured) && configured > 0 ? configured : 500;
  }

  /**
   * Embeds a single input and returns its vector.
   * @param {string|string[]} textInput - one string, or an array of which only
   *   the first resulting embedding is returned.
   * @returns {Promise<number[]>} the first embedding, or `[]` when none came back.
   * @throws {Error} when any request fails (see `embedChunks`).
   */
  async embedTextInput(textInput) {
    const result = await this.embedChunks(
      Array.isArray(textInput) ? textInput : [textInput]
    );
    return result?.[0] || [];
  }

  /**
   * Sends one batch of texts to the embeddings endpoint.
   *
   * Never rejects: failures are reported through the `error` field so the
   * caller can collect the outcome of every batch before deciding to abort.
   * @param {string[]} chunk - texts for a single request.
   * @returns {Promise<{data: any[], error: null|{type: string|number, message: string}}>}
   */
  async embedBatch(chunk) {
    try {
      const response = await fetch(`${this.basePath}/embeddings`, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${this.apiKey}`,
        },
        body: JSON.stringify({
          model: this.model,
          input: chunk,
          ...(this.task ? { task: this.task } : {}),
        }),
      });

      if (!response.ok) {
        // Error bodies are not guaranteed to be JSON (e.g. an HTML page from a
        // gateway), so a parse failure must not hide the HTTP status.
        const body = await response.json().catch(() => null);
        // NOTE(review): the `detail` fallback assumes some error payloads carry a
        // top-level `detail` string instead of an `error` object; confirm against
        // the Jina API documentation.
        const detail = typeof body?.detail === "string" ? body.detail : null;
        const failure = new Error(
          body?.error?.message || detail || response.statusText
        );
        failure.type = body?.error?.code || response.status;
        throw failure;
      }

      const result = await response.json();
      return { data: result?.data, error: null };
    } catch (e) {
      return {
        data: [],
        error: {
          type: e?.type || "failed_to_embed",
          message: e?.message || "Failed to embed text",
        },
      };
    }
  }

  /**
   * Embeds many texts, splitting them into batches of `maxConcurrentChunks`
   * that are requested concurrently.
   * @param {string[]} textChunks
   * @returns {Promise<number[][]|null>} one vector per input in request order,
   *   or `null` when nothing was returned or an item lacks an `embedding` key.
   * @throws {Error} `Jina Failed to embed: ...` listing the distinct errors when
   *   any batch fails; partial results are discarded because they would be
   *   incomplete.
   */
  async embedChunks(textChunks = []) {
    const embeddingRequests = [];
    for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
      embeddingRequests.push(this.embedBatch(chunk));
    }

    const results = await Promise.all(embeddingRequests);

    const errors = results
      .filter((res) => !!res.error)
      .map((res) => res.error);
    if (errors.length > 0) {
      // Several batches usually fail for the same reason; report each reason once.
      const uniqueErrors = new Set(
        errors.map((error) => `[${error.type}]: ${error.message}`)
      );
      throw new Error(
        `Jina Failed to embed: ${Array.from(uniqueErrors).join(", ")}`
      );
    }

    const data = results.map((res) => res?.data || []).flat();
    return data.length > 0 &&
      data.every((embd) =>
        Object.prototype.hasOwnProperty.call(embd, "embedding")
      )
      ? data.map((embd) => embd.embedding)
      : null;
  }
}
module.exports = {
JinaEmbedder,
};

View File

@@ -260,6 +260,9 @@ function getEmbeddingEngineSelection() {
case "gemini":
const { GeminiEmbedder } = require("../EmbeddingEngines/gemini");
return new GeminiEmbedder();
case "jina":
const { JinaEmbedder } = require("../EmbeddingEngines/jina");
return new JinaEmbedder();
default:
return new NativeEmbedder();
}

View File

@@ -301,6 +301,16 @@ const KEY_MAPPING = {
checks: [isNotEmpty],
},
// Jina Embedding Settings
JinaApiKey: {
envKey: "JINA_API_KEY",
checks: [isNotEmpty],
},
JinaTask: {
envKey: "JINA_TASK",
checks: [],
},
// Generic OpenAI Embedding Settings
GenericOpenAiEmbeddingApiKey: {
envKey: "GENERIC_OPEN_AI_EMBEDDING_API_KEY",
@@ -817,6 +827,7 @@ function supportedEmbeddingModel(input = "") {
"litellm",
"generic-openai",
"mistral",
"jina",
];
return supported.includes(input)
? null