diff --git a/server/utils/vectorDbProviders/astra/index.js b/server/utils/vectorDbProviders/astra/index.js index 22d83b15d..b1c281016 100644 --- a/server/utils/vectorDbProviders/astra/index.js +++ b/server/utils/vectorDbProviders/astra/index.js @@ -7,6 +7,7 @@ const { getLLMProvider, getEmbeddingEngineSelection, } = require("../../helpers"); +const { GraphManager } = require("../../graphManager"); const AstraDB = { name: "AstraDB", @@ -282,8 +283,10 @@ const AstraDB = { similarityThreshold = 0.25, topN = 4, textQuery = null, + useKGExpansion = false, }) { const result = { + allTexts: [], contextTexts: [], sourceDocuments: [], scores: [], @@ -302,11 +305,27 @@ const AstraDB = { .toArray(); responses.forEach((response) => { + response.metadata.text + ? result.allTexts.push(response.metadata.text) + : null; if (response.$similarity < similarityThreshold) return; result.contextTexts.push(response.metadata.text); result.sourceDocuments.push(response); result.scores.push(response.$similarity); }); + + // Only attempt to expand the original question if we found at least _something_ from the vectorDB + // even if it was filtered out by score - because then there is a chance we can expand on it and save the query. + if (useKGExpansion && result.allTexts.length > 0) { + const expansionTexts = textQuery + ? [textQuery, ...result.allTexts] + : result.allTexts; + result.contextTexts = await new GraphManager().knowledgeGraphSearch( + namespace, + expansionTexts + ); + } + return result; }, allNamespaces: async function (client) { diff --git a/server/utils/vectorDbProviders/chroma/index.js b/server/utils/vectorDbProviders/chroma/index.js index 1529dcab1..1c6b47940 100644 --- a/server/utils/vectorDbProviders/chroma/index.js +++ b/server/utils/vectorDbProviders/chroma/index.js @@ -7,6 +7,7 @@ const { getLLMProvider, getEmbeddingEngineSelection, } = require("../../helpers"); +const { GraphManager } = require("../../graphManager"); const Chroma = { name: "Chroma", @@ -70,9 +71,11 @@ const Chroma = { similarityThreshold = 0.25, topN = 4, textQuery = null, + useKGExpansion = false, }) { const collection = await client.getCollection({ name: namespace }); const result = { + allTexts: [], contextTexts: [], sourceDocuments: [], scores: [], @@ -82,7 +85,11 @@ const Chroma = { queryEmbeddings: queryVector, nResults: topN, }); + response.ids[0].forEach((_, i) => { + response.documents[0][i] + ? result.allTexts.push(response.documents[0][i]) + : null; if ( this.distanceToSimilarity(response.distances[0][i]) < similarityThreshold @@ -93,6 +100,18 @@ const Chroma = { result.scores.push(this.distanceToSimilarity(response.distances[0][i])); }); + // Only attempt to expand the original question if we found at least _something_ from the vectorDB + // even if it was filtered out by score - because then there is a chance we can expand on it and save the query. + if (useKGExpansion && result.allTexts.length > 0) { + const expansionTexts = textQuery + ? [textQuery, ...result.allTexts] + : result.allTexts; + result.contextTexts = await new GraphManager().knowledgeGraphSearch( + namespace, + expansionTexts + ); + } + return result; }, namespace: async function (client, namespace = null) { diff --git a/server/utils/vectorDbProviders/lance/index.js b/server/utils/vectorDbProviders/lance/index.js index 3ebb80d82..127150863 100644 --- a/server/utils/vectorDbProviders/lance/index.js +++ b/server/utils/vectorDbProviders/lance/index.js @@ -66,6 +66,7 @@ const LanceDb = { similarityThreshold = 0.25, topN = 4, textQuery = null, + useKGExpansion = false, }) { const collection = await client.openTable(namespace); const result = { @@ -92,7 +93,7 @@ const LanceDb = { // Only attempt to expand the original question if we found at least _something_ from the vectorDB // even if it was filtered out by score - because then there is a chance we can expand on it and save the query. - if (result.allTexts.length > 0) { + if (useKGExpansion && result.allTexts.length > 0) { const expansionTexts = textQuery ? [textQuery, ...result.allTexts] : result.allTexts; diff --git a/server/utils/vectorDbProviders/milvus/index.js b/server/utils/vectorDbProviders/milvus/index.js index 63327bbce..74d8c4d61 100644 --- a/server/utils/vectorDbProviders/milvus/index.js +++ b/server/utils/vectorDbProviders/milvus/index.js @@ -12,6 +12,7 @@ const { getLLMProvider, getEmbeddingEngineSelection, } = require("../../helpers"); +const { GraphManager } = require("../../graphManager"); const Milvus = { name: "Milvus", @@ -313,8 +314,10 @@ const Milvus = { similarityThreshold = 0.25, topN = 4, textQuery = null, + useKGExpansion = false, }) { const result = { + allTexts: [], contextTexts: [], sourceDocuments: [], scores: [], @@ -324,12 +327,27 @@ const Milvus = { vectors: queryVector, limit: topN, }); + response.results.forEach((match) => { + match.metadata.text ? result.allTexts.push(match.metadata.text) : null; if (match.score < similarityThreshold) return; result.contextTexts.push(match.metadata.text); result.sourceDocuments.push(match); result.scores.push(match.score); }); + + // Only attempt to expand the original question if we found at least _something_ from the vectorDB + // even if it was filtered out by score - because then there is a chance we can expand on it and save the query. + if (useKGExpansion && result.allTexts.length > 0) { + const expansionTexts = textQuery + ? [textQuery, ...result.allTexts] + : result.allTexts; + result.contextTexts = await new GraphManager().knowledgeGraphSearch( + namespace, + expansionTexts + ); + } + return result; }, "namespace-stats": async function (reqBody = {}) { diff --git a/server/utils/vectorDbProviders/pinecone/index.js b/server/utils/vectorDbProviders/pinecone/index.js index d43448610..bd2e2e76d 100644 --- a/server/utils/vectorDbProviders/pinecone/index.js +++ b/server/utils/vectorDbProviders/pinecone/index.js @@ -7,6 +7,7 @@ const { getLLMProvider, getEmbeddingEngineSelection, } = require("../../helpers"); +const { GraphManager } = require("../../graphManager"); const PineconeDB = { name: "Pinecone", @@ -45,8 +46,10 @@ const PineconeDB = { similarityThreshold = 0.25, topN = 4, textQuery = null, + useKGExpansion = false, }) { const result = { + allTexts: [], contextTexts: [], sourceDocuments: [], scores: [], @@ -60,12 +63,25 @@ const PineconeDB = { }); response.matches.forEach((match) => { + match.metadata.text ? result.allTexts.push(match.metadata.text) : null; if (match.score < similarityThreshold) return; result.contextTexts.push(match.metadata.text); result.sourceDocuments.push(match); result.scores.push(match.score); }); + // Only attempt to expand the original question if we found at least _something_ from the vectorDB + // even if it was filtered out by score - because then there is a chance we can expand on it and save the query. + if (useKGExpansion && result.allTexts.length > 0) { + const expansionTexts = textQuery + ? [textQuery, ...result.allTexts] + : result.allTexts; + result.contextTexts = await new GraphManager().knowledgeGraphSearch( + namespace, + expansionTexts + ); + } + return result; }, namespace: async function (index, namespace = null) { diff --git a/server/utils/vectorDbProviders/qdrant/index.js b/server/utils/vectorDbProviders/qdrant/index.js index 313c7e4b7..08b0874c1 100644 --- a/server/utils/vectorDbProviders/qdrant/index.js +++ b/server/utils/vectorDbProviders/qdrant/index.js @@ -7,6 +7,7 @@ const { getLLMProvider, getEmbeddingEngineSelection, } = require("../../helpers"); +const { GraphManager } = require("../../graphManager"); const QDrant = { name: "QDrant", @@ -56,9 +57,11 @@ const QDrant = { similarityThreshold = 0.25, topN = 4, textQuery = null, + useKGExpansion = false, }) { const { client } = await this.connect(); const result = { + allTexts: [], contextTexts: [], sourceDocuments: [], scores: [], @@ -71,6 +74,9 @@ const QDrant = { }); responses.forEach((response) => { + response?.payload?.text + ? result.allTexts.push(response.payload.text) + : null; if (response.score < similarityThreshold) return; result.contextTexts.push(response?.payload?.text || ""); result.sourceDocuments.push({ @@ -80,6 +86,18 @@ const QDrant = { result.scores.push(response.score); }); + // Only attempt to expand the original question if we found at least _something_ from the vectorDB + // even if it was filtered out by score - because then there is a chance we can expand on it and save the query. + if (useKGExpansion && result.allTexts.length > 0) { + const expansionTexts = textQuery + ? [textQuery, ...result.allTexts] + : result.allTexts; + result.contextTexts = await new GraphManager().knowledgeGraphSearch( + namespace, + expansionTexts + ); + } + return result; }, namespace: async function (client, namespace = null) { diff --git a/server/utils/vectorDbProviders/weaviate/index.js b/server/utils/vectorDbProviders/weaviate/index.js index 51a554780..1535204d7 100644 --- a/server/utils/vectorDbProviders/weaviate/index.js +++ b/server/utils/vectorDbProviders/weaviate/index.js @@ -8,6 +8,7 @@ const { getEmbeddingEngineSelection, } = require("../../helpers"); const { camelCase } = require("../../helpers/camelcase"); +const { GraphManager } = require("../../graphManager"); const Weaviate = { name: "Weaviate", @@ -83,8 +84,10 @@ const Weaviate = { similarityThreshold = 0.25, topN = 4, textQuery = null, + useKGExpansion = false, }) { const result = { + allTexts: [], contextTexts: [], sourceDocuments: [], scores: [], @@ -108,12 +111,25 @@ const Weaviate = { _additional: { id, certainty }, ...rest } = response; + rest.text ? result.allTexts.push(rest.text) : null; if (certainty < similarityThreshold) return; result.contextTexts.push(rest.text); result.sourceDocuments.push({ ...rest, id }); result.scores.push(certainty); }); + // Only attempt to expand the original question if we found at least _something_ from the vectorDB + // even if it was filtered out by score - because then there is a chance we can expand on it and save the query. + if (useKGExpansion && result.allTexts.length > 0) { + const expansionTexts = textQuery + ? [textQuery, ...result.allTexts] + : result.allTexts; + result.contextTexts = await new GraphManager().knowledgeGraphSearch( + namespace, + expansionTexts + ); + } + return result; }, allNamespaces: async function (client) { diff --git a/server/utils/vectorDbProviders/zilliz/index.js b/server/utils/vectorDbProviders/zilliz/index.js index dbac156b4..775043e87 100644 --- a/server/utils/vectorDbProviders/zilliz/index.js +++ b/server/utils/vectorDbProviders/zilliz/index.js @@ -12,6 +12,7 @@ const { getLLMProvider, getEmbeddingEngineSelection, } = require("../../helpers"); +const { GraphManager } = require("../../graphManager"); // Zilliz is basically a copy of Milvus DB class with a different constructor // to connect to the cloud @@ -314,8 +315,10 @@ const Zilliz = { similarityThreshold = 0.25, topN = 4, textQuery = null, + useKGExpansion = false, }) { const result = { + allTexts: [], contextTexts: [], sourceDocuments: [], scores: [], @@ -325,12 +328,27 @@ const Zilliz = { vectors: queryVector, limit: topN, }); + response.results.forEach((match) => { + match.metadata.text ? result.allTexts.push(match.metadata.text) : null; if (match.score < similarityThreshold) return; result.contextTexts.push(match.metadata.text); result.sourceDocuments.push(match); result.scores.push(match.score); }); + + // Only attempt to expand the original question if we found at least _something_ from the vectorDB + // even if it was filtered out by score - because then there is a chance we can expand on it and save the query. + if (useKGExpansion && result.allTexts.length > 0) { + const expansionTexts = textQuery + ? [textQuery, ...result.allTexts] + : result.allTexts; + result.contextTexts = await new GraphManager().knowledgeGraphSearch( + namespace, + expansionTexts + ); + } + return result; }, "namespace-stats": async function (reqBody = {}) {