Sync the similarityResponse method across all vector DB providers

This commit is contained in:
timothycarambat
2024-02-16 11:59:45 -08:00
parent c9b0aa6fa3
commit 69cd5e98d7
8 changed files with 126 additions and 1 deletions

View File

@@ -7,6 +7,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const AstraDB = {
name: "AstraDB",
@@ -282,8 +283,10 @@ const AstraDB = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -302,11 +305,27 @@ const AstraDB = {
.toArray();
responses.forEach((response) => {
response.metadata.text
? result.allTexts.push(response.metadata.text)
: null;
if (response.$similarity < similarityThreshold) return;
result.contextTexts.push(response.metadata.text);
result.sourceDocuments.push(response);
result.scores.push(response.$similarity);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
allNamespaces: async function (client) {

View File

@@ -7,6 +7,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const Chroma = {
name: "Chroma",
@@ -70,9 +71,11 @@ const Chroma = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const collection = await client.getCollection({ name: namespace });
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -82,7 +85,11 @@ const Chroma = {
queryEmbeddings: queryVector,
nResults: topN,
});
response.ids[0].forEach((_, i) => {
response.documents[0][i]
? result.allTexts.push(response.documents[0][i])
: null;
if (
this.distanceToSimilarity(response.distances[0][i]) <
similarityThreshold
@@ -93,6 +100,18 @@ const Chroma = {
result.scores.push(this.distanceToSimilarity(response.distances[0][i]));
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
namespace: async function (client, namespace = null) {

View File

@@ -66,6 +66,7 @@ const LanceDb = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const collection = await client.openTable(namespace);
const result = {
@@ -92,7 +93,7 @@ const LanceDb = {
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (result.allTexts.length > 0) {
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;

View File

@@ -12,6 +12,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const Milvus = {
name: "Milvus",
@@ -313,8 +314,10 @@ const Milvus = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -324,12 +327,27 @@ const Milvus = {
vectors: queryVector,
limit: topN,
});
response.results.forEach((match) => {
match.metadata.text ? result.allTexts.push(match.metadata.text) : null;
if (match.score < similarityThreshold) return;
result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
"namespace-stats": async function (reqBody = {}) {

View File

@@ -7,6 +7,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const PineconeDB = {
name: "Pinecone",
@@ -45,8 +46,10 @@ const PineconeDB = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -60,12 +63,25 @@ const PineconeDB = {
});
response.matches.forEach((match) => {
match.metadata.text ? result.allTexts.push(match.metadata.text) : null;
if (match.score < similarityThreshold) return;
result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
namespace: async function (index, namespace = null) {

View File

@@ -7,6 +7,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const QDrant = {
name: "QDrant",
@@ -56,9 +57,11 @@ const QDrant = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const { client } = await this.connect();
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -71,6 +74,9 @@ const QDrant = {
});
responses.forEach((response) => {
response?.payload?.text
? result.allTexts.push(response.payload.text)
: null;
if (response.score < similarityThreshold) return;
result.contextTexts.push(response?.payload?.text || "");
result.sourceDocuments.push({
@@ -80,6 +86,18 @@ const QDrant = {
result.scores.push(response.score);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
namespace: async function (client, namespace = null) {

View File

@@ -8,6 +8,7 @@ const {
getEmbeddingEngineSelection,
} = require("../../helpers");
const { camelCase } = require("../../helpers/camelcase");
const { GraphManager } = require("../../graphManager");
const Weaviate = {
name: "Weaviate",
@@ -83,8 +84,10 @@ const Weaviate = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -108,12 +111,25 @@ const Weaviate = {
_additional: { id, certainty },
...rest
} = response;
rest.text ? result.allTexts.push(rest.text) : null;
if (certainty < similarityThreshold) return;
result.contextTexts.push(rest.text);
result.sourceDocuments.push({ ...rest, id });
result.scores.push(certainty);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
allNamespaces: async function (client) {

View File

@@ -12,6 +12,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
// Zilliz is basically a copy of Milvus DB class with a different constructor
// to connect to the cloud
@@ -314,8 +315,10 @@ const Zilliz = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -325,12 +328,27 @@ const Zilliz = {
vectors: queryVector,
limit: topN,
});
response.results.forEach((match) => {
match.metadata.text ? result.allTexts.push(match.metadata.text) : null;
if (match.score < similarityThreshold) return;
result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
"namespace-stats": async function (reqBody = {}) {