Sync the similarityResponse method across all vector DB providers

This commit is contained in:
timothycarambat
2024-02-16 11:59:45 -08:00
parent c9b0aa6fa3
commit 69cd5e98d7
8 changed files with 126 additions and 1 deletions

View File

@@ -7,6 +7,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const AstraDB = {
name: "AstraDB",
@@ -282,8 +283,10 @@ const AstraDB = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -302,11 +305,27 @@ const AstraDB = {
.toArray();
responses.forEach((response) => {
response.metadata.text
? result.allTexts.push(response.metadata.text)
: null;
if (response.$similarity < similarityThreshold) return;
result.contextTexts.push(response.metadata.text);
result.sourceDocuments.push(response);
result.scores.push(response.$similarity);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
allNamespaces: async function (client) {

View File

@@ -7,6 +7,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const Chroma = {
name: "Chroma",
@@ -70,9 +71,11 @@ const Chroma = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const collection = await client.getCollection({ name: namespace });
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -82,7 +85,11 @@ const Chroma = {
queryEmbeddings: queryVector,
nResults: topN,
});
response.ids[0].forEach((_, i) => {
response.documents[0][i]
? result.allTexts.push(response.documents[0][i])
: null;
if (
this.distanceToSimilarity(response.distances[0][i]) <
similarityThreshold
@@ -93,6 +100,18 @@ const Chroma = {
result.scores.push(this.distanceToSimilarity(response.distances[0][i]));
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
namespace: async function (client, namespace = null) {

View File

@@ -66,6 +66,7 @@ const LanceDb = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const collection = await client.openTable(namespace);
const result = {
@@ -92,7 +93,7 @@ const LanceDb = {
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (result.allTexts.length > 0) {
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;

View File

@@ -12,6 +12,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const Milvus = {
name: "Milvus",
@@ -313,8 +314,10 @@ const Milvus = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -324,12 +327,27 @@ const Milvus = {
vectors: queryVector,
limit: topN,
});
response.results.forEach((match) => {
match.metadata.text ? result.allTexts.push(match.metadata.text) : null;
if (match.score < similarityThreshold) return;
result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
"namespace-stats": async function (reqBody = {}) {

View File

@@ -7,6 +7,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const PineconeDB = {
name: "Pinecone",
@@ -45,8 +46,10 @@ const PineconeDB = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -60,12 +63,25 @@ const PineconeDB = {
});
response.matches.forEach((match) => {
match.metadata.text ? result.allTexts.push(match.metadata.text) : null;
if (match.score < similarityThreshold) return;
result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
namespace: async function (index, namespace = null) {

View File

@@ -7,6 +7,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
const QDrant = {
name: "QDrant",
@@ -56,9 +57,11 @@ const QDrant = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const { client } = await this.connect();
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -71,6 +74,9 @@ const QDrant = {
});
responses.forEach((response) => {
response?.payload?.text
? result.allTexts.push(response.payload.text)
: null;
if (response.score < similarityThreshold) return;
result.contextTexts.push(response?.payload?.text || "");
result.sourceDocuments.push({
@@ -80,6 +86,18 @@ const QDrant = {
result.scores.push(response.score);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
namespace: async function (client, namespace = null) {

View File

@@ -8,6 +8,7 @@ const {
getEmbeddingEngineSelection,
} = require("../../helpers");
const { camelCase } = require("../../helpers/camelcase");
const { GraphManager } = require("../../graphManager");
const Weaviate = {
name: "Weaviate",
@@ -83,8 +84,10 @@ const Weaviate = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -108,12 +111,25 @@ const Weaviate = {
_additional: { id, certainty },
...rest
} = response;
rest.text ? result.allTexts.push(rest.text) : null;
if (certainty < similarityThreshold) return;
result.contextTexts.push(rest.text);
result.sourceDocuments.push({ ...rest, id });
result.scores.push(certainty);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
allNamespaces: async function (client) {

View File

@@ -12,6 +12,7 @@ const {
getLLMProvider,
getEmbeddingEngineSelection,
} = require("../../helpers");
const { GraphManager } = require("../../graphManager");
// Zilliz is basically a copy of Milvus DB class with a different constructor
// to connect to the cloud
@@ -314,8 +315,10 @@ const Zilliz = {
similarityThreshold = 0.25,
topN = 4,
textQuery = null,
useKGExpansion = false,
}) {
const result = {
allTexts: [],
contextTexts: [],
sourceDocuments: [],
scores: [],
@@ -325,12 +328,27 @@ const Zilliz = {
vectors: queryVector,
limit: topN,
});
response.results.forEach((match) => {
match.metadata.text ? result.allTexts.push(match.metadata.text) : null;
if (match.score < similarityThreshold) return;
result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
});
// Only attempt to expand the original question if we found at least _something_ from the vectorDB
// even if it was filtered out by score - because then there is a chance we can expand on it and save the query.
if (useKGExpansion && result.allTexts.length > 0) {
const expansionTexts = textQuery
? [textQuery, ...result.allTexts]
: result.allTexts;
result.contextTexts = await new GraphManager().knowledgeGraphSearch(
namespace,
expansionTexts
);
}
return result;
},
"namespace-stats": async function (reqBody = {}) {