This commit is contained in:
Timothy Jaeryang Baek
2026-03-17 17:58:01 -05:00
parent fcf7208352
commit de3317e26b
220 changed files with 17200 additions and 22836 deletions

View File

@@ -31,17 +31,15 @@ log = logging.getLogger(__name__)
class ChromaClient(VectorDBBase):
def __init__(self):
settings_dict = {
"allow_reset": True,
"anonymized_telemetry": False,
'allow_reset': True,
'anonymized_telemetry': False,
}
if CHROMA_CLIENT_AUTH_PROVIDER is not None:
settings_dict["chroma_client_auth_provider"] = CHROMA_CLIENT_AUTH_PROVIDER
settings_dict['chroma_client_auth_provider'] = CHROMA_CLIENT_AUTH_PROVIDER
if CHROMA_CLIENT_AUTH_CREDENTIALS is not None:
settings_dict["chroma_client_auth_credentials"] = (
CHROMA_CLIENT_AUTH_CREDENTIALS
)
settings_dict['chroma_client_auth_credentials'] = CHROMA_CLIENT_AUTH_CREDENTIALS
if CHROMA_HTTP_HOST != "":
if CHROMA_HTTP_HOST != '':
self.client = chromadb.HttpClient(
host=CHROMA_HTTP_HOST,
port=CHROMA_HTTP_PORT,
@@ -87,25 +85,23 @@ class ChromaClient(VectorDBBase):
# chromadb returns cosine distance in the range 2 (worst) -> 0 (best). Re-ordering it to a score in the range 0 (worst) -> 1 (best)
# https://docs.trychroma.com/docs/collections/configure cosine equation
distances: list = result["distances"][0]
distances: list = result['distances'][0]
distances = [2 - dist for dist in distances]
distances = [[dist / 2 for dist in distances]]
return SearchResult(
**{
"ids": result["ids"],
"distances": distances,
"documents": result["documents"],
"metadatas": result["metadatas"],
'ids': result['ids'],
'distances': distances,
'documents': result['documents'],
'metadatas': result['metadatas'],
}
)
return None
except Exception as e:
return None
def query(
self, collection_name: str, filter: dict, limit: Optional[int] = None
) -> Optional[GetResult]:
def query(self, collection_name: str, filter: dict, limit: Optional[int] = None) -> Optional[GetResult]:
# Query the items from the collection based on the filter.
try:
collection = self.client.get_collection(name=collection_name)
@@ -117,9 +113,9 @@ class ChromaClient(VectorDBBase):
return GetResult(
**{
"ids": [result["ids"]],
"documents": [result["documents"]],
"metadatas": [result["metadatas"]],
'ids': [result['ids']],
'documents': [result['documents']],
'metadatas': [result['metadatas']],
}
)
return None
@@ -133,23 +129,21 @@ class ChromaClient(VectorDBBase):
result = collection.get()
return GetResult(
**{
"ids": [result["ids"]],
"documents": [result["documents"]],
"metadatas": [result["metadatas"]],
'ids': [result['ids']],
'documents': [result['documents']],
'metadatas': [result['metadatas']],
}
)
return None
def insert(self, collection_name: str, items: list[VectorItem]):
# Insert the items into the collection. If the collection does not exist, it will be created.
collection = self.client.get_or_create_collection(
name=collection_name, metadata={"hnsw:space": "cosine"}
)
collection = self.client.get_or_create_collection(name=collection_name, metadata={'hnsw:space': 'cosine'})
ids = [item["id"] for item in items]
documents = [item["text"] for item in items]
embeddings = [item["vector"] for item in items]
metadatas = [process_metadata(item["metadata"]) for item in items]
ids = [item['id'] for item in items]
documents = [item['text'] for item in items]
embeddings = [item['vector'] for item in items]
metadatas = [process_metadata(item['metadata']) for item in items]
for batch in create_batches(
api=self.client,
@@ -162,18 +156,14 @@ class ChromaClient(VectorDBBase):
def upsert(self, collection_name: str, items: list[VectorItem]):
# Update the items in the collection; if the items are not present, insert them. If the collection does not exist, it will be created.
collection = self.client.get_or_create_collection(
name=collection_name, metadata={"hnsw:space": "cosine"}
)
collection = self.client.get_or_create_collection(name=collection_name, metadata={'hnsw:space': 'cosine'})
ids = [item["id"] for item in items]
documents = [item["text"] for item in items]
embeddings = [item["vector"] for item in items]
metadatas = [process_metadata(item["metadata"]) for item in items]
ids = [item['id'] for item in items]
documents = [item['text'] for item in items]
embeddings = [item['vector'] for item in items]
metadatas = [process_metadata(item['metadata']) for item in items]
collection.upsert(
ids=ids, documents=documents, embeddings=embeddings, metadatas=metadatas
)
collection.upsert(ids=ids, documents=documents, embeddings=embeddings, metadatas=metadatas)
def delete(
self,
@@ -191,9 +181,7 @@ class ChromaClient(VectorDBBase):
collection.delete(where=filter)
except Exception as e:
# If collection doesn't exist, that's fine - nothing to delete
log.debug(
f"Attempted to delete from non-existent collection {collection_name}. Ignoring."
)
log.debug(f'Attempted to delete from non-existent collection {collection_name}. Ignoring.')
pass
def reset(self):