import json
import logging
import time
from typing import Optional
import uuid

from sqlalchemy.orm import Session
from open_webui.internal.db import Base, JSONField, get_db, get_db_context

from open_webui.models.files import (
    File,
    FileModel,
    FileMetadataResponse,
    FileModelResponse,
)
from open_webui.models.groups import Groups
from open_webui.models.users import User, UserModel, Users, UserResponse
from open_webui.models.access_grants import AccessGrantModel, AccessGrants

from pydantic import BaseModel, ConfigDict, Field
from sqlalchemy import (
    BigInteger,
    Column,
    ForeignKey,
    String,
    Text,
    JSON,
    UniqueConstraint,
    or_,
)

log = logging.getLogger(__name__)

####################
# Knowledge DB Schema
####################


# ORM mapping for the 'knowledge' table (one row per knowledge base).
class Knowledge(Base):
    __tablename__ = 'knowledge'

    id = Column(Text, unique=True, primary_key=True)
    user_id = Column(Text)

    name = Column(Text)
    description = Column(Text)

    meta = Column(JSON, nullable=True)

    created_at = Column(BigInteger)
    updated_at = Column(BigInteger)


# Pydantic view of a Knowledge row. `access_grants` is not a column on
# Knowledge; KnowledgeTable._to_knowledge_model fills it in separately.
class KnowledgeModel(BaseModel):
    model_config = ConfigDict(from_attributes=True)

    id: str
    user_id: str

    name: str
    description: str

    meta: Optional[dict] = None

    access_grants: list[AccessGrantModel] = Field(default_factory=list)

    created_at: int  # timestamp in epoch
    updated_at: int  # timestamp in epoch


# ORM mapping for the 'knowledge_file' association table. Both foreign keys
# cascade on delete, and a (knowledge_id, file_id) pair may appear only once.
class KnowledgeFile(Base):
    __tablename__ = 'knowledge_file'

    id = Column(Text, unique=True, primary_key=True)

    knowledge_id = Column(Text, ForeignKey('knowledge.id', ondelete='CASCADE'), nullable=False)
    file_id = Column(Text, ForeignKey('file.id', ondelete='CASCADE'), nullable=False)
    user_id = Column(Text, nullable=False)

    created_at = Column(BigInteger, nullable=False)
    updated_at = Column(BigInteger, nullable=False)

    __table_args__ = (UniqueConstraint('knowledge_id', 'file_id', name='uq_knowledge_file_knowledge_file'),)


# Pydantic view of a KnowledgeFile row.
class KnowledgeFileModel(BaseModel):
    id: str
    knowledge_id: str
    file_id: str
    user_id: str

    created_at: int  # timestamp in epoch
    updated_at: int  # timestamp in epoch

    model_config = ConfigDict(from_attributes=True)


####################
# Forms
####################


# KnowledgeModel plus the (optional) owning user.
class KnowledgeUserModel(KnowledgeModel):
    user: Optional[UserResponse] = None


# KnowledgeModel plus an optional list of file metadata entries.
class KnowledgeResponse(KnowledgeModel):
    files: Optional[list[FileMetadataResponse | dict]] = None


class KnowledgeUserResponse(KnowledgeUserModel):
    pass


# Payload accepted when creating or updating a knowledge base.
class KnowledgeForm(BaseModel):
    name: str
    description: str
    access_grants: Optional[list[dict]] = None


# File response plus the (optional) user attached to the knowledge_file row.
class FileUserResponse(FileModelResponse):
    user: Optional[UserResponse] = None


# One page of knowledge bases together with the unpaginated match count.
class KnowledgeListResponse(BaseModel):
    items: list[KnowledgeUserModel]
    total: int


# One page of files together with the unpaginated match count.
class KnowledgeFileListResponse(BaseModel):
    items: list[FileUserResponse]
    total: int


class KnowledgeTable:
    """Data-access helpers for knowledge bases and their file associations.

    Every method accepts an optional ``db`` session that is handed to
    ``get_db_context``. Most methods catch ``Exception`` and return an
    empty/``None``/``False`` result instead of raising.
    """

    def _get_access_grants(self, knowledge_id: str, db: Optional[Session] = None) -> list[AccessGrantModel]:
        """Return the access grants recorded for one knowledge base."""
        return AccessGrants.get_grants_by_resource('knowledge', knowledge_id, db=db)

    def _to_knowledge_model(
        self,
        knowledge: Knowledge,
        access_grants: Optional[list[AccessGrantModel]] = None,
        db: Optional[Session] = None,
    ) -> KnowledgeModel:
        """Convert a ``Knowledge`` row into a ``KnowledgeModel`` with grants attached.

        Args:
            knowledge: The ORM row to convert.
            access_grants: Pre-fetched grants for this row. When ``None`` they
                are looked up via ``_get_access_grants``; an empty list is
                used as-is and does not trigger a lookup.
            db: Optional session forwarded to the grant lookup.
        """
        knowledge_data = KnowledgeModel.model_validate(knowledge).model_dump(exclude={'access_grants'})
        if access_grants is None:
            access_grants = self._get_access_grants(knowledge_data['id'], db=db)
        knowledge_data['access_grants'] = access_grants
        return KnowledgeModel.model_validate(knowledge_data)

    def insert_new_knowledge(
        self, user_id: str, form_data: KnowledgeForm, db: Optional[Session] = None
    ) -> Optional[KnowledgeModel]:
        """Create a knowledge base owned by ``user_id`` and store its access grants.

        Returns:
            The created ``KnowledgeModel``, or ``None`` if persisting the row
            or its grants raised. Validation of the form data happens before
            the ``try`` block, so a validation error still propagates.
        """
        with get_db_context(db) as db:
            # Single timestamp so created_at and updated_at agree on a new row.
            now = int(time.time())
            knowledge = KnowledgeModel(
                **{
                    **form_data.model_dump(exclude={'access_grants'}),
                    'id': str(uuid.uuid4()),
                    'user_id': user_id,
                    'created_at': now,
                    'updated_at': now,
                    'access_grants': [],
                }
            )

            try:
                result = Knowledge(**knowledge.model_dump(exclude={'access_grants'}))
                db.add(result)
                db.commit()
                db.refresh(result)
                AccessGrants.set_access_grants('knowledge', result.id, form_data.access_grants, db=db)
                return self._to_knowledge_model(result, db=db)
            except Exception as e:
                # Keep the best-effort contract (None on failure) but leave a trace.
                log.exception(e)
                return None

    def get_knowledge_bases(
        self, skip: int = 0, limit: int = 30, db: Optional[Session] = None
    ) -> list[KnowledgeUserModel]:
        """Return every knowledge base, newest ``updated_at`` first, with owner and grants.

        NOTE(review): ``skip`` and ``limit`` are accepted but not applied to the
        query. ``get_knowledge_bases_by_user_id`` in this class calls this method
        without them and filters the full list, so applying the default limit
        here would change its results.
        """
        with get_db_context(db) as db:
            all_knowledge = db.query(Knowledge).order_by(Knowledge.updated_at.desc()).all()

            # Batch the user and grant lookups instead of resolving them per row.
            user_ids = list({knowledge.user_id for knowledge in all_knowledge})
            knowledge_ids = [knowledge.id for knowledge in all_knowledge]

            users = Users.get_users_by_user_ids(user_ids, db=db) if user_ids else []
            users_dict = {user.id: user for user in users}
            grants_map = AccessGrants.get_grants_by_resources('knowledge', knowledge_ids, db=db)

            knowledge_bases = []
            for knowledge in all_knowledge:
                user = users_dict.get(knowledge.user_id)
                knowledge_bases.append(
                    KnowledgeUserModel.model_validate(
                        {
                            **self._to_knowledge_model(
                                knowledge,
                                access_grants=grants_map.get(knowledge.id, []),
                                db=db,
                            ).model_dump(),
                            'user': user.model_dump() if user else None,
                        }
                    )
                )
            return knowledge_bases

    def search_knowledge_bases(
        self,
        user_id: str,
        filter: dict,
        skip: int = 0,
        limit: int = 30,
        db: Optional[Session] = None,
    ) -> KnowledgeListResponse:
        """Search knowledge bases, restricted by ``AccessGrants.has_permission_filter`` ('read').

        Args:
            user_id: Used by the ``view_option`` filter ('created' keeps rows
                owned by this user, 'shared' keeps rows owned by others).
            filter: Optional keys read here: ``query`` (case-insensitive
                substring match on knowledge name/description and on the
                owner's name/email/username) and ``view_option``. The whole
                dict is also forwarded to the permission filter.
            skip: Offset; applied only when truthy.
            limit: Page size; applied only when truthy.
            db: Optional session.

        Returns:
            The requested page plus ``total`` (count before pagination). Any
            exception is logged and an empty response is returned.
        """
        try:
            with get_db_context(db) as db:
                query = db.query(Knowledge, User).outerjoin(User, User.id == Knowledge.user_id)

                if filter:
                    query_key = filter.get('query')
                    if query_key:
                        query = query.filter(
                            or_(
                                Knowledge.name.ilike(f'%{query_key}%'),
                                Knowledge.description.ilike(f'%{query_key}%'),
                                User.name.ilike(f'%{query_key}%'),
                                User.email.ilike(f'%{query_key}%'),
                                User.username.ilike(f'%{query_key}%'),
                            )
                        )

                    view_option = filter.get('view_option')
                    if view_option == 'created':
                        query = query.filter(Knowledge.user_id == user_id)
                    elif view_option == 'shared':
                        query = query.filter(Knowledge.user_id != user_id)

                query = AccessGrants.has_permission_filter(
                    db=db,
                    query=query,
                    DocumentModel=Knowledge,
                    filter=filter,
                    resource_type='knowledge',
                    permission='read',
                )

                # Secondary key keeps ordering deterministic across pages.
                query = query.order_by(Knowledge.updated_at.desc(), Knowledge.id.asc())

                # Count before pagination
                total = query.count()
                if skip:
                    query = query.offset(skip)
                if limit:
                    query = query.limit(limit)

                items = query.all()

                knowledge_ids = [kb.id for kb, _ in items]
                grants_map = AccessGrants.get_grants_by_resources('knowledge', knowledge_ids, db=db)

                knowledge_bases = []
                for knowledge_base, user in items:
                    knowledge_bases.append(
                        KnowledgeUserModel.model_validate(
                            {
                                **self._to_knowledge_model(
                                    knowledge_base,
                                    access_grants=grants_map.get(knowledge_base.id, []),
                                    db=db,
                                ).model_dump(),
                                'user': (UserModel.model_validate(user).model_dump() if user else None),
                            }
                        )
                    )

                return KnowledgeListResponse(items=knowledge_bases, total=total)
        except Exception as e:
            log.exception(e)
            return KnowledgeListResponse(items=[], total=0)

    def search_knowledge_files(
        self, filter: dict, skip: int = 0, limit: int = 30, db: Optional[Session] = None
    ) -> KnowledgeFileListResponse:
        """
        Scalable version: search files across all knowledge bases the user
        has READ access to, without loading all KBs or using large IN() lists.

        ``filter`` may carry ``query`` (case-insensitive substring match on the
        filename); the whole dict is also forwarded to the permission filter.
        Each returned item carries the file, the user attached to the
        knowledge_file row (if any) and the owning knowledge base under
        ``collection``. Any exception is logged and an empty response is
        returned.
        """
        try:
            with get_db_context(db) as db:
                # Base query: join Knowledge → KnowledgeFile → File
                query = (
                    db.query(File, User, Knowledge)
                    .join(KnowledgeFile, File.id == KnowledgeFile.file_id)
                    .join(Knowledge, KnowledgeFile.knowledge_id == Knowledge.id)
                    .outerjoin(User, User.id == KnowledgeFile.user_id)
                )

                # Apply access-control directly to the joined query
                # This makes the database handle filtering, even with 10k+ KBs
                query = AccessGrants.has_permission_filter(
                    db=db,
                    query=query,
                    DocumentModel=Knowledge,
                    filter=filter,
                    resource_type='knowledge',
                    permission='read',
                )

                # Apply filename search
                if filter:
                    q = filter.get('query')
                    if q:
                        query = query.filter(File.filename.ilike(f'%{q}%'))

                # Order by file changes
                query = query.order_by(File.updated_at.desc(), File.id.asc())

                # Count before pagination
                total = query.count()

                if skip:
                    query = query.offset(skip)
                if limit:
                    query = query.limit(limit)

                rows = query.all()

                # Look up grants for the distinct knowledge bases on this page in
                # a single batched call (as the other list methods here do)
                # rather than once per returned file.
                knowledge_ids = list(dict.fromkeys(knowledge.id for _, _, knowledge in rows))
                grants_map = AccessGrants.get_grants_by_resources('knowledge', knowledge_ids, db=db)

                items = []
                for file, user, knowledge in rows:
                    items.append(
                        FileUserResponse(
                            **FileModel.model_validate(file).model_dump(),
                            user=(UserResponse(**UserModel.model_validate(user).model_dump()) if user else None),
                            collection=self._to_knowledge_model(
                                knowledge,
                                access_grants=grants_map.get(knowledge.id, []),
                                db=db,
                            ).model_dump(),
                        )
                    )

                return KnowledgeFileListResponse(items=items, total=total)

        except Exception as e:
            log.exception('search_knowledge_files error: %s', e)
            return KnowledgeFileListResponse(items=[], total=0)

    def check_access_by_user_id(
        self, id: str, user_id: str, permission: str = 'write', db: Optional[Session] = None
    ) -> bool:
        """Return whether ``user_id`` holds ``permission`` on knowledge base ``id``.

        The owner always has access; otherwise the decision is delegated to
        ``AccessGrants.has_access`` with the user's group ids. Returns ``False``
        when the knowledge base cannot be loaded.
        """
        knowledge = self.get_knowledge_by_id(id, db=db)
        if not knowledge:
            return False
        if knowledge.user_id == user_id:
            return True

        user_group_ids = {group.id for group in Groups.get_groups_by_member_id(user_id, db=db)}
        return AccessGrants.has_access(
            user_id=user_id,
            resource_type='knowledge',
            resource_id=knowledge.id,
            permission=permission,
            user_group_ids=user_group_ids,
            db=db,
        )

    def get_knowledge_bases_by_user_id(
        self, user_id: str, permission: str = 'write', db: Optional[Session] = None
    ) -> list[KnowledgeUserModel]:
        """Return the knowledge bases ``user_id`` owns or holds ``permission`` on.

        Loads every knowledge base via ``get_knowledge_bases`` and filters in
        Python, calling ``AccessGrants.has_access`` for each one the user does
        not own.
        """
        knowledge_bases = self.get_knowledge_bases(db=db)
        user_group_ids = {group.id for group in Groups.get_groups_by_member_id(user_id, db=db)}
        return [
            knowledge_base
            for knowledge_base in knowledge_bases
            if knowledge_base.user_id == user_id
            or AccessGrants.has_access(
                user_id=user_id,
                resource_type='knowledge',
                resource_id=knowledge_base.id,
                permission=permission,
                user_group_ids=user_group_ids,
                db=db,
            )
        ]

    def get_knowledge_by_id(self, id: str, db: Optional[Session] = None) -> Optional[KnowledgeModel]:
        """Return the knowledge base with this id, or ``None`` if missing or on error."""
        try:
            with get_db_context(db) as db:
                knowledge = db.query(Knowledge).filter_by(id=id).first()
                return self._to_knowledge_model(knowledge, db=db) if knowledge else None
        except Exception:
            return None

    def get_knowledge_by_id_and_user_id(
        self, id: str, user_id: str, db: Optional[Session] = None
    ) -> Optional[KnowledgeModel]:
        """Return knowledge base ``id`` only if ``user_id`` owns it or has 'write' access.

        Returns ``None`` when the knowledge base does not exist or the user has
        neither ownership nor a 'write' grant.
        """
        knowledge = self.get_knowledge_by_id(id, db=db)
        if not knowledge:
            return None

        if knowledge.user_id == user_id:
            return knowledge

        user_group_ids = {group.id for group in Groups.get_groups_by_member_id(user_id, db=db)}
        if AccessGrants.has_access(
            user_id=user_id,
            resource_type='knowledge',
            resource_id=knowledge.id,
            permission='write',
            user_group_ids=user_group_ids,
            db=db,
        ):
            return knowledge
        return None

    def get_knowledges_by_file_id(self, file_id: str, db: Optional[Session] = None) -> list[KnowledgeModel]:
        """Return every knowledge base that contains ``file_id`` (empty list on error)."""
        try:
            with get_db_context(db) as db:
                knowledges = (
                    db.query(Knowledge)
                    .join(KnowledgeFile, Knowledge.id == KnowledgeFile.knowledge_id)
                    .filter(KnowledgeFile.file_id == file_id)
                    .all()
                )
                knowledge_ids = [k.id for k in knowledges]
                grants_map = AccessGrants.get_grants_by_resources('knowledge', knowledge_ids, db=db)
                return [
                    self._to_knowledge_model(
                        knowledge,
                        access_grants=grants_map.get(knowledge.id, []),
                        db=db,
                    )
                    for knowledge in knowledges
                ]
        except Exception:
            return []

    def search_files_by_id(
        self,
        knowledge_id: str,
        user_id: str,
        filter: dict,
        skip: int = 0,
        limit: int = 30,
        db: Optional[Session] = None,
    ) -> KnowledgeFileListResponse:
        """Search the files attached to one knowledge base.

        Args:
            knowledge_id: Knowledge base whose files are listed.
            user_id: Used by the ``view_option`` filter ('created' keeps files
                attached by this user, 'shared' keeps files attached by others).
            filter: Optional keys: ``query`` (case-insensitive filename
                substring), ``view_option``, ``order_by`` ('name',
                'created_at' or 'updated_at') and ``direction`` ('asc' sorts
                ascending; anything else sorts descending).
            skip: Offset; applied only when truthy.
            limit: Page size; applied only when truthy.
            db: Optional session.

        Returns:
            The requested page plus ``total`` (count before pagination). Any
            exception is logged and an empty response is returned.
        """
        try:
            with get_db_context(db) as db:
                query = (
                    db.query(File, User)
                    .join(KnowledgeFile, File.id == KnowledgeFile.file_id)
                    .outerjoin(User, User.id == KnowledgeFile.user_id)
                    .filter(KnowledgeFile.knowledge_id == knowledge_id)
                )

                # Default sort: updated_at descending
                primary_sort = File.updated_at.desc()

                if filter:
                    query_key = filter.get('query')
                    if query_key:
                        query = query.filter(File.filename.ilike(f'%{query_key}%'))

                    view_option = filter.get('view_option')
                    if view_option == 'created':
                        query = query.filter(KnowledgeFile.user_id == user_id)
                    elif view_option == 'shared':
                        query = query.filter(KnowledgeFile.user_id != user_id)

                    order_by = filter.get('order_by')
                    direction = filter.get('direction')
                    is_asc = direction == 'asc'

                    if order_by == 'name':
                        primary_sort = File.filename.asc() if is_asc else File.filename.desc()
                    elif order_by == 'created_at':
                        primary_sort = File.created_at.asc() if is_asc else File.created_at.desc()
                    elif order_by == 'updated_at':
                        primary_sort = File.updated_at.asc() if is_asc else File.updated_at.desc()

                # Apply sort with secondary key for deterministic pagination
                query = query.order_by(primary_sort, File.id.asc())

                # Count BEFORE pagination
                total = query.count()

                if skip:
                    query = query.offset(skip)
                if limit:
                    query = query.limit(limit)

                items = query.all()

                files = []
                for file, user in items:
                    files.append(
                        FileUserResponse(
                            **FileModel.model_validate(file).model_dump(),
                            user=(UserResponse(**UserModel.model_validate(user).model_dump()) if user else None),
                        )
                    )

                return KnowledgeFileListResponse(items=files, total=total)
        except Exception as e:
            log.exception(e)
            return KnowledgeFileListResponse(items=[], total=0)

    def get_files_by_id(self, knowledge_id: str, db: Optional[Session] = None) -> list[FileModel]:
        """Return all files attached to ``knowledge_id`` (empty list on error)."""
        try:
            with get_db_context(db) as db:
                files = (
                    db.query(File)
                    .join(KnowledgeFile, File.id == KnowledgeFile.file_id)
                    .filter(KnowledgeFile.knowledge_id == knowledge_id)
                    .all()
                )
                return [FileModel.model_validate(file) for file in files]
        except Exception:
            return []

    def get_file_metadatas_by_id(self, knowledge_id: str, db: Optional[Session] = None) -> list[FileMetadataResponse]:
        """Return metadata responses for all files attached to ``knowledge_id`` (empty list on error)."""
        try:
            with get_db_context(db) as db:
                files = self.get_files_by_id(knowledge_id, db=db)
                return [FileMetadataResponse(**file.model_dump()) for file in files]
        except Exception:
            return []

    def add_file_to_knowledge_by_id(
        self,
        knowledge_id: str,
        file_id: str,
        user_id: str,
        db: Optional[Session] = None,
    ) -> Optional[KnowledgeFileModel]:
        """Attach ``file_id`` to ``knowledge_id`` on behalf of ``user_id``.

        Returns:
            The created association, or ``None`` if the insert raised. The
            table declares a unique constraint on (knowledge_id, file_id), so
            attaching the same file twice is presumably one such failure —
            confirm against the database in use.
        """
        with get_db_context(db) as db:
            # Single timestamp so created_at and updated_at agree on a new row.
            now = int(time.time())
            knowledge_file = KnowledgeFileModel(
                **{
                    'id': str(uuid.uuid4()),
                    'knowledge_id': knowledge_id,
                    'file_id': file_id,
                    'user_id': user_id,
                    'created_at': now,
                    'updated_at': now,
                }
            )

            try:
                result = KnowledgeFile(**knowledge_file.model_dump())
                db.add(result)
                db.commit()
                db.refresh(result)
                return KnowledgeFileModel.model_validate(result)
            except Exception:
                return None

    def has_file(self, knowledge_id: str, file_id: str, db: Optional[Session] = None) -> bool:
        """Check whether a file belongs to a knowledge base."""
        try:
            with get_db_context(db) as db:
                return db.query(KnowledgeFile).filter_by(knowledge_id=knowledge_id, file_id=file_id).first() is not None
        except Exception:
            return False

    def remove_file_from_knowledge_by_id(self, knowledge_id: str, file_id: str, db: Optional[Session] = None) -> bool:
        """Detach ``file_id`` from ``knowledge_id``.

        Returns ``True`` when the delete statement ran and was committed (even
        if it matched no rows) and ``False`` if it raised.
        """
        try:
            with get_db_context(db) as db:
                db.query(KnowledgeFile).filter_by(knowledge_id=knowledge_id, file_id=file_id).delete()
                db.commit()
                return True
        except Exception:
            return False

    def reset_knowledge_by_id(self, id: str, db: Optional[Session] = None) -> Optional[KnowledgeModel]:
        """Remove every file association of a knowledge base and bump its ``updated_at``.

        Returns the refreshed knowledge base, or ``None`` on error (logged).
        """
        try:
            with get_db_context(db) as db:
                # Delete all knowledge_file entries for this knowledge_id
                db.query(KnowledgeFile).filter_by(knowledge_id=id).delete()
                db.commit()

                # Update the knowledge entry's updated_at timestamp
                db.query(Knowledge).filter_by(id=id).update(
                    {
                        'updated_at': int(time.time()),
                    }
                )
                db.commit()

                return self.get_knowledge_by_id(id=id, db=db)
        except Exception as e:
            log.exception(e)
            return None

    def update_knowledge_by_id(
        self,
        id: str,
        form_data: KnowledgeForm,
        overwrite: bool = False,
        db: Optional[Session] = None,
    ) -> Optional[KnowledgeModel]:
        """Update name/description (and, when provided, access grants) of a knowledge base.

        Args:
            id: Knowledge base to update.
            form_data: New values. ``access_grants`` is applied only when it is
                not ``None``.
            overwrite: Accepted for interface compatibility; not read by this
                method.
            db: Optional session.

        Returns:
            The refreshed knowledge base, or ``None`` on error (logged).
        """
        try:
            with get_db_context(db) as db:
                db.query(Knowledge).filter_by(id=id).update(
                    {
                        **form_data.model_dump(exclude={'access_grants'}),
                        'updated_at': int(time.time()),
                    }
                )
                db.commit()
                if form_data.access_grants is not None:
                    AccessGrants.set_access_grants('knowledge', id, form_data.access_grants, db=db)
                return self.get_knowledge_by_id(id=id, db=db)
        except Exception as e:
            log.exception(e)
            return None

    def update_knowledge_data_by_id(
        self, id: str, data: dict, db: Optional[Session] = None
    ) -> Optional[KnowledgeModel]:
        """Write ``data`` to a knowledge base and bump its ``updated_at``.

        Returns the refreshed knowledge base, or ``None`` on error (logged).

        NOTE(review): the ``Knowledge`` mapping declared in this file has no
        ``data`` column (only ``meta``), so the update below likely raises and
        ends up in the ``except`` path. Confirm whether this should target
        ``meta`` or whether the method is obsolete.
        """
        try:
            with get_db_context(db) as db:
                db.query(Knowledge).filter_by(id=id).update(
                    {
                        'data': data,
                        'updated_at': int(time.time()),
                    }
                )
                db.commit()
                return self.get_knowledge_by_id(id=id, db=db)
        except Exception as e:
            log.exception(e)
            return None

    def delete_knowledge_by_id(self, id: str, db: Optional[Session] = None) -> bool:
        """Revoke all grants on a knowledge base and delete it.

        Returns ``True`` when the statements ran and were committed and
        ``False`` if anything raised.
        """
        try:
            with get_db_context(db) as db:
                AccessGrants.revoke_all_access('knowledge', id, db=db)
                db.query(Knowledge).filter_by(id=id).delete()
                db.commit()
                return True
        except Exception:
            return False

    def delete_all_knowledge(self, db: Optional[Session] = None) -> bool:
        """Revoke grants on every knowledge base and delete them all.

        Returns ``True`` on success and ``False`` if anything raised.
        """
        with get_db_context(db) as db:
            try:
                knowledge_ids = [row[0] for row in db.query(Knowledge.id).all()]
                for knowledge_id in knowledge_ids:
                    AccessGrants.revoke_all_access('knowledge', knowledge_id, db=db)
                db.query(Knowledge).delete()
                db.commit()

                return True
            except Exception:
                return False


Knowledges = KnowledgeTable()