chore: format

This commit is contained in:
Timothy Jaeryang Baek
2025-05-29 02:36:33 +04:00
parent cb4299eb98
commit 551597b9cc
58 changed files with 1393 additions and 104 deletions

View File

@@ -239,7 +239,27 @@ class Loader:
elif (
self.engine == "datalab_marker"
and self.kwargs.get("DATALAB_MARKER_API_KEY")
and file_ext in ["pdf", "xls", "xlsx", "ods", "doc", "docx", "odt", "ppt", "pptx", "odp", "html", "epub", "png", "jpeg", "jpg", "webp", "gif", "tiff"]
and file_ext
in [
"pdf",
"xls",
"xlsx",
"ods",
"doc",
"docx",
"odt",
"ppt",
"pptx",
"odp",
"html",
"epub",
"png",
"jpeg",
"jpg",
"webp",
"gif",
"tiff",
]
):
loader = DatalabMarkerLoader(
file_path=file_path,
@@ -249,9 +269,15 @@ class Loader:
skip_cache=self.kwargs.get("DATALAB_MARKER_SKIP_CACHE", False),
force_ocr=self.kwargs.get("DATALAB_MARKER_FORCE_OCR", False),
paginate=self.kwargs.get("DATALAB_MARKER_PAGINATE", False),
strip_existing_ocr=self.kwargs.get("DATALAB_MARKER_STRIP_EXISTING_OCR", False),
disable_image_extraction=self.kwargs.get("DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", False),
output_format=self.kwargs.get("DATALAB_MARKER_OUTPUT_FORMAT", "markdown")
strip_existing_ocr=self.kwargs.get(
"DATALAB_MARKER_STRIP_EXISTING_OCR", False
),
disable_image_extraction=self.kwargs.get(
"DATALAB_MARKER_DISABLE_IMAGE_EXTRACTION", False
),
output_format=self.kwargs.get(
"DATALAB_MARKER_OUTPUT_FORMAT", "markdown"
),
)
elif self.engine == "docling" and self.kwargs.get("DOCLING_SERVER_URL"):
if self._is_text_file(file_ext, file_content_type):