Merge pull request #17276 from Elettrotecnica/extend-docling-configuration

feat: Extend docling configuration options
This commit is contained in:
Tim Jaeryang Baek
2025-09-09 18:04:30 +04:00
committed by GitHub
5 changed files with 198 additions and 17 deletions

View File

@@ -148,7 +148,7 @@ class DoclingLoader:
)
}
params = {"image_export_mode": "placeholder", "table_mode": "accurate"}
params = {"image_export_mode": "placeholder"}
if self.params:
if self.params.get("do_picture_description"):
@@ -174,7 +174,11 @@ class DoclingLoader:
self.params.get("picture_description_api", {})
)
if self.params.get("ocr_engine") and self.params.get("ocr_lang"):
params["do_ocr"] = self.params.get("do_ocr")
params["force_ocr"] = self.params.get("force_ocr")
if self.params.get("do_ocr") and self.params.get("ocr_engine") and self.params.get("ocr_lang"):
params["ocr_engine"] = self.params.get("ocr_engine")
params["ocr_lang"] = [
lang.strip()
@@ -182,6 +186,16 @@ class DoclingLoader:
if lang.strip()
]
if self.params.get("pdf_backend"):
params["pdf_backend"] = self.params.get("pdf_backend")
if self.params.get("table_mode"):
params["table_mode"] = self.params.get("table_mode")
if self.params.get("pipeline"):
params["pipeline"] = self.params.get("pipeline")
endpoint = f"{self.url}/v1/convert/file"
r = requests.post(endpoint, files=files, data=params)