diff --git a/account_invoice_import_simple_pdf/wizard/account_invoice_import.py b/account_invoice_import_simple_pdf/wizard/account_invoice_import.py index 27cf94a8c7..8356e1a097 100644 --- a/account_invoice_import_simple_pdf/wizard/account_invoice_import.py +++ b/account_invoice_import_simple_pdf/wizard/account_invoice_import.py @@ -54,7 +54,16 @@ def _simple_pdf_text_extraction_pymupdf(self, fileobj, test_info): pages = [] doc = fitz.open(fileobj.name) for page in doc: - pages.append(page.get_text()) + # Check if Tessdata is available for OCR + tessdata = fitz.get_tessdata() + # Perform OCR if Tessdata is available, otherwise use regular text extraction + textpage = ( + page.get_textpage_ocr(full=False, tessdata=tessdata) + if tessdata + else page.get_textpage() + ) + # Append the extracted text to the pages list + pages.append(page.get_text(textpage=textpage)) res = { "all": "\n\n".join(pages), "first": pages and pages[0] or "",