OCA · matteotrubini · Feb 13, 2024 · Feb 13, 2024 · Feb 13, 2024
diff --git a/account_invoice_import_simple_pdf/wizard/account_invoice_import.py b/account_invoice_import_simple_pdf/wizard/account_invoice_import.py
@@ -54,7 +54,16 @@
             pages = []
             doc = fitz.open(fileobj.name)
             for page in doc:
-                pages.append(page.get_text())
+                # Look up the Tesseract language data (tessdata) location used for OCR
+                tessdata = fitz.get_tessdata()
+                # Perform OCR if Tessdata is available, otherwise use regular text extraction
+                textpage = (
+                    page.get_textpage_ocr(full=False, tessdata=tessdata)
+                    if tessdata
+                    else page.get_textpage()
+                )
+                # Append the extracted text to the pages list
+                pages.append(page.get_text(textpage=textpage))
             res = {
                 "all": "\n\n".join(pages),
                 "first": pages and pages[0] or "",