From f1752b190415e113ca4e6e35de0752818d40bbed Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Sun, 1 Sep 2024 15:58:58 +0800 Subject: [PATCH 01/27] chore(pdf): remove old get_byte_stream function --- src/monopoly/pdf.py | 12 ------------ .../unit/test_bank_identifier/test_get_identifier.py | 4 ---- 2 files changed, 16 deletions(-) diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index 47b66d14..1c008ac3 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -109,18 +109,6 @@ def document(self) -> fitz.Document: args = {"filename": self.file_path, "stream": self.file_bytes} return fitz.Document(**args) - @lru_cache - def get_byte_stream(self) -> BytesIO: - if self.file_path: - with open(self.file_path, "rb") as file: - stream = BytesIO(file.read()) - return stream - - if self.file_bytes: - return BytesIO(self.file_bytes) - - raise RuntimeError("Unable to create document") - @lru_cache def open(self) -> fitz.Document: """ diff --git a/tests/unit/test_bank_identifier/test_get_identifier.py b/tests/unit/test_bank_identifier/test_get_identifier.py index f1001cb1..81303ce4 100644 --- a/tests/unit/test_bank_identifier/test_get_identifier.py +++ b/tests/unit/test_bank_identifier/test_get_identifier.py @@ -1,4 +1,3 @@ -from io import BytesIO from unittest.mock import Mock, patch import pytest @@ -25,9 +24,6 @@ def __init__(self, is_encrypted: bool, metadata: dict): def open(self): return MockDocument(self.is_encrypted, self.metadata) - def get_byte_stream(self): - return BytesIO(b"%PDF-1.6") - @pytest.fixture def mock_encrypted_document(): From c85b614fd011e6cc5feb610ca1eaf354e9b5456a Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Sun, 1 Sep 2024 16:47:28 +0800 Subject: [PATCH 02/27] refactor(pdf): make PdfDocument a child class of fitz.Document --- src/monopoly/bank_detector.py | 2 +- src/monopoly/pdf.py | 75 ++++-------- tests/integration/test_parser.py | 12 +- tests/integration/test_pdf_document.py | 111 ++++++++++-------- .../test_get_identifier.py | 6 +- 5 files changed, 99 insertions(+), 107 deletions(-) diff --git a/src/monopoly/bank_detector.py b/src/monopoly/bank_detector.py index c876d8db..75f15c6e 100644 --- a/src/monopoly/bank_detector.py +++ b/src/monopoly/bank_detector.py @@ -20,7 +20,7 @@ def metadata_items(self) -> list[Any]: Retrieves encryption and metadata identifiers from a bank statement PDF """ identifiers: list[Identifier] = [] - if metadata := self.document.open().metadata: + if metadata := self.document.metadata: metadata_identifier = MetadataIdentifier(**metadata) identifiers.append(metadata_identifier) diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index 1c008ac3..452cf142 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -59,8 +59,8 @@ class BadPasswordFormatError(Exception): """Exception raised passwords are not provided in a proper format""" -class PdfDocument: - """Handles logic related to the opening, unlocking and storage of a PDF document""" +class PdfDocument(fitz.Document): + """Handles logic related to the opening, unlocking, and storage of a PDF document.""" def __init__( self, @@ -68,56 +68,20 @@ def __init__( file_path: Optional[Path] = None, file_bytes: Optional[bytes] = None, ): - self._passwords = passwords + self.passwords = passwords or PdfPasswords().pdf_passwords self.file_path = file_path self.file_bytes = file_bytes - @cached_property - def raw_text(self) -> str: - raw_text = "" - for page in self.open(): - raw_text += page.get_text() - return raw_text - - @property - def passwords(self): - if not self._passwords: - return PdfPasswords().pdf_passwords - return self._passwords - - @cached_property - def name(self): - return self.open().name - - @cached_property - def metadata(self): - return self.open().metadata - - @cached_property - def document(self) -> fitz.Document: - """ - Returns a Python representation of a PDF document. - """ - if not self.file_path and not self.file_bytes: - raise RuntimeError("Either `file_path` or `file_bytes` must be passed") - - if self.file_path and self.file_bytes: - raise RuntimeError( - "Only one of `file_path` or `file_bytes` should be defined" - ) - args = {"filename": self.file_path, "stream": self.file_bytes} - return fitz.Document(**args) + super().__init__(**args) - @lru_cache - def open(self) -> fitz.Document: - """ - Opens and decrypts a PDF document - """ - document = self.document + if self.is_encrypted: + self._unlock_document() - if not document.is_encrypted: - return document + def _unlock_document(self): + """Attempt to unlock the document using the provided passwords.""" + if not self.is_encrypted: + return self if not self.passwords: raise MissingPasswordError("No password found in PDF configuration") @@ -132,12 +96,19 @@ def open(self) -> fitz.Document: raise BadPasswordFormatError("Passwords should be stored as SecretStr") for password in self.passwords: - document.authenticate(password.get_secret_value()) - - if not document.is_encrypted: + if self.authenticate(password.get_secret_value()): logger.debug("Successfully authenticated with password") - return document - raise WrongPasswordError(f"Could not open document: {document.name}") + return self + + raise WrongPasswordError(f"Could not open document: {self.name}") + + @cached_property + def raw_text(self) -> str: + """Extracts and returns the text from the PDF""" + raw_text = "" + for page in self: + raw_text += page.get_text() + return raw_text class PdfParser: @@ -166,7 +137,7 @@ def page_bbox(self): @lru_cache def get_pages(self) -> list[PdfPage]: logger.debug("Extracting text from PDF") - document = self.document.open() + document = self.document num_pages = list(range(document.page_count)) document.select(num_pages[self.page_range]) diff --git a/tests/integration/test_parser.py b/tests/integration/test_parser.py index d6858dbe..23a50cb6 100644 --- a/tests/integration/test_parser.py +++ b/tests/integration/test_parser.py @@ -7,15 +7,15 @@ fixture_directory = Path(__file__).parent / "fixtures" -def test_get_pages(parser: PdfParser, pdf_document: PdfDocument): - pdf_document.file_path = Path("src/monopoly/examples/example_statement.pdf") +def test_get_pages(parser: PdfParser): + pdf_document = PdfDocument(file_path="src/monopoly/examples/example_statement.pdf") parser.document = pdf_document parser.page_range = slice(0, -1) assert len(parser.get_pages()) == 3 -def test_get_pages_with_no_text(parser: PdfParser, pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "4_pages_blank.pdf" +def test_get_pages_with_no_text(parser: PdfParser): + pdf_document = PdfDocument(file_path=fixture_directory / "4_pages_blank.pdf") parser.document = pdf_document parser.page_range = slice(0, -1) @@ -23,8 +23,8 @@ def test_get_pages_with_no_text(parser: PdfParser, pdf_document: PdfDocument): parser.get_pages() -def test_get_pages_invalid_returns_error(parser: PdfParser, pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "4_pages_blank.pdf" +def test_get_pages_invalid_returns_error(parser: PdfParser): + pdf_document = PdfDocument(fixture_directory / "4_pages_blank.pdf") parser.document = pdf_document parser.page_range = slice(99, -99) diff --git a/tests/integration/test_pdf_document.py b/tests/integration/test_pdf_document.py index f49e9c1c..40b30bec 100644 --- a/tests/integration/test_pdf_document.py +++ b/tests/integration/test_pdf_document.py @@ -1,5 +1,5 @@ from pathlib import Path -from unittest.mock import PropertyMock, patch +from unittest.mock import MagicMock, patch from pydantic import SecretStr from pytest import raises @@ -14,73 +14,92 @@ fixture_directory = Path(__file__).parent / "fixtures" -def test_can_open_file_stream(pdf_document: PdfDocument): +def test_can_open_file_stream(): with open(fixture_directory / "4_pages_blank.pdf", "rb") as file: - pdf_document.file_bytes = file.read() - document = pdf_document.open() - assert len(document) == 4 + pdf_document = PdfDocument(file_bytes=file.read()) + assert len(pdf_document) == 4 def test_can_open_protected(pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "protected.pdf" - pdf_document._passwords = [SecretStr("foobar123")] + mock_pdf_passwords_instance = MagicMock() + mock_pdf_passwords_instance.pdf_passwords = [SecretStr("foobar123")] - pdf_document.open() + with patch("monopoly.pdf.PdfPasswords", return_value=mock_pdf_passwords_instance): + pdf_document = PdfDocument( + passwords=None, file_path=fixture_directory / "protected.pdf" + ) + pdf_document._unlock_document() -def test_wrong_password_raises_error(pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "protected.pdf" - pdf_document._passwords = [SecretStr("wrong_pw")] - +def test_wrong_password_raises_error(): with raises(WrongPasswordError, match="Could not open"): - pdf_document.open() - - -def test_override_password(pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "protected.pdf" - pdf_document._passwords = [SecretStr("foobar123")] - pdf_document = pdf_document.open() + pdf_document: PdfDocument = PdfDocument( + passwords=[SecretStr("wrongpw_123")], + file_path=fixture_directory / "protected.pdf", + ) + pdf_document._unlock_document() + + +def test_override_password(): + pdf_document: PdfDocument = PdfDocument( + passwords=[SecretStr("foobar123")], + file_path=fixture_directory / "protected.pdf", + ) + pdf_document._unlock_document() assert not pdf_document.is_encrypted -def test_error_raised_if_override_is_wrong(pdf_document: PdfDocument): - with raises(WrongPasswordError, match="Could not open"): - pdf_document.file_path = fixture_directory / "protected.pdf" - pdf_document._passwords = [SecretStr("wrongpw")] - pdf_document.open() - - -def test_missing_password_raises_error(pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "protected.pdf" - with patch( - "monopoly.pdf.PdfDocument.passwords", new_callable=PropertyMock - ) as mock_passwords: - mock_passwords.return_value = None - with raises( - MissingPasswordError, match="No password found in PDF configuration" +def test_missing_password_raises_error(): + mock_pdf_passwords_instance = MagicMock() + mock_pdf_passwords_instance.pdf_passwords = [] + + with raises(MissingPasswordError): + with patch( + "monopoly.pdf.PdfPasswords", return_value=mock_pdf_passwords_instance ): - pdf_document.open() + pdf_document = PdfDocument( + passwords=None, file_path=fixture_directory / "protected.pdf" + ) + pdf_document._unlock_document() -def test_null_password_raises_error(pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "protected.pdf" - pdf_document._passwords = [SecretStr("")] +def test_null_password_raises_error(): + mock_pdf_passwords_instance = MagicMock() + mock_pdf_passwords_instance.pdf_passwords = [SecretStr("")] with raises(MissingPasswordError, match="is empty"): - pdf_document.open() + with patch( + "monopoly.pdf.PdfPasswords", return_value=mock_pdf_passwords_instance + ): + pdf_document = PdfDocument( + passwords=None, file_path=fixture_directory / "protected.pdf" + ) + pdf_document._unlock_document() -def test_invalid_password_type_raises_error(pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "protected.pdf" - pdf_document._passwords = "not a list" +def test_invalid_password_type_raises_error(): + mock_pdf_passwords_instance = MagicMock() + mock_pdf_passwords_instance.pdf_passwords = "not a list" with raises(BadPasswordFormatError, match="should be stored in a list"): - pdf_document.open() + with patch( + "monopoly.pdf.PdfPasswords", return_value=mock_pdf_passwords_instance + ): + pdf_document = PdfDocument( + passwords=None, file_path=fixture_directory / "protected.pdf" + ) + pdf_document._unlock_document() def test_plain_text_passwords_raises_error(pdf_document: PdfDocument): - pdf_document.file_path = fixture_directory / "protected.pdf" - pdf_document._passwords = ["password"] + mock_pdf_passwords_instance = MagicMock() + mock_pdf_passwords_instance.pdf_passwords = ["insecure"] with raises(BadPasswordFormatError, match="should be stored as SecretStr"): - pdf_document.open() + with patch( + "monopoly.pdf.PdfPasswords", return_value=mock_pdf_passwords_instance + ): + pdf_document = PdfDocument( + passwords=None, file_path=fixture_directory / "protected.pdf" + ) + pdf_document._unlock_document() diff --git a/tests/unit/test_bank_identifier/test_get_identifier.py b/tests/unit/test_bank_identifier/test_get_identifier.py index 81303ce4..5d2a2ebc 100644 --- a/tests/unit/test_bank_identifier/test_get_identifier.py +++ b/tests/unit/test_bank_identifier/test_get_identifier.py @@ -54,8 +54,10 @@ def mock_non_encrypted_document(): def test_metadata_identifier(mock_non_encrypted_document): - with patch.object(PdfDocument, "open", new_callable=Mock) as mock_open: - mock_open.return_value = mock_non_encrypted_document + with patch.object( + PdfDocument, "_unlock_document", new_callable=Mock + ) as mock_unlock: + mock_unlock.return_value = mock_non_encrypted_document expected_identifier = MetadataIdentifier( title="foo", From c6c5146cbe34b88ec39fb52793a4c312ba0d01c8 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Sun, 1 Sep 2024 16:56:09 +0800 Subject: [PATCH 03/27] chore: remove old mock_document fixture --- tests/conftest.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 83b8e0e4..d1b05dd4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,5 +1,5 @@ import os -from unittest.mock import MagicMock, Mock, PropertyMock, patch +from unittest.mock import MagicMock, Mock, patch import fitz import pytest @@ -36,22 +36,6 @@ def mock_get_pages(): yield mock_get_pages -@pytest.fixture -def mock_document(): - with patch( - "monopoly.pdf.PdfDocument.document", new_callable=PropertyMock - ) as mock_document_prop: - mock_document_instance = mock_document_prop.return_value - type(mock_document_instance).metadata = PropertyMock( - return_value={ - "creator": "Adobe Acrobat 23.3", - "producer": "Adobe Acrobat Pro (64-bit)", - } - ) - type(mock_document_instance).name = PropertyMock(return_value="foo") - yield mock_document_prop - - @pytest.fixture(scope="function") def handler(parser): with patch.object(StatementHandler, "get_statement") as _: From 4c2cb88a16d028cdac4eec5239cbb4f498924834 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 00:41:26 +0800 Subject: [PATCH 04/27] feat(banks/hsbc): add support for non-OCR credit statements --- poetry.lock | 1144 +++++++++++++++++++++++---- pyproject.toml | 1 + src/monopoly/banks/hsbc/hsbc.py | 9 +- src/monopoly/config.py | 3 + src/monopoly/constants/date.py | 1 + src/monopoly/constants/statement.py | 4 +- src/monopoly/identifiers.py | 1 + src/monopoly/log.py | 1 + src/monopoly/pdf.py | 42 +- tesseract.cfg | 2 + 10 files changed, 1059 insertions(+), 149 deletions(-) create mode 100644 tesseract.cfg diff --git a/poetry.lock b/poetry.lock index 39c0c83a..97bcceb8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -71,6 +71,184 @@ d = ["aiohttp (>=3.7.4)", "aiohttp (>=3.7.4,!=3.9.0)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "cffi" +version = "1.17.0" +description = "Foreign Function Interface for Python calling C code." +optional = false +python-versions = ">=3.8" +files = [ + {file = "cffi-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9338cc05451f1942d0d8203ec2c346c830f8e86469903d5126c1f0a13a2bcbb"}, + {file = "cffi-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0ce71725cacc9ebf839630772b07eeec220cbb5f03be1399e0457a1464f8e1a"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c815270206f983309915a6844fe994b2fa47e5d05c4c4cef267c3b30e34dbe42"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6bdcd415ba87846fd317bee0774e412e8792832e7805938987e4ede1d13046d"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a98748ed1a1df4ee1d6f927e151ed6c1a09d5ec21684de879c7ea6aa96f58f2"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0a048d4f6630113e54bb4b77e315e1ba32a5a31512c31a273807d0027a7e69ab"}, + {file = "cffi-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:24aa705a5f5bd3a8bcfa4d123f03413de5d86e497435693b638cbffb7d5d8a1b"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:856bf0924d24e7f93b8aee12a3a1095c34085600aa805693fb7f5d1962393206"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:4304d4416ff032ed50ad6bb87416d802e67139e31c0bde4628f36a47a3164bfa"}, + {file = "cffi-1.17.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:331ad15c39c9fe9186ceaf87203a9ecf5ae0ba2538c9e898e3a6967e8ad3db6f"}, + {file = "cffi-1.17.0-cp310-cp310-win32.whl", hash = "sha256:669b29a9eca6146465cc574659058ed949748f0809a2582d1f1a324eb91054dc"}, + {file = "cffi-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:48b389b1fd5144603d61d752afd7167dfd205973a43151ae5045b35793232aa2"}, + {file = "cffi-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5d97162c196ce54af6700949ddf9409e9833ef1003b4741c2b39ef46f1d9720"}, + {file = "cffi-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5ba5c243f4004c750836f81606a9fcb7841f8874ad8f3bf204ff5e56332b72b9"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bb9333f58fc3a2296fb1d54576138d4cf5d496a2cc118422bd77835e6ae0b9cb"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:435a22d00ec7d7ea533db494da8581b05977f9c37338c80bc86314bec2619424"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1df34588123fcc88c872f5acb6f74ae59e9d182a2707097f9e28275ec26a12d"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:df8bb0010fdd0a743b7542589223a2816bdde4d94bb5ad67884348fa2c1c67e8"}, + {file = "cffi-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8b5b9712783415695663bd463990e2f00c6750562e6ad1d28e072a611c5f2a6"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ffef8fd58a36fb5f1196919638f73dd3ae0db1a878982b27a9a5a176ede4ba91"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4e67d26532bfd8b7f7c05d5a766d6f437b362c1bf203a3a5ce3593a645e870b8"}, + {file = "cffi-1.17.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:45f7cd36186db767d803b1473b3c659d57a23b5fa491ad83c6d40f2af58e4dbb"}, + {file = "cffi-1.17.0-cp311-cp311-win32.whl", hash = "sha256:a9015f5b8af1bb6837a3fcb0cdf3b874fe3385ff6274e8b7925d81ccaec3c5c9"}, + {file = "cffi-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:b50aaac7d05c2c26dfd50c3321199f019ba76bb650e346a6ef3616306eed67b0"}, + {file = "cffi-1.17.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:aec510255ce690d240f7cb23d7114f6b351c733a74c279a84def763660a2c3bc"}, + {file = "cffi-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2770bb0d5e3cc0e31e7318db06efcbcdb7b31bcb1a70086d3177692a02256f59"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:db9a30ec064129d605d0f1aedc93e00894b9334ec74ba9c6bdd08147434b33eb"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a47eef975d2b8b721775a0fa286f50eab535b9d56c70a6e62842134cf7841195"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f3e0992f23bbb0be00a921eae5363329253c3b86287db27092461c887b791e5e"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6107e445faf057c118d5050560695e46d272e5301feffda3c41849641222a828"}, + {file = "cffi-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb862356ee9391dc5a0b3cbc00f416b48c1b9a52d252d898e5b7696a5f9fe150"}, + {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c1c13185b90bbd3f8b5963cd8ce7ad4ff441924c31e23c975cb150e27c2bf67a"}, + {file = "cffi-1.17.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:17c6d6d3260c7f2d94f657e6872591fe8733872a86ed1345bda872cfc8c74885"}, + {file = "cffi-1.17.0-cp312-cp312-win32.whl", hash = "sha256:c3b8bd3133cd50f6b637bb4322822c94c5ce4bf0d724ed5ae70afce62187c492"}, + {file = "cffi-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:dca802c8db0720ce1c49cce1149ff7b06e91ba15fa84b1d59144fef1a1bc7ac2"}, + {file = "cffi-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:6ce01337d23884b21c03869d2f68c5523d43174d4fc405490eb0091057943118"}, + {file = "cffi-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cab2eba3830bf4f6d91e2d6718e0e1c14a2f5ad1af68a89d24ace0c6b17cced7"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:14b9cbc8f7ac98a739558eb86fabc283d4d564dafed50216e7f7ee62d0d25377"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b00e7bcd71caa0282cbe3c90966f738e2db91e64092a877c3ff7f19a1628fdcb"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:41f4915e09218744d8bae14759f983e466ab69b178de38066f7579892ff2a555"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e4760a68cab57bfaa628938e9c2971137e05ce48e762a9cb53b76c9b569f1204"}, + {file = "cffi-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:011aff3524d578a9412c8b3cfaa50f2c0bd78e03eb7af7aa5e0df59b158efb2f"}, + {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:a003ac9edc22d99ae1286b0875c460351f4e101f8c9d9d2576e78d7e048f64e0"}, + {file = "cffi-1.17.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ef9528915df81b8f4c7612b19b8628214c65c9b7f74db2e34a646a0a2a0da2d4"}, + {file = "cffi-1.17.0-cp313-cp313-win32.whl", hash = "sha256:70d2aa9fb00cf52034feac4b913181a6e10356019b18ef89bc7c12a283bf5f5a"}, + {file = "cffi-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:b7b6ea9e36d32582cda3465f54c4b454f62f23cb083ebc7a94e2ca6ef011c3a7"}, + {file = "cffi-1.17.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:964823b2fc77b55355999ade496c54dde161c621cb1f6eac61dc30ed1b63cd4c"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:516a405f174fd3b88829eabfe4bb296ac602d6a0f68e0d64d5ac9456194a5b7e"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dec6b307ce928e8e112a6bb9921a1cb00a0e14979bf28b98e084a4b8a742bd9b"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e4094c7b464cf0a858e75cd14b03509e84789abf7b79f8537e6a72152109c76e"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2404f3de742f47cb62d023f0ba7c5a916c9c653d5b368cc966382ae4e57da401"}, + {file = "cffi-1.17.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3aa9d43b02a0c681f0bfbc12d476d47b2b2b6a3f9287f11ee42989a268a1833c"}, + {file = "cffi-1.17.0-cp38-cp38-win32.whl", hash = "sha256:0bb15e7acf8ab35ca8b24b90af52c8b391690ef5c4aec3d31f38f0d37d2cc499"}, + {file = "cffi-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:93a7350f6706b31f457c1457d3a3259ff9071a66f312ae64dc024f049055f72c"}, + {file = "cffi-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1a2ddbac59dc3716bc79f27906c010406155031a1c801410f1bafff17ea304d2"}, + {file = "cffi-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6327b572f5770293fc062a7ec04160e89741e8552bf1c358d1a23eba68166759"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbc183e7bef690c9abe5ea67b7b60fdbca81aa8da43468287dae7b5c046107d4"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bdc0f1f610d067c70aa3737ed06e2726fd9d6f7bfee4a351f4c40b6831f4e82"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6d872186c1617d143969defeadac5a904e6e374183e07977eedef9c07c8953bf"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0d46ee4764b88b91f16661a8befc6bfb24806d885e27436fdc292ed7e6f6d058"}, + {file = "cffi-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f76a90c345796c01d85e6332e81cab6d70de83b829cf1d9762d0a3da59c7932"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0e60821d312f99d3e1569202518dddf10ae547e799d75aef3bca3a2d9e8ee693"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:eb09b82377233b902d4c3fbeeb7ad731cdab579c6c6fda1f763cd779139e47c3"}, + {file = "cffi-1.17.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:24658baf6224d8f280e827f0a50c46ad819ec8ba380a42448e24459daf809cf4"}, + {file = "cffi-1.17.0-cp39-cp39-win32.whl", hash = "sha256:0fdacad9e0d9fc23e519efd5ea24a70348305e8d7d85ecbb1a5fa66dc834e7fb"}, + {file = "cffi-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:7cbc78dc018596315d4e7841c8c3a7ae31cc4d638c9b627f87d52e8abaaf2d29"}, + {file = "cffi-1.17.0.tar.gz", hash = "sha256:f3157624b7558b914cb039fd1af735e5e8049a87c817cc215109ad1c8779df76"}, +] + +[package.dependencies] +pycparser = "*" + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "click" version = "8.1.7" @@ -96,6 +274,55 @@ files = [ {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +[[package]] +name = "cryptography" +version = "43.0.0" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +optional = false +python-versions = ">=3.7" +files = [ + {file = "cryptography-43.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:64c3f16e2a4fc51c0d06af28441881f98c5d91009b8caaff40cf3548089e9c74"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dcdedae5c7710b9f97ac6bba7e1052b95c7083c9d0e9df96e02a1932e777895"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d9a1eca329405219b605fac09ecfc09ac09e595d6def650a437523fcd08dd22"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ea9e57f8ea880eeea38ab5abf9fbe39f923544d7884228ec67d666abd60f5a47"}, + {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9a8d6802e0825767476f62aafed40532bd435e8a5f7d23bd8b4f5fd04cc80ecf"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cc70b4b581f28d0a254d006f26949245e3657d40d8857066c2ae22a61222ef55"}, + {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a997df8c1c2aae1e1e5ac49c2e4f610ad037fc5a3aadc7b64e39dea42249431"}, + {file = "cryptography-43.0.0-cp37-abi3-win32.whl", hash = "sha256:6e2b11c55d260d03a8cf29ac9b5e0608d35f08077d8c087be96287f43af3ccdc"}, + {file = "cryptography-43.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:31e44a986ceccec3d0498e16f3d27b2ee5fdf69ce2ab89b52eaad1d2f33d8778"}, + {file = "cryptography-43.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:7b3f5fe74a5ca32d4d0f302ffe6680fcc5c28f8ef0dc0ae8f40c0f3a1b4fca66"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac1955ce000cb29ab40def14fd1bbfa7af2017cca696ee696925615cafd0dce5"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:299d3da8e00b7e2b54bb02ef58d73cd5f55fb31f33ebbf33bd00d9aa6807df7e"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ee0c405832ade84d4de74b9029bedb7b31200600fa524d218fc29bfa371e97f5"}, + {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cb013933d4c127349b3948aa8aaf2f12c0353ad0eccd715ca789c8a0f671646f"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fdcb265de28585de5b859ae13e3846a8e805268a823a12a4da2597f1f5afc9f0"}, + {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2905ccf93a8a2a416f3ec01b1a7911c3fe4073ef35640e7ee5296754e30b762b"}, + {file = "cryptography-43.0.0-cp39-abi3-win32.whl", hash = "sha256:47ca71115e545954e6c1d207dd13461ab81f4eccfcb1345eac874828b5e3eaaf"}, + {file = "cryptography-43.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:0663585d02f76929792470451a5ba64424acc3cd5227b03921dab0e2f27b1709"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c6d112bf61c5ef44042c253e4859b3cbbb50df2f78fa8fae6747a7814484a70"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:844b6d608374e7d08f4f6e6f9f7b951f9256db41421917dfb2d003dde4cd6b66"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:51956cf8730665e2bdf8ddb8da0056f699c1a5715648c1b0144670c1ba00b48f"}, + {file = "cryptography-43.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:aae4d918f6b180a8ab8bf6511a419473d107df4dbb4225c7b48c5c9602c38c7f"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:232ce02943a579095a339ac4b390fbbe97f5b5d5d107f8a08260ea2768be8cc2"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5bcb8a5620008a8034d39bce21dc3e23735dfdb6a33a06974739bfa04f853947"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:08a24a7070b2b6804c1940ff0f910ff728932a9d0e80e7814234269f9d46d069"}, + {file = "cryptography-43.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e9c5266c432a1e23738d178e51c2c7a5e2ddf790f248be939448c0ba2021f9d1"}, + {file = "cryptography-43.0.0.tar.gz", hash = "sha256:b88075ada2d51aa9f18283532c9f60e72170041bba88d7f37e49cbb10275299e"}, +] + +[package.dependencies] +cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] +nox = ["nox"] +pep8test = ["check-sdist", "click", "mypy", "ruff"] +sdist = ["build"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["certifi", "cryptography-vectors (==43.0.0)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] + [[package]] name = "dateparser" version = "1.2.0" @@ -118,6 +345,37 @@ calendars = ["convertdate", "hijri-converter"] fasttext = ["fasttext"] langdetect = ["langdetect"] +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + +[[package]] +name = "deprecation" +version = "2.1.0" +description = "A library to handle automated deprecations" +optional = false +python-versions = "*" +files = [ + {file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"}, + {file = "deprecation-2.1.0.tar.gz", hash = "sha256:72b3bde64e5d778694b0cf68178aed03d15e15477116add3fb773e581f9518ff"}, +] + +[package.dependencies] +packaging = "*" + [[package]] name = "dill" version = "0.3.8" @@ -135,13 +393,13 @@ profile = ["gprof2dot (>=2022.7.29)"] [[package]] name = "exceptiongroup" -version = "1.2.1" +version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" files = [ - {file = "exceptiongroup-1.2.1-py3-none-any.whl", hash = "sha256:5258b9ed329c5bbdd31a309f53cbfb0b155341807f6ff7606a1e801a891b29ad"}, - {file = "exceptiongroup-1.2.1.tar.gz", hash = "sha256:a4785e48b045528f5bfe627b6ad554ff32def154f42372786903b7abcfe1aa16"}, + {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, + {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, ] [package.extras] @@ -196,6 +454,23 @@ files = [ {file = "git_cliff-2.5.0.tar.gz", hash = "sha256:9b73aba7bb0401ad67565ba1edec055aea765490ca3cf14e2da9200600ae4b51"}, ] +[[package]] +name = "img2pdf" +version = "0.5.1" +description = "Convert images to PDF via direct JPEG inclusion." +optional = false +python-versions = "*" +files = [ + {file = "img2pdf-0.5.1.tar.gz", hash = "sha256:73847e47242f4b5bd113c70049e03e03212936c2727cd2a8bf564229a67d0b95"}, +] + +[package.dependencies] +pikepdf = "*" +Pillow = "*" + +[package.extras] +gui = ["tkinter"] + [[package]] name = "iniconfig" version = "2.0.0" @@ -221,6 +496,184 @@ files = [ [package.extras] colors = ["colorama (>=0.4.6)"] +[[package]] +name = "lxml" +version = "5.3.0" +description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." +optional = false +python-versions = ">=3.6" +files = [ + {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd36439be765e2dde7660212b5275641edbc813e7b24668831a5c8ac91180656"}, + {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ae5fe5c4b525aa82b8076c1a59d642c17b6e8739ecf852522c6321852178119d"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:501d0d7e26b4d261fca8132854d845e4988097611ba2531408ec91cf3fd9d20a"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb66442c2546446944437df74379e9cf9e9db353e61301d1a0e26482f43f0dd8"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9e41506fec7a7f9405b14aa2d5c8abbb4dbbd09d88f9496958b6d00cb4d45330"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f7d4a670107d75dfe5ad080bed6c341d18c4442f9378c9f58e5851e86eb79965"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41ce1f1e2c7755abfc7e759dc34d7d05fd221723ff822947132dc934d122fe22"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:44264ecae91b30e5633013fb66f6ddd05c006d3e0e884f75ce0b4755b3e3847b"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_ppc64le.whl", hash = "sha256:3c174dc350d3ec52deb77f2faf05c439331d6ed5e702fc247ccb4e6b62d884b7"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_s390x.whl", hash = "sha256:2dfab5fa6a28a0b60a20638dc48e6343c02ea9933e3279ccb132f555a62323d8"}, + {file = "lxml-5.3.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:b1c8c20847b9f34e98080da785bb2336ea982e7f913eed5809e5a3c872900f32"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:2c86bf781b12ba417f64f3422cfc302523ac9cd1d8ae8c0f92a1c66e56ef2e86"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c162b216070f280fa7da844531169be0baf9ccb17263cf5a8bf876fcd3117fa5"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:36aef61a1678cb778097b4a6eeae96a69875d51d1e8f4d4b491ab3cfb54b5a03"}, + {file = "lxml-5.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f65e5120863c2b266dbcc927b306c5b78e502c71edf3295dfcb9501ec96e5fc7"}, + {file = "lxml-5.3.0-cp310-cp310-win32.whl", hash = "sha256:ef0c1fe22171dd7c7c27147f2e9c3e86f8bdf473fed75f16b0c2e84a5030ce80"}, + {file = "lxml-5.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:052d99051e77a4f3e8482c65014cf6372e61b0a6f4fe9edb98503bb5364cfee3"}, + {file = "lxml-5.3.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:74bcb423462233bc5d6066e4e98b0264e7c1bed7541fff2f4e34fe6b21563c8b"}, + {file = "lxml-5.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a3d819eb6f9b8677f57f9664265d0a10dd6551d227afb4af2b9cd7bdc2ccbf18"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b8f5db71b28b8c404956ddf79575ea77aa8b1538e8b2ef9ec877945b3f46442"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2c3406b63232fc7e9b8783ab0b765d7c59e7c59ff96759d8ef9632fca27c7ee4"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2ecdd78ab768f844c7a1d4a03595038c166b609f6395e25af9b0f3f26ae1230f"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:168f2dfcfdedf611eb285efac1516c8454c8c99caf271dccda8943576b67552e"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa617107a410245b8660028a7483b68e7914304a6d4882b5ff3d2d3eb5948d8c"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:69959bd3167b993e6e710b99051265654133a98f20cec1d9b493b931942e9c16"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_ppc64le.whl", hash = "sha256:bd96517ef76c8654446fc3db9242d019a1bb5fe8b751ba414765d59f99210b79"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_s390x.whl", hash = "sha256:ab6dd83b970dc97c2d10bc71aa925b84788c7c05de30241b9e96f9b6d9ea3080"}, + {file = "lxml-5.3.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:eec1bb8cdbba2925bedc887bc0609a80e599c75b12d87ae42ac23fd199445654"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a7095eeec6f89111d03dabfe5883a1fd54da319c94e0fb104ee8f23616b572d"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6f651ebd0b21ec65dfca93aa629610a0dbc13dbc13554f19b0113da2e61a4763"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:f422a209d2455c56849442ae42f25dbaaba1c6c3f501d58761c619c7836642ec"}, + {file = "lxml-5.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:62f7fdb0d1ed2065451f086519865b4c90aa19aed51081979ecd05a21eb4d1be"}, + {file = "lxml-5.3.0-cp311-cp311-win32.whl", hash = "sha256:c6379f35350b655fd817cd0d6cbeef7f265f3ae5fedb1caae2eb442bbeae9ab9"}, + {file = "lxml-5.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:9c52100e2c2dbb0649b90467935c4b0de5528833c76a35ea1a2691ec9f1ee7a1"}, + {file = "lxml-5.3.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e99f5507401436fdcc85036a2e7dc2e28d962550afe1cbfc07c40e454256a859"}, + {file = "lxml-5.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:384aacddf2e5813a36495233b64cb96b1949da72bef933918ba5c84e06af8f0e"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:874a216bf6afaf97c263b56371434e47e2c652d215788396f60477540298218f"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:65ab5685d56914b9a2a34d67dd5488b83213d680b0c5d10b47f81da5a16b0b0e"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aac0bbd3e8dd2d9c45ceb82249e8bdd3ac99131a32b4d35c8af3cc9db1657179"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b369d3db3c22ed14c75ccd5af429086f166a19627e84a8fdade3f8f31426e52a"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c24037349665434f375645fa9d1f5304800cec574d0310f618490c871fd902b3"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:62d172f358f33a26d6b41b28c170c63886742f5b6772a42b59b4f0fa10526cb1"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:c1f794c02903c2824fccce5b20c339a1a14b114e83b306ff11b597c5f71a1c8d"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:5d6a6972b93c426ace71e0be9a6f4b2cfae9b1baed2eed2006076a746692288c"}, + {file = "lxml-5.3.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:3879cc6ce938ff4eb4900d901ed63555c778731a96365e53fadb36437a131a99"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:74068c601baff6ff021c70f0935b0c7bc528baa8ea210c202e03757c68c5a4ff"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:ecd4ad8453ac17bc7ba3868371bffb46f628161ad0eefbd0a855d2c8c32dd81a"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7e2f58095acc211eb9d8b5771bf04df9ff37d6b87618d1cbf85f92399c98dae8"}, + {file = "lxml-5.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e63601ad5cd8f860aa99d109889b5ac34de571c7ee902d6812d5d9ddcc77fa7d"}, + {file = "lxml-5.3.0-cp312-cp312-win32.whl", hash = "sha256:17e8d968d04a37c50ad9c456a286b525d78c4a1c15dd53aa46c1d8e06bf6fa30"}, + {file = "lxml-5.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:c1a69e58a6bb2de65902051d57fde951febad631a20a64572677a1052690482f"}, + {file = "lxml-5.3.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8c72e9563347c7395910de6a3100a4840a75a6f60e05af5e58566868d5eb2d6a"}, + {file = "lxml-5.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e92ce66cd919d18d14b3856906a61d3f6b6a8500e0794142338da644260595cd"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d04f064bebdfef9240478f7a779e8c5dc32b8b7b0b2fc6a62e39b928d428e51"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c2fb570d7823c2bbaf8b419ba6e5662137f8166e364a8b2b91051a1fb40ab8b"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0c120f43553ec759f8de1fee2f4794452b0946773299d44c36bfe18e83caf002"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:562e7494778a69086f0312ec9689f6b6ac1c6b65670ed7d0267e49f57ffa08c4"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:423b121f7e6fa514ba0c7918e56955a1d4470ed35faa03e3d9f0e3baa4c7e492"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:c00f323cc00576df6165cc9d21a4c21285fa6b9989c5c39830c3903dc4303ef3"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_ppc64le.whl", hash = "sha256:1fdc9fae8dd4c763e8a31e7630afef517eab9f5d5d31a278df087f307bf601f4"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_s390x.whl", hash = "sha256:658f2aa69d31e09699705949b5fc4719cbecbd4a97f9656a232e7d6c7be1a367"}, + {file = "lxml-5.3.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:1473427aff3d66a3fa2199004c3e601e6c4500ab86696edffdbc84954c72d832"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a87de7dd873bf9a792bf1e58b1c3887b9264036629a5bf2d2e6579fe8e73edff"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:0d7b36afa46c97875303a94e8f3ad932bf78bace9e18e603f2085b652422edcd"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:cf120cce539453ae086eacc0130a324e7026113510efa83ab42ef3fcfccac7fb"}, + {file = "lxml-5.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:df5c7333167b9674aa8ae1d4008fa4bc17a313cc490b2cca27838bbdcc6bb15b"}, + {file = "lxml-5.3.0-cp313-cp313-win32.whl", hash = "sha256:c802e1c2ed9f0c06a65bc4ed0189d000ada8049312cfeab6ca635e39c9608957"}, + {file = "lxml-5.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:406246b96d552e0503e17a1006fd27edac678b3fcc9f1be71a2f94b4ff61528d"}, + {file = "lxml-5.3.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:8f0de2d390af441fe8b2c12626d103540b5d850d585b18fcada58d972b74a74e"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1afe0a8c353746e610bd9031a630a95bcfb1a720684c3f2b36c4710a0a96528f"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56b9861a71575f5795bde89256e7467ece3d339c9b43141dbdd54544566b3b94"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_28_x86_64.whl", hash = "sha256:9fb81d2824dff4f2e297a276297e9031f46d2682cafc484f49de182aa5e5df99"}, + {file = "lxml-5.3.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:2c226a06ecb8cdef28845ae976da407917542c5e6e75dcac7cc33eb04aaeb237"}, + {file = "lxml-5.3.0-cp36-cp36m-musllinux_1_2_x86_64.whl", hash = "sha256:7d3d1ca42870cdb6d0d29939630dbe48fa511c203724820fc0fd507b2fb46577"}, + {file = "lxml-5.3.0-cp36-cp36m-win32.whl", hash = "sha256:094cb601ba9f55296774c2d57ad68730daa0b13dc260e1f941b4d13678239e70"}, + {file = "lxml-5.3.0-cp36-cp36m-win_amd64.whl", hash = "sha256:eafa2c8658f4e560b098fe9fc54539f86528651f61849b22111a9b107d18910c"}, + {file = "lxml-5.3.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:cb83f8a875b3d9b458cada4f880fa498646874ba4011dc974e071a0a84a1b033"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:25f1b69d41656b05885aa185f5fdf822cb01a586d1b32739633679699f220391"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23e0553b8055600b3bf4a00b255ec5c92e1e4aebf8c2c09334f8368e8bd174d6"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ada35dd21dc6c039259596b358caab6b13f4db4d4a7f8665764d616daf9cc1d"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_aarch64.whl", hash = "sha256:81b4e48da4c69313192d8c8d4311e5d818b8be1afe68ee20f6385d0e96fc9512"}, + {file = "lxml-5.3.0-cp37-cp37m-manylinux_2_28_x86_64.whl", hash = "sha256:2bc9fd5ca4729af796f9f59cd8ff160fe06a474da40aca03fcc79655ddee1a8b"}, + {file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:07da23d7ee08577760f0a71d67a861019103e4812c87e2fab26b039054594cc5"}, + {file = "lxml-5.3.0-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:ea2e2f6f801696ad7de8aec061044d6c8c0dd4037608c7cab38a9a4d316bfb11"}, + {file = "lxml-5.3.0-cp37-cp37m-win32.whl", hash = "sha256:5c54afdcbb0182d06836cc3d1be921e540be3ebdf8b8a51ee3ef987537455f84"}, + {file = "lxml-5.3.0-cp37-cp37m-win_amd64.whl", hash = "sha256:f2901429da1e645ce548bf9171784c0f74f0718c3f6150ce166be39e4dd66c3e"}, + {file = "lxml-5.3.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c56a1d43b2f9ee4786e4658c7903f05da35b923fb53c11025712562d5cc02753"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ee8c39582d2652dcd516d1b879451500f8db3fe3607ce45d7c5957ab2596040"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdf3a3059611f7585a78ee10399a15566356116a4288380921a4b598d807a22"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:146173654d79eb1fc97498b4280c1d3e1e5d58c398fa530905c9ea50ea849b22"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:0a7056921edbdd7560746f4221dca89bb7a3fe457d3d74267995253f46343f15"}, + {file = "lxml-5.3.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:9e4b47ac0f5e749cfc618efdf4726269441014ae1d5583e047b452a32e221920"}, + {file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f914c03e6a31deb632e2daa881fe198461f4d06e57ac3d0e05bbcab8eae01945"}, + {file = "lxml-5.3.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:213261f168c5e1d9b7535a67e68b1f59f92398dd17a56d934550837143f79c42"}, + {file = "lxml-5.3.0-cp38-cp38-win32.whl", hash = "sha256:218c1b2e17a710e363855594230f44060e2025b05c80d1f0661258142b2add2e"}, + {file = "lxml-5.3.0-cp38-cp38-win_amd64.whl", hash = "sha256:315f9542011b2c4e1d280e4a20ddcca1761993dda3afc7a73b01235f8641e903"}, + {file = "lxml-5.3.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1ffc23010330c2ab67fac02781df60998ca8fe759e8efde6f8b756a20599c5de"}, + {file = "lxml-5.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:2b3778cb38212f52fac9fe913017deea2fdf4eb1a4f8e4cfc6b009a13a6d3fcc"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b0c7a688944891086ba192e21c5229dea54382f4836a209ff8d0a660fac06be"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:747a3d3e98e24597981ca0be0fd922aebd471fa99d0043a3842d00cdcad7ad6a"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86a6b24b19eaebc448dc56b87c4865527855145d851f9fc3891673ff97950540"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b11a5d918a6216e521c715b02749240fb07ae5a1fefd4b7bf12f833bc8b4fe70"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68b87753c784d6acb8a25b05cb526c3406913c9d988d51f80adecc2b0775d6aa"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:109fa6fede314cc50eed29e6e56c540075e63d922455346f11e4d7a036d2b8cf"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_ppc64le.whl", hash = "sha256:02ced472497b8362c8e902ade23e3300479f4f43e45f4105c85ef43b8db85229"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_s390x.whl", hash = "sha256:6b038cc86b285e4f9fea2ba5ee76e89f21ed1ea898e287dc277a25884f3a7dfe"}, + {file = "lxml-5.3.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:7437237c6a66b7ca341e868cda48be24b8701862757426852c9b3186de1da8a2"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:7f41026c1d64043a36fda21d64c5026762d53a77043e73e94b71f0521939cc71"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:482c2f67761868f0108b1743098640fbb2a28a8e15bf3f47ada9fa59d9fe08c3"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:1483fd3358963cc5c1c9b122c80606a3a79ee0875bcac0204149fa09d6ff2727"}, + {file = "lxml-5.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dec2d1130a9cda5b904696cec33b2cfb451304ba9081eeda7f90f724097300a"}, + {file = "lxml-5.3.0-cp39-cp39-win32.whl", hash = "sha256:a0eabd0a81625049c5df745209dc7fcef6e2aea7793e5f003ba363610aa0a3ff"}, + {file = "lxml-5.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:89e043f1d9d341c52bf2af6d02e6adde62e0a46e6755d5eb60dc6e4f0b8aeca2"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7b1cd427cb0d5f7393c31b7496419da594fe600e6fdc4b105a54f82405e6626c"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51806cfe0279e06ed8500ce19479d757db42a30fd509940b1701be9c86a5ff9a"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee70d08fd60c9565ba8190f41a46a54096afa0eeb8f76bd66f2c25d3b1b83005"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:8dc2c0395bea8254d8daebc76dcf8eb3a95ec2a46fa6fae5eaccee366bfe02ce"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6ba0d3dcac281aad8a0e5b14c7ed6f9fa89c8612b47939fc94f80b16e2e9bc83"}, + {file = "lxml-5.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:6e91cf736959057f7aac7adfc83481e03615a8e8dd5758aa1d95ea69e8931dba"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:94d6c3782907b5e40e21cadf94b13b0842ac421192f26b84c45f13f3c9d5dc27"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c300306673aa0f3ed5ed9372b21867690a17dba38c68c44b287437c362ce486b"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:78d9b952e07aed35fe2e1a7ad26e929595412db48535921c5013edc8aa4a35ce"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:01220dca0d066d1349bd6a1726856a78f7929f3878f7e2ee83c296c69495309e"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:2d9b8d9177afaef80c53c0a9e30fa252ff3036fb1c6494d427c066a4ce6a282f"}, + {file = "lxml-5.3.0-pp37-pypy37_pp73-win_amd64.whl", hash = "sha256:20094fc3f21ea0a8669dc4c61ed7fa8263bd37d97d93b90f28fc613371e7a875"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ace2c2326a319a0bb8a8b0e5b570c764962e95818de9f259ce814ee666603f19"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92e67a0be1639c251d21e35fe74df6bcc40cba445c2cda7c4a967656733249e2"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd5350b55f9fecddc51385463a4f67a5da829bc741e38cf689f38ec9023f54ab"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c1fefd7e3d00921c44dc9ca80a775af49698bbfd92ea84498e56acffd4c5469"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:71a8dd38fbd2f2319136d4ae855a7078c69c9a38ae06e0c17c73fd70fc6caad8"}, + {file = "lxml-5.3.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:97acf1e1fd66ab53dacd2c35b319d7e548380c2e9e8c54525c6e76d21b1ae3b1"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:68934b242c51eb02907c5b81d138cb977b2129a0a75a8f8b60b01cb8586c7b21"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b710bc2b8292966b23a6a0121f7a6c51d45d2347edcc75f016ac123b8054d3f2"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18feb4b93302091b1541221196a2155aa296c363fd233814fa11e181adebc52f"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:3eb44520c4724c2e1a57c0af33a379eee41792595023f367ba3952a2d96c2aab"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:609251a0ca4770e5a8768ff902aa02bf636339c5a93f9349b48eb1f606f7f3e9"}, + {file = "lxml-5.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:516f491c834eb320d6c843156440fe7fc0d50b33e44387fcec5b02f0bc118a4c"}, + {file = "lxml-5.3.0.tar.gz", hash = "sha256:4e109ca30d1edec1ac60cdbe341905dc3b8f55b16855e03a54aaf59e51ec8c6f"}, +] + +[package.extras] +cssselect = ["cssselect (>=0.7)"] +html-clean = ["lxml-html-clean"] +html5 = ["html5lib"] +htmlsoup = ["BeautifulSoup4"] +source = ["Cython (>=3.0.11)"] + +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "mccabe" version = "0.7.0" @@ -232,6 +685,17 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "mslex" version = "1.2.0" @@ -301,15 +765,44 @@ files = [ {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, ] +[[package]] +name = "ocrmypdf" +version = "16.5.0" +description = "OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched" +optional = false +python-versions = ">=3.10" +files = [ + {file = "ocrmypdf-16.5.0-py3-none-any.whl", hash = "sha256:9222b1b0818b65c891559b84efab2e84434c71149b3aaaa6dc654457e0b66b14"}, + {file = "ocrmypdf-16.5.0.tar.gz", hash = "sha256:cd96bddfb3a986be7bf7857757919332e1db5dab780eb7b321fdea38f60127ac"}, +] + +[package.dependencies] +deprecation = ">=2.1.0" +img2pdf = ">=0.5" +packaging = ">=20" +pdfminer-six = ">=20220319" +pi-heif = "*" +pikepdf = ">=8.10.1" +pillow = ">=10.0.1" +pluggy = ">=1" +rich = ">=13" + +[package.extras] +docs = ["sphinx", "sphinx-issues", "sphinx-rtd-theme"] +extended-test = ["pymupdf (>=1.19.1)"] +test = ["coverage[toml] (>=6.2)", "hypothesis (>=6.36.0)", "pytest (>=6.2.5)", "pytest-cov (>=3.0.0)", "pytest-xdist (>=2.5.0)", "python-xmp-toolkit (==2.0.1)", "reportlab (>=3.6.8)", "types-humanfriendly", "types-pillow"] +watcher = ["python-dotenv", "typer-slim[standard]", "watchdog (>=1.0.2)"] +webservice = ["flask (>=2.0.1)"] + [[package]] name = "packaging" -version = "24.0" +version = "24.1" description = "Core utilities for Python packages" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "packaging-24.0-py3-none-any.whl", hash = "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5"}, - {file = "packaging-24.0.tar.gz", hash = "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9"}, + {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, + {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] [[package]] @@ -323,6 +816,26 @@ files = [ {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, ] +[[package]] +name = "pdfminer-six" +version = "20240706" +description = "PDF parser and analyzer" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pdfminer.six-20240706-py3-none-any.whl", hash = "sha256:f4f70e74174b4b3542fcb8406a210b6e2e27cd0f0b5fd04534a8cc0d8951e38c"}, + {file = "pdfminer.six-20240706.tar.gz", hash = "sha256:c631a46d5da957a9ffe4460c5dce21e8431dabb615fee5f9f4400603a58d95a6"}, +] + +[package.dependencies] +charset-normalizer = ">=2.0.0" +cryptography = ">=36.0.0" + +[package.extras] +dev = ["atheris", "black", "mypy (==0.931)", "nox", "pytest"] +docs = ["sphinx", "sphinx-argparse"] +image = ["Pillow"] + [[package]] name = "pdftotext" version = "2.2.2" @@ -333,6 +846,237 @@ files = [ {file = "pdftotext-2.2.2.tar.gz", hash = "sha256:2a9aa89bc62022408781b39d188fabf5a3ad1103b6630f32c4e27e395f7966ee"}, ] +[[package]] +name = "pi-heif" +version = "0.18.0" +description = "Python interface for libheif library" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pi_heif-0.18.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:3c09d22ed75200372b8102debf4ba69d8f63c595870505b9188d6c9a9b48e1f2"}, + {file = "pi_heif-0.18.0-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:d7dc682acccd81857fd4b5849ebe7b9504e11eab493ffa0905ea25eaf5fb0f93"}, + {file = "pi_heif-0.18.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:573602d8c68f4ff93c4d35439d7566b3f2d4ab774925367aece20f9cd0ba243d"}, + {file = "pi_heif-0.18.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:886fbbda898559eba0843feca17e6c7e43c13336404817c6d07a01d4955c3d33"}, + {file = "pi_heif-0.18.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:34725b542bd2737be7e7909fff1fb6d39760d3d395a36ce6fae5280e88ba94a6"}, + {file = "pi_heif-0.18.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:aac4fc247139081b30581cadbea00bb4c4fb7274140eaa1147e22bcf7ece7525"}, + {file = "pi_heif-0.18.0-cp310-cp310-win_amd64.whl", hash = "sha256:5254dc3121d2a38036beae631aae620d0c942f03973ec134ae9827b60e7d5c0b"}, + {file = "pi_heif-0.18.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:e568a323548896848489035c5bb2e4de13df07fbdbd33831b165ff545066b97f"}, + {file = "pi_heif-0.18.0-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:a4b3690f03636944b13ab313d21ee90a46d5fa35a15d884563b0ff400b813042"}, + {file = "pi_heif-0.18.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7e0c3286f106f2d22d394b844c0e015f132567d70b31fef6d3cc846b8fe9dbc6"}, + {file = "pi_heif-0.18.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:74d4b07f0589df9fac138ecbcccd248217a12bbebd3443153158d7f54522e257"}, + {file = "pi_heif-0.18.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:cab6f7a00ccbcc3087d400a544e62ef30eff6339cf0d600588b92b1e7ca49d96"}, + {file = "pi_heif-0.18.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:f1b7c4daeaffb235e73fc54132f4aa8bcb229dcb463ac0b4def9e1aee5793165"}, + {file = "pi_heif-0.18.0-cp311-cp311-win_amd64.whl", hash = "sha256:2b892ebc898ca32c1a1ec9e72658c0d14de5ac31c1bd61a8aa66dc645080e32f"}, + {file = "pi_heif-0.18.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:64ed341f91763e29096b0ddb38b50d13879d06039889d458fc7dac6d5c03dd80"}, + {file = "pi_heif-0.18.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:71309d2a632c0b8716ccbbb9e413ee28b8439967c45c92de68888fe4acf80244"}, + {file = "pi_heif-0.18.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83548aa70e44fef865c2b2575ed949f2e6eba756b114ca6ad525ef56b5449d57"}, + {file = "pi_heif-0.18.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ff516f9f5118a8f2e47531611324e6a07848e4f1f17c5df485de734e50dee7e"}, + {file = "pi_heif-0.18.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:f792a278335c278d2c092a62aaad3a7362021f9341f988b1b8b3ca4783651e49"}, + {file = "pi_heif-0.18.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:571d69be0088336c4251d7301f3fdc0fecab45e38286e71a23e64814489c5a15"}, + {file = "pi_heif-0.18.0-cp312-cp312-win_amd64.whl", hash = "sha256:0d5dd431dbf7be88267fbfb08623bcf2d16628cdcbc898bcc0e05412dc43fd26"}, + {file = "pi_heif-0.18.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:286a5d2b5036cf3da8f1a2e1ad54044aaabe4d46b178057323f5a6ce19417741"}, + {file = "pi_heif-0.18.0-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:79969f90a5a01b9a82b18bb0667392da733790585531b3183b7f375b9e88dbcd"}, + {file = "pi_heif-0.18.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:18d113c14fecadb90c3d8838240120e6f93671618eb96d776f994b314f1f858c"}, + {file = "pi_heif-0.18.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fae39eec07f4b477c582ddd75d38610553c1b6d19cd6ce4a3ded4c7e0ee029ac"}, + {file = "pi_heif-0.18.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:24ca403e556c84ce0e36ea1477530f7854e71c2523eb1a97c91d5d9ce8bbc548"}, + {file = "pi_heif-0.18.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:742560127423bd179605325a41322df800ca02df768e872bfe189fe371f61578"}, + {file = "pi_heif-0.18.0-cp313-cp313-win_amd64.whl", hash = "sha256:3529f904f51594a613759ab610799ce34b615339d67e642843eec1ac7868814d"}, + {file = "pi_heif-0.18.0-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:2c912219964dc864e1454ab4f43d97cbf6a88d065410a16936e7c59b1290a7da"}, + {file = "pi_heif-0.18.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2af8ac6bd93e5df02b9f292a10664524844f37b39079e55aa9ef5857a3b0a22"}, + {file = "pi_heif-0.18.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad3f54dcc54a4c2ed1c58a135375330fe7b2ba2c2a8a816d3296c12e9d8c284c"}, + {file = "pi_heif-0.18.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:00a6d72ba2cc1477c8a909bfbbac4f5d931a25a88979077b231b76e7b9c80ba6"}, + {file = "pi_heif-0.18.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:45d360c3a056d9c81b0480a546f291bbc53caf70705f3a49d082e728735ed4ae"}, + {file = "pi_heif-0.18.0-cp38-cp38-win_amd64.whl", hash = "sha256:4ecb9031ad1cb7eed1591cba95420964557cff8fc63bab9bdc204d53301e502f"}, + {file = "pi_heif-0.18.0-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:6541a05177c3d8f00e56f4cc8ee9c681eb25fcdc917065acbc426847eb8aea97"}, + {file = "pi_heif-0.18.0-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:054cd3544e421b342b15b5eb8db4de222a09ca3ae441f4fa5943f80d9e65c5d6"}, + {file = "pi_heif-0.18.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1159f54d76b860cc27753c9925e2923959d8b5277372db946cb1078fa11ed1ea"}, + {file = "pi_heif-0.18.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3fa5366b2f555b6b3a56b09aa74f178a040edb174b29060d8d56c03eea154e43"}, + {file = "pi_heif-0.18.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ccd611653581f39c77ab8222a660e471e724d8f7c6f4e50760b10ce06769d9d8"}, + {file = "pi_heif-0.18.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:cfa979043be0d4ad1b37f6794fdff010cf69e5ada1ef74eef4a5b3983d3b8881"}, + {file = "pi_heif-0.18.0-cp39-cp39-win_amd64.whl", hash = "sha256:7acdd41dc72c01c1f2cfd91624a1c102ecc324fff6a501ab981c6f803f673b1b"}, + {file = "pi_heif-0.18.0-pp310-pypy310_pp73-macosx_12_0_x86_64.whl", hash = "sha256:6c7a28547e3f1e2f43b395d2764f693fcfa4eb8a4da0d5815c7eb3eeda745fbb"}, + {file = "pi_heif-0.18.0-pp310-pypy310_pp73-macosx_14_0_arm64.whl", hash = "sha256:c5bded35d1cefb594f6ce9d775e3e6b750a32926779f7b496f0f8d4992db09e1"}, + {file = "pi_heif-0.18.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4d88aba685051131f103a7afc428412abd7d09640719635f8880898b0e7aec97"}, + {file = "pi_heif-0.18.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a9a95f54cb3a473005572f7309666b71d03c1764134b2df0ed796744c7aa069"}, + {file = "pi_heif-0.18.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f19d8cdffbc5e8e9f3676839c8632ffd161d17f84f614cad9b98a58e27ffd3a7"}, + {file = "pi_heif-0.18.0-pp39-pypy39_pp73-macosx_12_0_x86_64.whl", hash = "sha256:0962b4cd828ad1ae94f9cd8e95ed0741cddcd19082cb97d5b69bfe1ac6623eb9"}, + {file = "pi_heif-0.18.0-pp39-pypy39_pp73-macosx_14_0_arm64.whl", hash = "sha256:86f7aad733292fea8a2869814117caf11ed424731bd90fe1693b2ccbfcc6bfed"}, + {file = "pi_heif-0.18.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8d0a7529225f1a25231d8f2cfd39f722c31e5396581eeeaa7a30793188e8b4f7"}, + {file = "pi_heif-0.18.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5424435551e606e1ac515de46a2b1c6d8e82c7a89473bb7cf9398368f051d675"}, + {file = "pi_heif-0.18.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fe0e424d08d59c5a1d74dfa7239b40a935b5a526305ebecd2c27755aa3442225"}, + {file = "pi_heif-0.18.0.tar.gz", hash = "sha256:0a690159607beaa6712f2c8abaa5168a22314d18f00a617d691548f5acba8070"}, +] + +[package.dependencies] +pillow = ">=10.1.0" + +[package.extras] +tests = ["defusedxml", "numpy", "packaging", "pympler", "pytest"] +tests-min = ["defusedxml", "packaging", "pytest"] + +[[package]] +name = "pikepdf" +version = "9.2.1" +description = "Read and write PDFs with Python, powered by qpdf" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pikepdf-9.2.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:e863185d6abadab140a7c3e152d9227afe495cf97d4738efc280896660249180"}, + {file = "pikepdf-9.2.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:d37ce8a4ade0cddf3827e13867208ffc8c161d38fdb12250b31e1b8cfa58ab1b"}, + {file = "pikepdf-9.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b9e9416da42da43f386244b2bab2a236830ccb11598b73fcd43d32fd234aaff"}, + {file = "pikepdf-9.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e1e47e80ecfd77dbfc6c7e807e78e5cce0c10d5bd7804c0d9064429d72af981c"}, + {file = "pikepdf-9.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9699fe058b44e59cdcd05bcadf9cfa8f5242b48e44f9a4772bb321cd74d8e339"}, + {file = "pikepdf-9.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c7e5c3a425de7db1fc13583883d2fa10119ce85071cc1d53344383498739254"}, + {file = "pikepdf-9.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:f3ecbc250254b61de2ca973e3d57acb07720e5a810ee0c81d33b051c76d22208"}, + {file = "pikepdf-9.2.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:6275467b7eacb6fb04f16727e90e6562c6bbf449ece4e57273956beb8f1cdacd"}, + {file = "pikepdf-9.2.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:d6f240b0c1da5b6656efa3daa087394ddce5b3ecc411b85efcfd7e7228a1bc26"}, + {file = "pikepdf-9.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:96ea92374d25481a2213403ae06c990ea41a1f35b0404dd072b7070dac76f41b"}, + {file = "pikepdf-9.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1a1314e4c4b2a28a1af1e700570b3c32c074cf363425768e8bc9f031438aee3"}, + {file = "pikepdf-9.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ceeac42bfb7227310e617e871d8f7ae6f304cf2783ca0131f3063c54ee1ecb73"}, + {file = "pikepdf-9.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:8a50c58bee394f69561ab2861f77ce763f91cf7af6c8a1919109bb33fe8ca669"}, + {file = "pikepdf-9.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:d360e64c31f73b16b78ca1e10e9d96f758b4a3fac195cd35f88a5f213808852e"}, + {file = "pikepdf-9.2.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:e199833ef11a64f22945a9a98d56a98968e988e407cb20d9fa8b6081075c9604"}, + {file = "pikepdf-9.2.1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:4c8bf24b8bf933f4022c6ace5ee757453e3dacb806a8e826461fd5f33ce15a70"}, + {file = "pikepdf-9.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32ef219737e53b48754acb45ad7840aee8403d97fc79539c26501a2d9089c91"}, + {file = "pikepdf-9.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6b1ee86850fddaea15afdde394109332f7dc63a156e52fb131f9b647b16f920"}, + {file = "pikepdf-9.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:fc0deac6dd356ef95fcf42db917cfe2c5375640295609924d4825052c2124509"}, + {file = "pikepdf-9.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f2e4d5632dc03a41d901e4feee474557145c4906d96cf6e7ae8106a85142d2eb"}, + {file = "pikepdf-9.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:c4eb22efae62b057a31ee4cb5574db8edfe15b185c8e89500eca8157fda15974"}, + {file = "pikepdf-9.2.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:01be001988ce0f6a5a89319f37fc14f27df75c4e332222ed8e993d14405acb02"}, + {file = "pikepdf-9.2.1-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:baaf78ed49e3cecfc4d30f2c7291d9b19bebe8a5f8e5940d7e7c93683b47a6f9"}, + {file = "pikepdf-9.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aefa94f8ea6371fc3cbf78f55f669efec6e28e317927e8dd8a237e19a7be50fb"}, + {file = "pikepdf-9.2.1-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:3efff6ffda819d4193dd8e63c6f304bf85f9ae961c0247dc0b716b7c74fb7094"}, + {file = "pikepdf-9.2.1-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:6e15689fd715e83ff555cbdb939a0453c6c94af9975ae9b3292dd68231014653"}, + {file = "pikepdf-9.2.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:755f559c206de5b3de0e35430ad28e50f37866d96a41b3ad41d7114660e1c58b"}, + {file = "pikepdf-9.2.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eb65a84fff25295707250b49f9e2d1186e9f6b4b7f828a0d9e7e2b65a7af6311"}, + {file = "pikepdf-9.2.1-cp38-cp38-win_amd64.whl", hash = "sha256:d209e4a9ba99a4460cf987f6cd8703a8723d8a62fc51451c4c1233eff07db02f"}, + {file = "pikepdf-9.2.1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:7fa15e5ff3e17dc6295d676d673787c79fec67cca59261a22ccf7604914170b1"}, + {file = "pikepdf-9.2.1-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:127e94632eb1ccd5d4d859511f084a0a314555cba621595a135915fc9e1710c5"}, + {file = "pikepdf-9.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e6b3083ef2e3c29af33fcdb73a9a61a8e4dbe540edb474c19b9866194c6bf25"}, + {file = "pikepdf-9.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:163600dcd8d158e9287934b65a516b469b153859ab029e40fb3a0eff16c7dd7a"}, + {file = "pikepdf-9.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d9ba6c639faac47a85817854d002e2f57683ffe65388a746af580c4a6521646c"}, + {file = "pikepdf-9.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c1b883e1ebe28fbc318ce5c971b3dca9b30621bc2fe1642c99cda76cf442c4a2"}, + {file = "pikepdf-9.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:c6ea5f623629478abaf1e25b1d0edcaee3d0408fd9061fb4f7dc24fb78a25302"}, + {file = "pikepdf-9.2.1-pp310-pypy310_pp73-macosx_12_0_x86_64.whl", hash = "sha256:0759842e47369fe5fa0d61de2ac9ff073895c75567f3efbc4aebc6c1cafee17e"}, + {file = "pikepdf-9.2.1-pp310-pypy310_pp73-macosx_14_0_arm64.whl", hash = "sha256:cd73d828799e41ee778606e30efd0c27be1e2420b1ed0c9cbc39299872ceed76"}, + {file = "pikepdf-9.2.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98ff348c97c7c641c2d2b741d60c8edf22e0fe76fa5c386cb351a3abd3f2a9b9"}, + {file = "pikepdf-9.2.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4a5c5ccccb5812a5be5b5cb66c8c8a6f796910ab89932a3048a4e66e5436bd01"}, + {file = "pikepdf-9.2.1-pp39-pypy39_pp73-macosx_12_0_x86_64.whl", hash = "sha256:1dd707e6159af953f5560138f695b3a1ae2e1a0750535be70a3b75a720279330"}, + {file = "pikepdf-9.2.1-pp39-pypy39_pp73-macosx_14_0_arm64.whl", hash = "sha256:61bb9dfe58ee3ee2a286ea4cd21af87e1853a2d1433b550e3f58faa005b6ea3a"}, + {file = "pikepdf-9.2.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:531b6685912eb630a7fe57c527c9b5636c50c543eb0cdb5807b139e0d7712696"}, + {file = "pikepdf-9.2.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c2c21c6a3d7ec96c7f9627ad61195eadff12659e3e00abe7156c34503189db47"}, + {file = "pikepdf-9.2.1.tar.gz", hash = "sha256:5e31aeb15ab21ba340a9013c1665e7ce85bd1f8167e6710c455d51f82c2e64e0"}, +] + +[package.dependencies] +Deprecated = "*" +lxml = ">=4.8" +packaging = "*" +Pillow = ">=10.0.1" + +[package.extras] +dev = ["pre-commit", "typer"] +docs = ["Sphinx (>=3)", "sphinx-autoapi", "sphinx-design", "sphinx-issues", "sphinx-rtd-theme", "tomli"] +mypy = ["lxml-stubs", "types-Pillow", "types-requests", "types-setuptools"] +test = ["attrs (>=20.2.0)", "coverage[toml]", "hypothesis (>=6.36)", "numpy (>=1.21.0)", "psutil (>=5.9)", "pybind11", "pytest (>=6.2.5)", "pytest-cov (>=3.0.0)", "pytest-timeout (>=2.1.0)", "pytest-xdist (>=2.5.0)", "python-dateutil (>=2.8.1)", "python-xmp-toolkit (>=2.0.1)", "tomli"] + +[[package]] +name = "pillow" +version = "10.4.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, + {file = "pillow-10.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:543f3dc61c18dafb755773efc89aae60d06b6596a63914107f75459cf984164d"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7928ecbf1ece13956b95d9cbcfc77137652b02763ba384d9ab508099a2eca856"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e4d49b85c4348ea0b31ea63bc75a9f3857869174e2bf17e7aba02945cd218e6f"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:6c762a5b0997f5659a5ef2266abc1d8851ad7749ad9a6a5506eb23d314e4f46b"}, + {file = "pillow-10.4.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:a985e028fc183bf12a77a8bbf36318db4238a3ded7fa9df1b9a133f1cb79f8fc"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:812f7342b0eee081eaec84d91423d1b4650bb9828eb53d8511bcef8ce5aecf1e"}, + {file = "pillow-10.4.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:ac1452d2fbe4978c2eec89fb5a23b8387aba707ac72810d9490118817d9c0b46"}, + {file = "pillow-10.4.0-cp310-cp310-win32.whl", hash = "sha256:bcd5e41a859bf2e84fdc42f4edb7d9aba0a13d29a2abadccafad99de3feff984"}, + {file = "pillow-10.4.0-cp310-cp310-win_amd64.whl", hash = "sha256:ecd85a8d3e79cd7158dec1c9e5808e821feea088e2f69a974db5edf84dc53141"}, + {file = "pillow-10.4.0-cp310-cp310-win_arm64.whl", hash = "sha256:ff337c552345e95702c5fde3158acb0625111017d0e5f24bf3acdb9cc16b90d1"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0a9ec697746f268507404647e531e92889890a087e03681a3606d9b920fbee3c"}, + {file = "pillow-10.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:dfe91cb65544a1321e631e696759491ae04a2ea11d36715eca01ce07284738be"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5dc6761a6efc781e6a1544206f22c80c3af4c8cf461206d46a1e6006e4429ff3"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e84b6cc6a4a3d76c153a6b19270b3526a5a8ed6b09501d3af891daa2a9de7d6"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:bbc527b519bd3aa9d7f429d152fea69f9ad37c95f0b02aebddff592688998abe"}, + {file = "pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:76a911dfe51a36041f2e756b00f96ed84677cdeb75d25c767f296c1c1eda1319"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:59291fb29317122398786c2d44427bbd1a6d7ff54017075b22be9d21aa59bd8d"}, + {file = "pillow-10.4.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:416d3a5d0e8cfe4f27f574362435bc9bae57f679a7158e0096ad2beb427b8696"}, + {file = "pillow-10.4.0-cp311-cp311-win32.whl", hash = "sha256:7086cc1d5eebb91ad24ded9f58bec6c688e9f0ed7eb3dbbf1e4800280a896496"}, + {file = "pillow-10.4.0-cp311-cp311-win_amd64.whl", hash = "sha256:cbed61494057c0f83b83eb3a310f0bf774b09513307c434d4366ed64f4128a91"}, + {file = "pillow-10.4.0-cp311-cp311-win_arm64.whl", hash = "sha256:f5f0c3e969c8f12dd2bb7e0b15d5c468b51e5017e01e2e867335c81903046a22"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_10_10_x86_64.whl", hash = "sha256:673655af3eadf4df6b5457033f086e90299fdd7a47983a13827acf7459c15d94"}, + {file = "pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:866b6942a92f56300012f5fbac71f2d610312ee65e22f1aa2609e491284e5597"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:29dbdc4207642ea6aad70fbde1a9338753d33fb23ed6956e706936706f52dd80"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf2342ac639c4cf38799a44950bbc2dfcb685f052b9e262f446482afaf4bffca"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:f5b92f4d70791b4a67157321c4e8225d60b119c5cc9aee8ecf153aace4aad4ef"}, + {file = "pillow-10.4.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:86dcb5a1eb778d8b25659d5e4341269e8590ad6b4e8b44d9f4b07f8d136c414a"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:780c072c2e11c9b2c7ca37f9a2ee8ba66f44367ac3e5c7832afcfe5104fd6d1b"}, + {file = "pillow-10.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37fb69d905be665f68f28a8bba3c6d3223c8efe1edf14cc4cfa06c241f8c81d9"}, + {file = "pillow-10.4.0-cp312-cp312-win32.whl", hash = "sha256:7dfecdbad5c301d7b5bde160150b4db4c659cee2b69589705b6f8a0c509d9f42"}, + {file = "pillow-10.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:1d846aea995ad352d4bdcc847535bd56e0fd88d36829d2c90be880ef1ee4668a"}, + {file = "pillow-10.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:e553cad5179a66ba15bb18b353a19020e73a7921296a7979c4a2b7f6a5cd57f9"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8bc1a764ed8c957a2e9cacf97c8b2b053b70307cf2996aafd70e91a082e70df3"}, + {file = "pillow-10.4.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:6209bb41dc692ddfee4942517c19ee81b86c864b626dbfca272ec0f7cff5d9fb"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bee197b30783295d2eb680b311af15a20a8b24024a19c3a26431ff83eb8d1f70"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ef61f5dd14c300786318482456481463b9d6b91ebe5ef12f405afbba77ed0be"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:297e388da6e248c98bc4a02e018966af0c5f92dfacf5a5ca22fa01cb3179bca0"}, + {file = "pillow-10.4.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e4db64794ccdf6cb83a59d73405f63adbe2a1887012e308828596100a0b2f6cc"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:bd2880a07482090a3bcb01f4265f1936a903d70bc740bfcb1fd4e8a2ffe5cf5a"}, + {file = "pillow-10.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4b35b21b819ac1dbd1233317adeecd63495f6babf21b7b2512d244ff6c6ce309"}, + {file = "pillow-10.4.0-cp313-cp313-win32.whl", hash = "sha256:551d3fd6e9dc15e4c1eb6fc4ba2b39c0c7933fa113b220057a34f4bb3268a060"}, + {file = "pillow-10.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:030abdbe43ee02e0de642aee345efa443740aa4d828bfe8e2eb11922ea6a21ea"}, + {file = "pillow-10.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b001114dd152cfd6b23befeb28d7aee43553e2402c9f159807bf55f33af8a8d"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_10_10_x86_64.whl", hash = "sha256:8d4d5063501b6dd4024b8ac2f04962d661222d120381272deea52e3fc52d3736"}, + {file = "pillow-10.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7c1ee6f42250df403c5f103cbd2768a28fe1a0ea1f0f03fe151c8741e1469c8b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15e02e9bb4c21e39876698abf233c8c579127986f8207200bc8a8f6bb27acf2"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a8d4bade9952ea9a77d0c3e49cbd8b2890a399422258a77f357b9cc9be8d680"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:43efea75eb06b95d1631cb784aa40156177bf9dd5b4b03ff38979e048258bc6b"}, + {file = "pillow-10.4.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:950be4d8ba92aca4b2bb0741285a46bfae3ca699ef913ec8416c1b78eadd64cd"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:d7480af14364494365e89d6fddc510a13e5a2c3584cb19ef65415ca57252fb84"}, + {file = "pillow-10.4.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:73664fe514b34c8f02452ffb73b7a92c6774e39a647087f83d67f010eb9a0cf0"}, + {file = "pillow-10.4.0-cp38-cp38-win32.whl", hash = "sha256:e88d5e6ad0d026fba7bdab8c3f225a69f063f116462c49892b0149e21b6c0a0e"}, + {file = "pillow-10.4.0-cp38-cp38-win_amd64.whl", hash = "sha256:5161eef006d335e46895297f642341111945e2c1c899eb406882a6c61a4357ab"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:0ae24a547e8b711ccaaf99c9ae3cd975470e1a30caa80a6aaee9a2f19c05701d"}, + {file = "pillow-10.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:298478fe4f77a4408895605f3482b6cc6222c018b2ce565c2b6b9c354ac3229b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:134ace6dc392116566980ee7436477d844520a26a4b1bd4053f6f47d096997fd"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:930044bb7679ab003b14023138b50181899da3f25de50e9dbee23b61b4de2126"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:c76e5786951e72ed3686e122d14c5d7012f16c8303a674d18cdcd6d89557fc5b"}, + {file = "pillow-10.4.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:b2724fdb354a868ddf9a880cb84d102da914e99119211ef7ecbdc613b8c96b3c"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:dbc6ae66518ab3c5847659e9988c3b60dc94ffb48ef9168656e0019a93dbf8a1"}, + {file = "pillow-10.4.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:06b2f7898047ae93fad74467ec3d28fe84f7831370e3c258afa533f81ef7f3df"}, + {file = "pillow-10.4.0-cp39-cp39-win32.whl", hash = "sha256:7970285ab628a3779aecc35823296a7869f889b8329c16ad5a71e4901a3dc4ef"}, + {file = "pillow-10.4.0-cp39-cp39-win_amd64.whl", hash = "sha256:961a7293b2457b405967af9c77dcaa43cc1a8cd50d23c532e62d48ab6cdd56f5"}, + {file = "pillow-10.4.0-cp39-cp39-win_arm64.whl", hash = "sha256:32cda9e3d601a52baccb2856b8ea1fc213c90b340c542dcef77140dfa3278a9e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5b4815f2e65b30f5fbae9dfffa8636d992d49705723fe86a3661806e069352d4"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:8f0aef4ef59694b12cadee839e2ba6afeab89c0f39a3adc02ed51d109117b8da"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f4727572e2918acaa9077c919cbbeb73bd2b3ebcfe033b72f858fc9fbef0026"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ff25afb18123cea58a591ea0244b92eb1e61a1fd497bf6d6384f09bc3262ec3e"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:dc3e2db6ba09ffd7d02ae9141cfa0ae23393ee7687248d46a7507b75d610f4f5"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:02a2be69f9c9b8c1e97cf2713e789d4e398c751ecfd9967c18d0ce304efbf885"}, + {file = "pillow-10.4.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0755ffd4a0c6f267cccbae2e9903d95477ca2f77c4fcf3a3a09570001856c8a5"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:a02364621fe369e06200d4a16558e056fe2805d3468350df3aef21e00d26214b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:1b5dea9831a90e9d0721ec417a80d4cbd7022093ac38a568db2dd78363b00908"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9b885f89040bb8c4a1573566bbb2f44f5c505ef6e74cec7ab9068c900047f04b"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87dd88ded2e6d74d31e1e0a99a726a6765cda32d00ba72dc37f0651f306daaa8"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:2db98790afc70118bd0255c2eeb465e9767ecf1f3c25f9a1abb8ffc8cfd1fe0a"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f7baece4ce06bade126fb84b8af1c33439a76d8a6fd818970215e0560ca28c27"}, + {file = "pillow-10.4.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cfdd747216947628af7b259d274771d84db2268ca062dd5faf373639d00113a3"}, + {file = "pillow-10.4.0.tar.gz", hash = "sha256:166c1cd4d24309b30d61f79f4a9114b7b2313d7450912277855ff5dfd7cd4a06"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=7.3)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + [[package]] name = "platformdirs" version = "4.2.2" @@ -394,13 +1138,24 @@ test = ["enum34", "ipaddress", "mock", "pywin32", "wmi"] [[package]] name = "pycodestyle" -version = "2.12.0" +version = "2.12.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" files = [ - {file = "pycodestyle-2.12.0-py2.py3-none-any.whl", hash = "sha256:949a39f6b86c3e1515ba1787c2022131d165a8ad271b11370a8819aa070269e4"}, - {file = "pycodestyle-2.12.0.tar.gz", hash = "sha256:442f950141b4f43df752dd303511ffded3a04c2b6fb7f65980574f0c31e6e79c"}, + {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"}, + {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"}, +] + +[[package]] +name = "pycparser" +version = "2.22" +description = "C parser in Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, + {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, ] [[package]] @@ -557,6 +1312,20 @@ files = [ {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, ] +[[package]] +name = "pygments" +version = "2.18.0" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, + {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pylint" version = "3.2.7" @@ -617,67 +1386,61 @@ pylint-plugin-utils = "*" [[package]] name = "pymupdf" -version = "1.24.9" +version = "1.24.10" description = "A high performance Python library for data extraction, analysis, conversion & manipulation of PDF (and other) documents." optional = false python-versions = ">=3.8" files = [ - {file = "PyMuPDF-1.24.9-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:da5d9699472bfd1de52975de3eb7efaf5190ac5801b9fc6bcccde603afbe6937"}, - {file = "PyMuPDF-1.24.9-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:3d1133983c7ac388a35bbab8dfc4c26a874c05edc47d2038961add2efa4639a8"}, - {file = "PyMuPDF-1.24.9-cp310-none-manylinux2014_aarch64.whl", hash = "sha256:94f2796a3dd1f0735d0717eb020d7c3c7313eaae8c9c1040022408c880931616"}, - {file = "PyMuPDF-1.24.9-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:5199567353d1543e6c21c626148f8ac9ebb14ce553f2c434fcb9b00e195e1e52"}, - {file = "PyMuPDF-1.24.9-cp310-none-musllinux_1_2_x86_64.whl", hash = "sha256:c97f0b2fb201c9d9bc0f15a901641174e8896a9ae9fbe0d5bb1a6f2315cc3ced"}, - {file = "PyMuPDF-1.24.9-cp310-none-win32.whl", hash = "sha256:00499b864a56a2168254dce3d0f12048b96e9b3bdd43fecace18a1572342c8d4"}, - {file = "PyMuPDF-1.24.9-cp310-none-win_amd64.whl", hash = "sha256:f074e501e883428e7d5480f732ea6a6bd17146f10ebefb9b84957fd32b79f0d4"}, - {file = "PyMuPDF-1.24.9-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:caf43ce86790f95049a5849f2802b5c412b865cd368ece89a39a54fc84aa45cd"}, - {file = "PyMuPDF-1.24.9-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:13d06161176e1d4e337f5b5e053b628e4531bab5effb269a83dc38d4deb8e659"}, - {file = "PyMuPDF-1.24.9-cp311-none-manylinux2014_aarch64.whl", hash = "sha256:7ab228dfb80002eb8612ffe71b50052d8b20d9364a3535e2fe43a0901ce41d40"}, - {file = "PyMuPDF-1.24.9-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:042ad205c7ef615d9fbab7078f6fa8d14f020ed2dfe3a79d803b6171318565b5"}, - {file = "PyMuPDF-1.24.9-cp311-none-musllinux_1_2_x86_64.whl", hash = "sha256:b4495833bb0300fc885491928f2cbdf96afb569205dcc256bb4c43e3d1fde7cb"}, - {file = "PyMuPDF-1.24.9-cp311-none-win32.whl", hash = "sha256:e53370f3679a7b013c2abb801bb566882dab1fb59646d4b0a717ee0d350c5ab1"}, - {file = "PyMuPDF-1.24.9-cp311-none-win_amd64.whl", hash = "sha256:454932e9c7b9cd3057ee83dfe805f551a1382b9e216e87a32eb44c6d6843f966"}, - {file = "PyMuPDF-1.24.9-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:93cc4908259f133c9dc88f5e77329c4b2dbc03fca83126b1efffedb67ade0fb9"}, - {file = "PyMuPDF-1.24.9-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:84e1516d4b3e40711b9a6dbaedd30e0a89d6a054ca408a56114ceb5a1461f0d1"}, - {file = "PyMuPDF-1.24.9-cp312-none-manylinux2014_aarch64.whl", hash = "sha256:d7cdddce8d214e65ed483a8a403da49984815e543c3ce4b539306570c4cfc453"}, - {file = "PyMuPDF-1.24.9-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:de8b330900c194efeedeb97adab25520479d101fc9aed50d7323dde08698ae24"}, - {file = "PyMuPDF-1.24.9-cp312-none-musllinux_1_2_x86_64.whl", hash = "sha256:41c92d69993e7614730205b75d7999b21ca0f929d31b2bb86a4b58d3b1b0451a"}, - {file = "PyMuPDF-1.24.9-cp312-none-win32.whl", hash = "sha256:a04af6f3f5f35cb62bc7b3c2e9cfff510aa56c39c53355ecfff40b7cb9773fef"}, - {file = "PyMuPDF-1.24.9-cp312-none-win_amd64.whl", hash = "sha256:e2828a79415ae3dd90c629697ace51db7f1e81f426fc2fc034c2151dbe58be6e"}, - {file = "PyMuPDF-1.24.9-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:241913d0c76aacb05acdd8a0e82b1105883ffe6ef3bb4d9742b41d3c5e84d2db"}, - {file = "PyMuPDF-1.24.9-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:ff70e26625b6cdd036e2c63b5d6c1897949c0e8b205cd756276f27baadaad340"}, - {file = "PyMuPDF-1.24.9-cp38-none-manylinux2014_aarch64.whl", hash = "sha256:8e29bc817afad511072371f24624c7c3b7485a9e656b6a65dc58fecdf5043b08"}, - {file = "PyMuPDF-1.24.9-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:d17ec6920f91c43b6e777a017f3aaf44b205a3216771db9e8aa46e78a703f8f6"}, - {file = "PyMuPDF-1.24.9-cp38-none-musllinux_1_2_x86_64.whl", hash = "sha256:5cec9d17fdcbd83fa2c90190c22f652a0a51275cf75a29068eea025fff076829"}, - {file = "PyMuPDF-1.24.9-cp38-none-win32.whl", hash = "sha256:4f7b19f5c0026db49b7be17901728ed15761c5aa2031f04b01f9eb2e54f1b50e"}, - {file = "PyMuPDF-1.24.9-cp38-none-win_amd64.whl", hash = "sha256:e4c867f1cde68ff0e9c7889ea27c4c2c67df80e776f82619888bb69d1e1b27cf"}, - {file = "PyMuPDF-1.24.9-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:b4f85c24050e3778be6c7c1f4d4965fd4385281264798df7b4301b78895053fd"}, - {file = "PyMuPDF-1.24.9-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:4e807010ef4e63cfb70dd88fe1fcd1d7e2b4e62ffa2b1dc53b35bc18bf939d8e"}, - {file = "PyMuPDF-1.24.9-cp39-none-manylinux2014_aarch64.whl", hash = "sha256:5dac888cc16981e385c886c26de6aabf914059215e028d14cd67767ff0c1288c"}, - {file = "PyMuPDF-1.24.9-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:de55817c02e06ff75233ce2487cc5ebcbf585acd694bb69500825ee37789ac79"}, - {file = "PyMuPDF-1.24.9-cp39-none-musllinux_1_2_x86_64.whl", hash = "sha256:49cb22196f11c2327f6345554db48cfb2e31ed4f073ca6a872f21ddc4b0619c1"}, - {file = "PyMuPDF-1.24.9-cp39-none-win32.whl", hash = "sha256:46b1f84816c666e1c82f4249c1e815e92c462633255d72da20751eaad125d0f0"}, - {file = "PyMuPDF-1.24.9-cp39-none-win_amd64.whl", hash = "sha256:4fa45474d63715c707e3c3a6ebeeee75fd7aaa180512b75863e437f6876dfa86"}, - {file = "PyMuPDF-1.24.9.tar.gz", hash = "sha256:3692a5e824f10dc09bbddabab207f7cd5979831e48dd2f4de1be21e441767473"}, + {file = "PyMuPDF-1.24.10-cp310-none-macosx_10_9_x86_64.whl", hash = "sha256:a320d44a96f0cc69364c4d7384205fdae57a511edb07e68627108546cfd3ca7d"}, + {file = "PyMuPDF-1.24.10-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:cd078bf6c5a016bb34dea0ed75030edaae1b00ca273f1fe41100c8c35a101abe"}, + {file = "PyMuPDF-1.24.10-cp310-none-manylinux2014_x86_64.whl", hash = "sha256:9b3188a6d47bb512f749e9612f81f7ddf01c70308de6bb81b77b3a76ac43f402"}, + {file = "PyMuPDF-1.24.10-cp310-none-musllinux_1_2_x86_64.whl", hash = "sha256:e52cc55055f08d9411cd991693fb1e6c7f665e92f7b79cdfa0c3d526b2e7f9bb"}, + {file = "PyMuPDF-1.24.10-cp310-none-win32.whl", hash = "sha256:117b8a2220ec31142f6a5e4ac0235b56f239d88e57e5cef3aa047bc70e4cd1e9"}, + {file = "PyMuPDF-1.24.10-cp310-none-win_amd64.whl", hash = "sha256:98b846dd044bf7242dcea6900b16eb430adbc3f1cef58d80c83a15ca7a35ef8b"}, + {file = "PyMuPDF-1.24.10-cp311-none-macosx_10_9_x86_64.whl", hash = "sha256:5fbd67cce759fc0126902137409cf9da6313b776c4d5ff0d5200f336350f86a3"}, + {file = "PyMuPDF-1.24.10-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:2b14dbdf7c415bb0fa849527abbe7b4f1f55ae23b9355d132951f634438c59ac"}, + {file = "PyMuPDF-1.24.10-cp311-none-manylinux2014_x86_64.whl", hash = "sha256:c0d1ccdc062ea9961063790831e838bc43fcf9a8436a8b9f55898addf97c0f86"}, + {file = "PyMuPDF-1.24.10-cp311-none-musllinux_1_2_x86_64.whl", hash = "sha256:f68671363be5a2ba104ab7d3bad821d2994cbe3f3408538bbc27d32e6dc9f923"}, + {file = "PyMuPDF-1.24.10-cp311-none-win32.whl", hash = "sha256:49f83556cd1a7d05b36a54ccc01fce324da8a4e6854e36cc5cd94d321e428565"}, + {file = "PyMuPDF-1.24.10-cp311-none-win_amd64.whl", hash = "sha256:05b8d360766b87f4abd186eba16a56b92bae513b2361b13f633fe6256329292e"}, + {file = "PyMuPDF-1.24.10-cp312-none-macosx_10_9_x86_64.whl", hash = "sha256:f323aa7bb55e0214e632bfe24fa140bd5dcfeac2d3977bdce46e760385140513"}, + {file = "PyMuPDF-1.24.10-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:50d2972558d25ce46a8634b58787b28dbeff9b3fe4299530fc9c8c9921061e83"}, + {file = "PyMuPDF-1.24.10-cp312-none-manylinux2014_x86_64.whl", hash = "sha256:cd78ee1ebefdfe72bc36fd4b731cc8c694eb8ef5337d8ea956b0e94cd88751fc"}, + {file = "PyMuPDF-1.24.10-cp312-none-musllinux_1_2_x86_64.whl", hash = "sha256:696eed91d2ee44e76277dfeb6bd904c84ae005378588949df6ed9be9e03b9817"}, + {file = "PyMuPDF-1.24.10-cp312-none-win32.whl", hash = "sha256:1e5413e1aeab2f18e1ca1b3ff17057a4a7c5cbf4ff14abc93203da88fc1a1dd8"}, + {file = "PyMuPDF-1.24.10-cp312-none-win_amd64.whl", hash = "sha256:227a4473fce8fa32b9268da68781048795503b67dc045867fc201e1334204bf1"}, + {file = "PyMuPDF-1.24.10-cp38-none-macosx_10_9_x86_64.whl", hash = "sha256:738baea1e9f9f1df1da6ae9f5699fc96dcad58043c34ddecb69d3840c396dfb5"}, + {file = "PyMuPDF-1.24.10-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:bbd618078c2caab742d34291bd4a4c660981bef3988dfaaadf5d534081abd8ec"}, + {file = "PyMuPDF-1.24.10-cp38-none-manylinux2014_x86_64.whl", hash = "sha256:d31c09e2af29f778249fe93beb4e44657ed2c0f385ccd5906e84b2b6a747ba9a"}, + {file = "PyMuPDF-1.24.10-cp38-none-musllinux_1_2_x86_64.whl", hash = "sha256:dda94921c9daf0416fe77b1366200c970a1b8519f826c30be7148df6a147eb1a"}, + {file = "PyMuPDF-1.24.10-cp38-none-win32.whl", hash = "sha256:8ccb72a67386affe170a36e1a145f0a05daf1df9856130cedf4371015b66acc3"}, + {file = "PyMuPDF-1.24.10-cp38-none-win_amd64.whl", hash = "sha256:634b22c0465dc9e415acea35f98c189391c6685ed943d489c069af7a72d1f8da"}, + {file = "PyMuPDF-1.24.10-cp39-none-macosx_10_9_x86_64.whl", hash = "sha256:903b61b70952368d262603abc4b0cbe06a557872003f92bb8db4b6e608ff0817"}, + {file = "PyMuPDF-1.24.10-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:c3405e120249e2849daac4d52e25433bb0b5da4695ce8edea6cfb9cc819985c4"}, + {file = "PyMuPDF-1.24.10-cp39-none-manylinux2014_x86_64.whl", hash = "sha256:2e95f5c379dc086eaa5a2af73f2e62a0bae95e05b19963603cdf800ca51e65a6"}, + {file = "PyMuPDF-1.24.10-cp39-none-musllinux_1_2_x86_64.whl", hash = "sha256:e6a6458a3a6e7ec371176be17535825ca860d8eab5f8999958b2ac639777df5f"}, + {file = "PyMuPDF-1.24.10-cp39-none-win32.whl", hash = "sha256:c8db06b0d5fbcf9b0e8949415771bd4771c820f43b052b42bce2db44c100e384"}, + {file = "PyMuPDF-1.24.10-cp39-none-win_amd64.whl", hash = "sha256:be120588a1d41220a60d899c43600e537d1bdb73612e0230378d14df40498658"}, + {file = "PyMuPDF-1.24.10.tar.gz", hash = "sha256:bd3ebd6d3fb8a845582098362f885bfb0a31ae4272587efc2c55c5e29fe7327a"}, ] [package.dependencies] -PyMuPDFb = "1.24.9" +PyMuPDFb = "1.24.10" [[package]] name = "pymupdfb" -version = "1.24.9" +version = "1.24.10" description = "MuPDF shared libraries for PyMuPDF." optional = false python-versions = ">=3.8" files = [ - {file = "PyMuPDFb-1.24.9-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:3c9e694b1fb1bde37a8d3c953fbd0916e7dee8a4650142547d4f832105b17689"}, - {file = "PyMuPDFb-1.24.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:3fd74ee7969712ab457495465da0a61aab44d8cf9b71b9ef51910a8c6a90ad57"}, - {file = "PyMuPDFb-1.24.9-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb5b38f588963a239a8c0bca99d3d912f0c04674e3c6e7199e44cebd22840061"}, - {file = "PyMuPDFb-1.24.9-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:198f6b3713b6f980fa96c1099be0d5459c7d43c593299948f0ba528577e6bf46"}, - {file = "PyMuPDFb-1.24.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:ae044ebc8299f5a3ba822a6dfe97285dffd6c66cba194bc39180aa189a2755c9"}, - {file = "PyMuPDFb-1.24.9-py3-none-win32.whl", hash = "sha256:20ea17fd5799dcf7813ec099c0ce303f763e6e4ba8d0f54d5f84e4df90c3a340"}, - {file = "PyMuPDFb-1.24.9-py3-none-win_amd64.whl", hash = "sha256:c6b8adc0b9c91ff0f657440a816ad2130429a808cd53ff273f3e72532e526bdc"}, - {file = "PyMuPDFb-1.24.9.tar.gz", hash = "sha256:5505f07b3dded6e791ab7d10d01f0687e913fc75edd23fdf2825a582b6651558"}, + {file = "PyMuPDFb-1.24.10-py3-none-macosx_10_9_x86_64.whl", hash = "sha256:cd6b24630d90dce9ab3e59d06c5e616686f8d7ec626be1311721fcb062aa0078"}, + {file = "PyMuPDFb-1.24.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fda2c34b206f724b1b5685b67188e2a57bcaa5c99bc40a0a5bc62057514c5cdf"}, + {file = "PyMuPDFb-1.24.10-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:409f1270ef2e70d845e80149ff3db9cfed578274042316cba55cc3e3882421ea"}, + {file = "PyMuPDFb-1.24.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:aca96b6e9ee3096a26810592f4d899f4d3cf3cf0c902ae7e8cca09bce4d946c4"}, + {file = "PyMuPDFb-1.24.10-py3-none-win32.whl", hash = "sha256:2d231b42fe3bf79837df235e7fbdf7ff8b46bf4ca1346d0f0124fb1cdd343ce8"}, + {file = "PyMuPDFb-1.24.10-py3-none-win_amd64.whl", hash = "sha256:27ea65c701608b6b7632703339ca33ea6d513843b26dbe9bdefb2f56f7b9b196"}, + {file = "PyMuPDFb-1.24.10.tar.gz", hash = "sha256:007b91fa9b528c5c0eecea2e49c486ac02e878274f9e31522bdd948adc5f8327"}, ] [[package]] @@ -777,92 +1540,110 @@ files = [ [[package]] name = "regex" -version = "2024.5.15" +version = "2024.7.24" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" files = [ - {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a81e3cfbae20378d75185171587cbf756015ccb14840702944f014e0d93ea09f"}, - {file = "regex-2024.5.15-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b59138b219ffa8979013be7bc85bb60c6f7b7575df3d56dc1e403a438c7a3f6"}, - {file = "regex-2024.5.15-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a0bd000c6e266927cb7a1bc39d55be95c4b4f65c5be53e659537537e019232b1"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eaa7ddaf517aa095fa8da0b5015c44d03da83f5bd49c87961e3c997daed0de7"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ba68168daedb2c0bab7fd7e00ced5ba90aebf91024dea3c88ad5063c2a562cca"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e8d717bca3a6e2064fc3a08df5cbe366369f4b052dcd21b7416e6d71620dca1"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1337b7dbef9b2f71121cdbf1e97e40de33ff114801263b275aafd75303bd62b5"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f9ebd0a36102fcad2f03696e8af4ae682793a5d30b46c647eaf280d6cfb32796"}, - {file = "regex-2024.5.15-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:9efa1a32ad3a3ea112224897cdaeb6aa00381627f567179c0314f7b65d354c62"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1595f2d10dff3d805e054ebdc41c124753631b6a471b976963c7b28543cf13b0"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b802512f3e1f480f41ab5f2cfc0e2f761f08a1f41092d6718868082fc0d27143"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:a0981022dccabca811e8171f913de05720590c915b033b7e601f35ce4ea7019f"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:19068a6a79cf99a19ccefa44610491e9ca02c2be3305c7760d3831d38a467a6f"}, - {file = "regex-2024.5.15-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1b5269484f6126eee5e687785e83c6b60aad7663dafe842b34691157e5083e53"}, - {file = "regex-2024.5.15-cp310-cp310-win32.whl", hash = "sha256:ada150c5adfa8fbcbf321c30c751dc67d2f12f15bd183ffe4ec7cde351d945b3"}, - {file = "regex-2024.5.15-cp310-cp310-win_amd64.whl", hash = "sha256:ac394ff680fc46b97487941f5e6ae49a9f30ea41c6c6804832063f14b2a5a145"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f5b1dff3ad008dccf18e652283f5e5339d70bf8ba7c98bf848ac33db10f7bc7a"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c6a2b494a76983df8e3d3feea9b9ffdd558b247e60b92f877f93a1ff43d26656"}, - {file = "regex-2024.5.15-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a32b96f15c8ab2e7d27655969a23895eb799de3665fa94349f3b2fbfd547236f"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:10002e86e6068d9e1c91eae8295ef690f02f913c57db120b58fdd35a6bb1af35"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec54d5afa89c19c6dd8541a133be51ee1017a38b412b1321ccb8d6ddbeb4cf7d"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:10e4ce0dca9ae7a66e6089bb29355d4432caed736acae36fef0fdd7879f0b0cb"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e507ff1e74373c4d3038195fdd2af30d297b4f0950eeda6f515ae3d84a1770f"}, - {file = "regex-2024.5.15-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d1f059a4d795e646e1c37665b9d06062c62d0e8cc3c511fe01315973a6542e40"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:0721931ad5fe0dda45d07f9820b90b2148ccdd8e45bb9e9b42a146cb4f695649"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:833616ddc75ad595dee848ad984d067f2f31be645d603e4d158bba656bbf516c"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:287eb7f54fc81546346207c533ad3c2c51a8d61075127d7f6d79aaf96cdee890"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:19dfb1c504781a136a80ecd1fff9f16dddf5bb43cec6871778c8a907a085bb3d"}, - {file = "regex-2024.5.15-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:119af6e56dce35e8dfb5222573b50c89e5508d94d55713c75126b753f834de68"}, - {file = "regex-2024.5.15-cp311-cp311-win32.whl", hash = "sha256:1c1c174d6ec38d6c8a7504087358ce9213d4332f6293a94fbf5249992ba54efa"}, - {file = "regex-2024.5.15-cp311-cp311-win_amd64.whl", hash = "sha256:9e717956dcfd656f5055cc70996ee2cc82ac5149517fc8e1b60261b907740201"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:632b01153e5248c134007209b5c6348a544ce96c46005d8456de1d552455b014"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e64198f6b856d48192bf921421fdd8ad8eb35e179086e99e99f711957ffedd6e"}, - {file = "regex-2024.5.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:68811ab14087b2f6e0fc0c2bae9ad689ea3584cad6917fc57be6a48bbd012c49"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8ec0c2fea1e886a19c3bee0cd19d862b3aa75dcdfb42ebe8ed30708df64687a"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d0c0c0003c10f54a591d220997dd27d953cd9ccc1a7294b40a4be5312be8797b"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2431b9e263af1953c55abbd3e2efca67ca80a3de8a0437cb58e2421f8184717a"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a605586358893b483976cffc1723fb0f83e526e8f14c6e6614e75919d9862cf"}, - {file = "regex-2024.5.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:391d7f7f1e409d192dba8bcd42d3e4cf9e598f3979cdaed6ab11288da88cb9f2"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9ff11639a8d98969c863d4617595eb5425fd12f7c5ef6621a4b74b71ed8726d5"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4eee78a04e6c67e8391edd4dad3279828dd66ac4b79570ec998e2155d2e59fd5"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:8fe45aa3f4aa57faabbc9cb46a93363edd6197cbc43523daea044e9ff2fea83e"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:d0a3d8d6acf0c78a1fff0e210d224b821081330b8524e3e2bc5a68ef6ab5803d"}, - {file = "regex-2024.5.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c486b4106066d502495b3025a0a7251bf37ea9540433940a23419461ab9f2a80"}, - {file = "regex-2024.5.15-cp312-cp312-win32.whl", hash = "sha256:c49e15eac7c149f3670b3e27f1f28a2c1ddeccd3a2812cba953e01be2ab9b5fe"}, - {file = "regex-2024.5.15-cp312-cp312-win_amd64.whl", hash = "sha256:673b5a6da4557b975c6c90198588181029c60793835ce02f497ea817ff647cb2"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:87e2a9c29e672fc65523fb47a90d429b70ef72b901b4e4b1bd42387caf0d6835"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:c3bea0ba8b73b71b37ac833a7f3fd53825924165da6a924aec78c13032f20850"}, - {file = "regex-2024.5.15-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:bfc4f82cabe54f1e7f206fd3d30fda143f84a63fe7d64a81558d6e5f2e5aaba9"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5bb9425fe881d578aeca0b2b4b3d314ec88738706f66f219c194d67179337cb"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:64c65783e96e563103d641760664125e91bd85d8e49566ee560ded4da0d3e704"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cf2430df4148b08fb4324b848672514b1385ae3807651f3567871f130a728cc3"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5397de3219a8b08ae9540c48f602996aa6b0b65d5a61683e233af8605c42b0f2"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:455705d34b4154a80ead722f4f185b04c4237e8e8e33f265cd0798d0e44825fa"}, - {file = "regex-2024.5.15-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b2b6f1b3bb6f640c1a92be3bbfbcb18657b125b99ecf141fb3310b5282c7d4ed"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:3ad070b823ca5890cab606c940522d05d3d22395d432f4aaaf9d5b1653e47ced"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:5b5467acbfc153847d5adb21e21e29847bcb5870e65c94c9206d20eb4e99a384"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:e6662686aeb633ad65be2a42b4cb00178b3fbf7b91878f9446075c404ada552f"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:2b4c884767504c0e2401babe8b5b7aea9148680d2e157fa28f01529d1f7fcf67"}, - {file = "regex-2024.5.15-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:3cd7874d57f13bf70078f1ff02b8b0aa48d5b9ed25fc48547516c6aba36f5741"}, - {file = "regex-2024.5.15-cp38-cp38-win32.whl", hash = "sha256:e4682f5ba31f475d58884045c1a97a860a007d44938c4c0895f41d64481edbc9"}, - {file = "regex-2024.5.15-cp38-cp38-win_amd64.whl", hash = "sha256:d99ceffa25ac45d150e30bd9ed14ec6039f2aad0ffa6bb87a5936f5782fc1569"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:13cdaf31bed30a1e1c2453ef6015aa0983e1366fad2667657dbcac7b02f67133"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:cac27dcaa821ca271855a32188aa61d12decb6fe45ffe3e722401fe61e323cd1"}, - {file = "regex-2024.5.15-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7dbe2467273b875ea2de38ded4eba86cbcbc9a1a6d0aa11dcf7bd2e67859c435"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f18a9a3513a99c4bef0e3efd4c4a5b11228b48aa80743be822b71e132ae4f5"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d347a741ea871c2e278fde6c48f85136c96b8659b632fb57a7d1ce1872547600"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1878b8301ed011704aea4c806a3cadbd76f84dece1ec09cc9e4dc934cfa5d4da"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4babf07ad476aaf7830d77000874d7611704a7fcf68c9c2ad151f5d94ae4bfc4"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:35cb514e137cb3488bce23352af3e12fb0dbedd1ee6e60da053c69fb1b29cc6c"}, - {file = "regex-2024.5.15-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:cdd09d47c0b2efee9378679f8510ee6955d329424c659ab3c5e3a6edea696294"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:72d7a99cd6b8f958e85fc6ca5b37c4303294954eac1376535b03c2a43eb72629"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:a094801d379ab20c2135529948cb84d417a2169b9bdceda2a36f5f10977ebc16"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:c0c18345010870e58238790a6779a1219b4d97bd2e77e1140e8ee5d14df071aa"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:16093f563098448ff6b1fa68170e4acbef94e6b6a4e25e10eae8598bb1694b5d"}, - {file = "regex-2024.5.15-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e38a7d4e8f633a33b4c7350fbd8bad3b70bf81439ac67ac38916c4a86b465456"}, - {file = "regex-2024.5.15-cp39-cp39-win32.whl", hash = "sha256:71a455a3c584a88f654b64feccc1e25876066c4f5ef26cd6dd711308aa538694"}, - {file = "regex-2024.5.15-cp39-cp39-win_amd64.whl", hash = "sha256:cab12877a9bdafde5500206d1020a584355a97884dfd388af3699e9137bf7388"}, - {file = "regex-2024.5.15.tar.gz", hash = "sha256:d3ee02d9e5f482cc8309134a91eeaacbdd2261ba111b0fef3748eeb4913e6a2c"}, + {file = "regex-2024.7.24-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b0d3f567fafa0633aee87f08b9276c7062da9616931382993c03808bb68ce"}, + {file = "regex-2024.7.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3426de3b91d1bc73249042742f45c2148803c111d1175b283270177fdf669024"}, + {file = "regex-2024.7.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f273674b445bcb6e4409bf8d1be67bc4b58e8b46fd0d560055d515b8830063cd"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23acc72f0f4e1a9e6e9843d6328177ae3074b4182167e34119ec7233dfeccf53"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65fd3d2e228cae024c411c5ccdffae4c315271eee4a8b839291f84f796b34eca"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c414cbda77dbf13c3bc88b073a1a9f375c7b0cb5e115e15d4b73ec3a2fbc6f59"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf7a89eef64b5455835f5ed30254ec19bf41f7541cd94f266ab7cbd463f00c41"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19c65b00d42804e3fbea9708f0937d157e53429a39b7c61253ff15670ff62cb5"}, + {file = "regex-2024.7.24-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7a5486ca56c8869070a966321d5ab416ff0f83f30e0e2da1ab48815c8d165d46"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6f51f9556785e5a203713f5efd9c085b4a45aecd2a42573e2b5041881b588d1f"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a4997716674d36a82eab3e86f8fa77080a5d8d96a389a61ea1d0e3a94a582cf7"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c0abb5e4e8ce71a61d9446040c1e86d4e6d23f9097275c5bd49ed978755ff0fe"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:18300a1d78cf1290fa583cd8b7cde26ecb73e9f5916690cf9d42de569c89b1ce"}, + {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:416c0e4f56308f34cdb18c3f59849479dde5b19febdcd6e6fa4d04b6c31c9faa"}, + {file = "regex-2024.7.24-cp310-cp310-win32.whl", hash = "sha256:fb168b5924bef397b5ba13aabd8cf5df7d3d93f10218d7b925e360d436863f66"}, + {file = "regex-2024.7.24-cp310-cp310-win_amd64.whl", hash = "sha256:6b9fc7e9cc983e75e2518496ba1afc524227c163e43d706688a6bb9eca41617e"}, + {file = "regex-2024.7.24-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:382281306e3adaaa7b8b9ebbb3ffb43358a7bbf585fa93821300a418bb975281"}, + {file = "regex-2024.7.24-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4fdd1384619f406ad9037fe6b6eaa3de2749e2e12084abc80169e8e075377d3b"}, + {file = "regex-2024.7.24-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3d974d24edb231446f708c455fd08f94c41c1ff4f04bcf06e5f36df5ef50b95a"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2ec4419a3fe6cf8a4795752596dfe0adb4aea40d3683a132bae9c30b81e8d73"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb563dd3aea54c797adf513eeec819c4213d7dbfc311874eb4fd28d10f2ff0f2"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:45104baae8b9f67569f0f1dca5e1f1ed77a54ae1cd8b0b07aba89272710db61e"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:994448ee01864501912abf2bad9203bffc34158e80fe8bfb5b031f4f8e16da51"}, + {file = "regex-2024.7.24-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3fac296f99283ac232d8125be932c5cd7644084a30748fda013028c815ba3364"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7e37e809b9303ec3a179085415cb5f418ecf65ec98cdfe34f6a078b46ef823ee"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:01b689e887f612610c869421241e075c02f2e3d1ae93a037cb14f88ab6a8934c"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f6442f0f0ff81775eaa5b05af8a0ffa1dda36e9cf6ec1e0d3d245e8564b684ce"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:871e3ab2838fbcb4e0865a6e01233975df3a15e6fce93b6f99d75cacbd9862d1"}, + {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c918b7a1e26b4ab40409820ddccc5d49871a82329640f5005f73572d5eaa9b5e"}, + {file = "regex-2024.7.24-cp311-cp311-win32.whl", hash = "sha256:2dfbb8baf8ba2c2b9aa2807f44ed272f0913eeeba002478c4577b8d29cde215c"}, + {file = "regex-2024.7.24-cp311-cp311-win_amd64.whl", hash = "sha256:538d30cd96ed7d1416d3956f94d54e426a8daf7c14527f6e0d6d425fcb4cca52"}, + {file = "regex-2024.7.24-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fe4ebef608553aff8deb845c7f4f1d0740ff76fa672c011cc0bacb2a00fbde86"}, + {file = "regex-2024.7.24-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:74007a5b25b7a678459f06559504f1eec2f0f17bca218c9d56f6a0a12bfffdad"}, + {file = "regex-2024.7.24-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7df9ea48641da022c2a3c9c641650cd09f0cd15e8908bf931ad538f5ca7919c9"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a1141a1dcc32904c47f6846b040275c6e5de0bf73f17d7a409035d55b76f289"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80c811cfcb5c331237d9bad3bea2c391114588cf4131707e84d9493064d267f9"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7214477bf9bd195894cf24005b1e7b496f46833337b5dedb7b2a6e33f66d962c"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d55588cba7553f0b6ec33130bc3e114b355570b45785cebdc9daed8c637dd440"}, + {file = "regex-2024.7.24-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:558a57cfc32adcf19d3f791f62b5ff564922942e389e3cfdb538a23d65a6b610"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a512eed9dfd4117110b1881ba9a59b31433caed0c4101b361f768e7bcbaf93c5"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:86b17ba823ea76256b1885652e3a141a99a5c4422f4a869189db328321b73799"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5eefee9bfe23f6df09ffb6dfb23809f4d74a78acef004aa904dc7c88b9944b05"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:731fcd76bbdbf225e2eb85b7c38da9633ad3073822f5ab32379381e8c3c12e94"}, + {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eaef80eac3b4cfbdd6de53c6e108b4c534c21ae055d1dbea2de6b3b8ff3def38"}, + {file = "regex-2024.7.24-cp312-cp312-win32.whl", hash = "sha256:185e029368d6f89f36e526764cf12bf8d6f0e3a2a7737da625a76f594bdfcbfc"}, + {file = "regex-2024.7.24-cp312-cp312-win_amd64.whl", hash = "sha256:2f1baff13cc2521bea83ab2528e7a80cbe0ebb2c6f0bfad15be7da3aed443908"}, + {file = "regex-2024.7.24-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:66b4c0731a5c81921e938dcf1a88e978264e26e6ac4ec96a4d21ae0354581ae0"}, + {file = "regex-2024.7.24-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:88ecc3afd7e776967fa16c80f974cb79399ee8dc6c96423321d6f7d4b881c92b"}, + {file = "regex-2024.7.24-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64bd50cf16bcc54b274e20235bf8edbb64184a30e1e53873ff8d444e7ac656b2"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb462f0e346fcf41a901a126b50f8781e9a474d3927930f3490f38a6e73b6950"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a82465ebbc9b1c5c50738536fdfa7cab639a261a99b469c9d4c7dcbb2b3f1e57"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:68a8f8c046c6466ac61a36b65bb2395c74451df2ffb8458492ef49900efed293"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac8e84fff5d27420f3c1e879ce9929108e873667ec87e0c8eeb413a5311adfe"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba2537ef2163db9e6ccdbeb6f6424282ae4dea43177402152c67ef869cf3978b"}, + {file = "regex-2024.7.24-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:43affe33137fcd679bdae93fb25924979517e011f9dea99163f80b82eadc7e53"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c9bb87fdf2ab2370f21e4d5636e5317775e5d51ff32ebff2cf389f71b9b13750"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:945352286a541406f99b2655c973852da7911b3f4264e010218bbc1cc73168f2"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:8bc593dcce679206b60a538c302d03c29b18e3d862609317cb560e18b66d10cf"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3f3b6ca8eae6d6c75a6cff525c8530c60e909a71a15e1b731723233331de4169"}, + {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c51edc3541e11fbe83f0c4d9412ef6c79f664a3745fab261457e84465ec9d5a8"}, + {file = "regex-2024.7.24-cp38-cp38-win32.whl", hash = "sha256:d0a07763776188b4db4c9c7fb1b8c494049f84659bb387b71c73bbc07f189e96"}, + {file = "regex-2024.7.24-cp38-cp38-win_amd64.whl", hash = "sha256:8fd5afd101dcf86a270d254364e0e8dddedebe6bd1ab9d5f732f274fa00499a5"}, + {file = "regex-2024.7.24-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0ffe3f9d430cd37d8fa5632ff6fb36d5b24818c5c986893063b4e5bdb84cdf24"}, + {file = "regex-2024.7.24-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:25419b70ba00a16abc90ee5fce061228206173231f004437730b67ac77323f0d"}, + {file = "regex-2024.7.24-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33e2614a7ce627f0cdf2ad104797d1f68342d967de3695678c0cb84f530709f8"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d33a0021893ede5969876052796165bab6006559ab845fd7b515a30abdd990dc"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:04ce29e2c5fedf296b1a1b0acc1724ba93a36fb14031f3abfb7abda2806c1535"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b16582783f44fbca6fcf46f61347340c787d7530d88b4d590a397a47583f31dd"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:836d3cc225b3e8a943d0b02633fb2f28a66e281290302a79df0e1eaa984ff7c1"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:438d9f0f4bc64e8dea78274caa5af971ceff0f8771e1a2333620969936ba10be"}, + {file = "regex-2024.7.24-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:973335b1624859cb0e52f96062a28aa18f3a5fc77a96e4a3d6d76e29811a0e6e"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c5e69fd3eb0b409432b537fe3c6f44ac089c458ab6b78dcec14478422879ec5f"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fbf8c2f00904eaf63ff37718eb13acf8e178cb940520e47b2f05027f5bb34ce3"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2757ace61bc4061b69af19e4689fa4416e1a04840f33b441034202b5cd02d4"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:44fc61b99035fd9b3b9453f1713234e5a7c92a04f3577252b45feefe1b327759"}, + {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:84c312cdf839e8b579f504afcd7b65f35d60b6285d892b19adea16355e8343c9"}, + {file = "regex-2024.7.24-cp39-cp39-win32.whl", hash = "sha256:ca5b2028c2f7af4e13fb9fc29b28d0ce767c38c7facdf64f6c2cd040413055f1"}, + {file = "regex-2024.7.24-cp39-cp39-win_amd64.whl", hash = "sha256:7c479f5ae937ec9985ecaf42e2e10631551d909f203e31308c12d703922742f9"}, + {file = "regex-2024.7.24.tar.gz", hash = "sha256:9cfd009eed1a46b27c14039ad5bbc5e71b6367c5b2e6d5f5da0ea91600817506"}, +] + +[[package]] +name = "rich" +version = "13.8.0" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.8.0-py3-none-any.whl", hash = "sha256:2e85306a063b9492dffc86278197a60cbece75bcb766022f3436f567cae11bdc"}, + {file = "rich-13.8.0.tar.gz", hash = "sha256:a5ac1f1cd448ade0d59cc3356f7db7a7ccda2c8cbae9c7a90c28ff463d3e91f4"}, ] +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "ruff" version = "0.6.3" @@ -961,13 +1742,13 @@ files = [ [[package]] name = "tomlkit" -version = "0.12.5" +version = "0.13.2" description = "Style preserving TOML library" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "tomlkit-0.12.5-py3-none-any.whl", hash = "sha256:af914f5a9c59ed9d0762c7b64d3b5d5df007448eb9cd2edc8a46b1eafead172f"}, - {file = "tomlkit-0.12.5.tar.gz", hash = "sha256:eef34fba39834d4d6b73c9ba7f3e4d1c417a4e56f89a7e96e090dd0d24b8fb3c"}, + {file = "tomlkit-0.13.2-py3-none-any.whl", hash = "sha256:7a974427f6e119197f670fbbbeae7bef749a6c14e793db934baefc1b5f03efde"}, + {file = "tomlkit-0.13.2.tar.gz", hash = "sha256:fff5fe59a87295b278abd31bec92c15d9bc4a06885ab12bcea52c71119392e79"}, ] [[package]] @@ -1062,7 +1843,86 @@ tzdata = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3)", "zest.releaser"] +[[package]] +name = "wrapt" +version = "1.16.0" +description = "Module for decorators, wrappers and monkey patching." +optional = false +python-versions = ">=3.6" +files = [ + {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, + {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, + {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, + {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, + {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, + {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, + {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, + {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, + {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, + {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, + {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, + {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, + {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, + {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, + {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, + {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, + {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, + {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, + {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, + {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, + {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, + {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, + {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, + {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, + {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, + {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, + {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, + {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, + {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, + {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, + {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, + {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, + {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, + {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, + {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, + {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, + {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, +] + [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "bfbd56477db4d64ce91da34497f339eeebff48625ef08ba69aff68b95182c119" +content-hash = "bbc56e5218316ab7ff7fb98e940720bdd61fd61ced05f8eca70062637cb0d823" diff --git a/pyproject.toml b/pyproject.toml index 2dd691d6..6e85b678 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,7 @@ tabulate = "^0.9.0" pydantic = "^2.5.2" dateparser = "^1.2.0" strenum = "^0.4.15" +ocrmypdf = "^16.5.0" [tool.poetry.group.dev.dependencies] diff --git a/src/monopoly/banks/hsbc/hsbc.py b/src/monopoly/banks/hsbc/hsbc.py index 4fd3d08b..47d4fcf4 100644 --- a/src/monopoly/banks/hsbc/hsbc.py +++ b/src/monopoly/banks/hsbc/hsbc.py @@ -26,9 +26,7 @@ class Hsbc(BankBase): multiline_transactions=True, ) - pdf_config = PdfConfig( - page_bbox=(0, 0, 379, 842), - ) + pdf_config = PdfConfig(page_bbox=(0, 0, 379, 840), apply_ocr=True) identifiers = [ [ @@ -39,6 +37,11 @@ class Hsbc(BankBase): ), TextIdentifier("HSBC"), ], + [ + MetadataIdentifier( + format="PDF 1.7", producer="OpenText Output Transformation Engine" + ) + ], ] statement_configs = [credit_config] diff --git a/src/monopoly/config.py b/src/monopoly/config.py index ebe571d3..bfdc6a2a 100644 --- a/src/monopoly/config.py +++ b/src/monopoly/config.py @@ -67,7 +67,10 @@ class PdfConfig: - `page_bbox`: A tuple representing the bounding box range for every page. This is used to avoid weirdness like vertical text, and other PDF artifacts that may affect parsing. + - `apply_ocr`: Whether to attempt to apply OCR on the PDF. If the PDF already + has OCR, the original OCR text will be retained. """ page_range: tuple[Optional[int], Optional[int]] = (None, None) page_bbox: Optional[tuple[float, float, float, float]] = None + apply_ocr: bool = False diff --git a/src/monopoly/constants/date.py b/src/monopoly/constants/date.py index 8b62bc82..43346f60 100644 --- a/src/monopoly/constants/date.py +++ b/src/monopoly/constants/date.py @@ -28,6 +28,7 @@ class ISO8601(RegexEnum): DD_MM = rf"\b({DateFormats.DD}[\/\-\s]{DateFormats.MM})" DD_MM_YY = rf"\b({DateFormats.DD}[\/\-\s]{DateFormats.MM}[\/\-\s]{DateFormats.YY})" DD_MMM = rf"\b({DateFormats.DD}[-\s]{DateFormats.MMM})" + DD_MMM_RELAXED = DD_MMM.replace(r"[-\s]", r"(?:[-\s]|)") DD_MMM_YY = rf"\b({DateFormats.DD}[-\s]{DateFormats.MMM}[-\s]{DateFormats.YY})" DD_MMM_YYYY = ( rf"\b({DateFormats.DD}[-\s]{DateFormats.MMM}[,\s]{{1,2}}{DateFormats.YYYY})" diff --git a/src/monopoly/constants/statement.py b/src/monopoly/constants/statement.py index e48e3ee0..a9324af7 100644 --- a/src/monopoly/constants/statement.py +++ b/src/monopoly/constants/statement.py @@ -103,8 +103,8 @@ class CreditTransactionPatterns(RegexEnum): + SharedPatterns.AMOUNT_EXTENDED ) HSBC = ( - rf"(?P{ISO8601.DD_MMM})\s+" - rf"(?P{ISO8601.DD_MMM})\s+" + rf"(?P{ISO8601.DD_MMM_RELAXED})\s+" + rf"(?P{ISO8601.DD_MMM_RELAXED})\s+" + SharedPatterns.DESCRIPTION + SharedPatterns.AMOUNT_EXTENDED ) diff --git a/src/monopoly/identifiers.py b/src/monopoly/identifiers.py index face2821..258ea758 100644 --- a/src/monopoly/identifiers.py +++ b/src/monopoly/identifiers.py @@ -10,6 +10,7 @@ class Identifier: class MetadataIdentifier(Identifier): """Stores the metadata attributes of a PDF""" + format: str = "" title: str = "" author: str = "" subject: str = "" diff --git a/src/monopoly/log.py b/src/monopoly/log.py index 78dbe3cf..4c0a9926 100644 --- a/src/monopoly/log.py +++ b/src/monopoly/log.py @@ -17,6 +17,7 @@ def get_logger() -> logging.Logger: logging.getLogger("pdf2john").setLevel(logging.ERROR) logging.getLogger("pyhanko").setLevel(logging.ERROR) logging.getLogger("tzlocal").setLevel(logging.ERROR) + logging.getLogger("pikepdf").setLevel(logging.ERROR) return logger diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index 452cf142..bca29009 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -7,6 +7,8 @@ import fitz import pdftotext +from ocrmypdf import Verbosity, configure_logging, ocr +from ocrmypdf.exceptions import PriorOcrFoundError, TaggedPDFError from pydantic import SecretStr from pydantic_settings import BaseSettings, SettingsConfigDict @@ -66,7 +68,7 @@ def __init__( self, passwords: Optional[list[SecretStr]] = None, file_path: Optional[Path] = None, - file_bytes: Optional[bytes] = None, + file_bytes: Optional[bytes | BytesIO] = None, ): self.passwords = passwords or PdfPasswords().pdf_passwords self.file_path = file_path @@ -134,6 +136,10 @@ def page_range(self): def page_bbox(self): return self.pdf_config.page_bbox + @cached_property + def apply_ocr(self): + return self.pdf_config.apply_ocr + @lru_cache def get_pages(self) -> list[PdfPage]: logger.debug("Extracting text from PDF") @@ -151,7 +157,10 @@ def get_pages(self) -> list[PdfPage]: page.set_cropbox(cropbox) page = self._remove_vertical_text(page) - # certain statements require garbage collection, so that duplicate objects + if self.apply_ocr: + document = self._apply_ocr(document) + + # certain statements requsire garbage collection, so that duplicate objects # do not cause pdftotext to fail due to missing xrefs/null values # however, setting `garbage=2` may cause issues with other statements # so an initial attempt should be made to run using `garbage=0` @@ -197,3 +206,32 @@ def _remove_vertical_text(page: fitz.Page): page.add_redact_annot(line["bbox"]) page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE) return page + + @staticmethod + def _apply_ocr(document: PdfDocument) -> PdfDocument: + added_ocr = False + try: + logger.debug("Applying OCR") + original_metadata = document.metadata + output_bytes = BytesIO() + configure_logging(Verbosity.quiet) + logging.getLogger("ocrmypdf").setLevel(logging.ERROR) + ocr( + BytesIO(document.tobytes()), + output_bytes, + language="eng", + tesseract_config="tesseract.cfg", + progress_bar=False, + ) + output_bytes.seek(0) + added_ocr = True + + except (PriorOcrFoundError, TaggedPDFError): + pass + + # pylint: disable=attribute-defined-outside-init + if added_ocr: + logger.debug("Adding OCR layer to document") + document = PdfDocument(file_bytes=output_bytes) + document.metadata = original_metadata + return document diff --git a/tesseract.cfg b/tesseract.cfg new file mode 100644 index 00000000..54f41e1f --- /dev/null +++ b/tesseract.cfg @@ -0,0 +1,2 @@ +tessedit_char_whitelist "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-&@$*,. " +preserve_interword_spaces 1 From dceffdd6e1e147a154c3b33b2c49070fb8ef650d Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 00:41:56 +0800 Subject: [PATCH 05/27] chore(constants): remove case insensitive modifier from formats with no words --- src/monopoly/constants/date.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/monopoly/constants/date.py b/src/monopoly/constants/date.py index 43346f60..7c266d8f 100644 --- a/src/monopoly/constants/date.py +++ b/src/monopoly/constants/date.py @@ -14,14 +14,14 @@ class DateFormats(StrEnum): """Holds a case-insensitive list of common ISO 8601 date formats""" - D = r"(?i:1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31)" - DD = r"(?i:01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31)" - M = r"(?i:1|2|3|4|5|6|7|8|9|10|11|12)" - MM = r"(?i:01|02|03|04|05|06|07|08|09|10|11|12)" + D = r"(1|2|3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31)" + DD = r"(01|02|03|04|05|06|07|08|09|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31)" + M = r"(1|2|3|4|5|6|7|8|9|10|11|12)" + MM = r"(01|02|03|04|05|06|07|08|09|10|11|12)" MMM = r"(?i:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)" MMMM = r"(?i:January|February|March|April|May|June|July|August|September|October|November|December)" - YY = r"(?i:[2-5][0-9]\b)" - YYYY = r"(?i:20\d{2}\b)" + YY = r"([2-5][0-9]\b)" + YYYY = r"(20\d{2}\b)" class ISO8601(RegexEnum): From 46a8020672d5ea82c8a98cf66e0b23e91e05bef8 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 00:45:11 +0800 Subject: [PATCH 06/27] refactor(pdf): use file_path as first arg to PdfDocument --- src/monopoly/pdf.py | 4 ++-- src/monopoly/pipeline.py | 2 +- tests/integration/test_parser.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index bca29009..b3025d8f 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -66,13 +66,13 @@ class PdfDocument(fitz.Document): def __init__( self, - passwords: Optional[list[SecretStr]] = None, file_path: Optional[Path] = None, file_bytes: Optional[bytes | BytesIO] = None, + passwords: Optional[list[SecretStr]] = None, ): - self.passwords = passwords or PdfPasswords().pdf_passwords self.file_path = file_path self.file_bytes = file_bytes + self.passwords = passwords or PdfPasswords().pdf_passwords args = {"filename": self.file_path, "stream": self.file_bytes} super().__init__(**args) diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index 36629aa6..d49fdfde 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -42,7 +42,7 @@ def __init__( "Only one of `file_path` or `file_bytes` should be passed" ) - document = PdfDocument(passwords, file_bytes=file_bytes, file_path=file_path) + document = PdfDocument(file_path, file_bytes, passwords) bank = bank or self.detect_bank(document) parser = PdfParser(bank, document) self.handler = self.create_handler(bank, parser) diff --git a/tests/integration/test_parser.py b/tests/integration/test_parser.py index 23a50cb6..d563c142 100644 --- a/tests/integration/test_parser.py +++ b/tests/integration/test_parser.py @@ -24,7 +24,7 @@ def test_get_pages_with_no_text(parser: PdfParser): def test_get_pages_invalid_returns_error(parser: PdfParser): - pdf_document = PdfDocument(fixture_directory / "4_pages_blank.pdf") + pdf_document = PdfDocument(file_path=fixture_directory / "4_pages_blank.pdf") parser.document = pdf_document parser.page_range = slice(99, -99) From 18b0b0850cf4c6eb20c0abc3dbbbe1fdf9f81f0d Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 00:51:44 +0800 Subject: [PATCH 07/27] build(deps): add ocrmypdf as a system dependency --- .github/workflows/ci.yaml | 2 +- .github/workflows/publish.yaml | 2 +- .github/workflows/tests.yaml | 2 +- README.md | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3e5b5031..f9570d6f 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,7 +15,7 @@ jobs: - name: Install pdftotext uses: daaku/gh-action-apt-install@v4 with: - packages: build-essential libpoppler-cpp-dev pkg-config + packages: build-essential libpoppler-cpp-dev pkg-config ocrmypdf - name: Setup Python & Poetry uses: ./.github/actions/setup-python-poetry diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 4416043d..c2cdcd63 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -23,7 +23,7 @@ jobs: - name: Install pdftotext uses: daaku/gh-action-apt-install@v4 with: - packages: build-essential libpoppler-cpp-dev pkg-config + packages: build-essential libpoppler-cpp-dev pkg-config ocrmypdf - name: Setup Python & Poetry uses: ./.github/actions/setup-python-poetry diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml index eae5b6d9..42b3a232 100644 --- a/.github/workflows/tests.yaml +++ b/.github/workflows/tests.yaml @@ -15,7 +15,7 @@ jobs: - name: Install pdftotext uses: daaku/gh-action-apt-install@v4 with: - packages: build-essential libpoppler-cpp-dev pkg-config + packages: build-essential libpoppler-cpp-dev pkg-config ocrmypdf - name: Setup Python & Poetry uses: ./.github/actions/setup-python-poetry diff --git a/README.md b/README.md index 75df55d0..496ea002 100644 --- a/README.md +++ b/README.md @@ -26,13 +26,13 @@ Monopoly is a pip-installable Python package on [PyPI](https://pypi.org/project/ Since Monopoly uses `pdftotext`, you'll need to install additional dependencies: ```sh -apt-get install build-essential libpoppler-cpp-dev pkg-config +apt-get install build-essential libpoppler-cpp-dev pkg-config ocrmypdf ``` or ```sh -brew install gcc@11 pkg-config poppler +brew install gcc@11 pkg-config poppler ocrmypdf ``` Then install with pipx: From b7274a32e9469ae3d9b115309c5e19567b8934ba Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 21:40:22 +0800 Subject: [PATCH 08/27] chore(pdf): improve ocrmypdf performance --- src/monopoly/pdf.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index b3025d8f..0ad1fa3e 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -222,6 +222,9 @@ def _apply_ocr(document: PdfDocument) -> PdfDocument: language="eng", tesseract_config="tesseract.cfg", progress_bar=False, + optimize=0, + fast_web_view=999999, + output_type="pdf", ) output_bytes.seek(0) added_ocr = True From 472d82381ac6c421e690c9b1fb56813940598f68 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 21:46:39 +0800 Subject: [PATCH 09/27] chore(pipeline): shorten create_handler function signature --- src/monopoly/pipeline.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index d49fdfde..163d8a7a 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -48,9 +48,7 @@ def __init__( self.handler = self.create_handler(bank, parser) @staticmethod - def create_handler( - bank: Type[BankBase], parser: PdfParser - ) -> GenericStatementHandler | StatementHandler: + def create_handler(bank: Type[BankBase], parser: PdfParser) -> StatementHandler: if issubclass(bank, GenericBank): logger.debug("Using generic statement handler") return GenericStatementHandler(parser) From c58d10825f48734cf6264cf2aec5694c1c601049 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 21:51:05 +0800 Subject: [PATCH 10/27] refactor(pipeline): move parser & handler creation logic to extract --- src/monopoly/pipeline.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index 163d8a7a..96ecc8e2 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -42,10 +42,8 @@ def __init__( "Only one of `file_path` or `file_bytes` should be passed" ) - document = PdfDocument(file_path, file_bytes, passwords) - bank = bank or self.detect_bank(document) - parser = PdfParser(bank, document) - self.handler = self.create_handler(bank, parser) + self.document = PdfDocument(file_path, file_bytes, passwords) + self.bank = bank or self.detect_bank(self.document) @staticmethod def create_handler(bank: Type[BankBase], parser: PdfParser) -> StatementHandler: @@ -66,7 +64,9 @@ def detect_bank(document) -> Type[BankBase]: def extract(self, safety_check=True) -> BaseStatement: """Extracts transactions from the statement, and performs a safety check to make sure that total transactions add up""" - statement = self.handler.statement + parser = PdfParser(self.bank, self.document) + handler = self.create_handler(self.bank, parser) + statement = handler.statement transactions = statement.get_transactions() if not transactions: From 6960cf98c4317e9f462a2661fea630e52950380c Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 22:22:42 +0800 Subject: [PATCH 11/27] refactor: pass PdfPages instead of parser --- src/monopoly/generic/generic_handler.py | 10 +++--- src/monopoly/handler.py | 37 ++++++++-------------- src/monopoly/pipeline.py | 13 ++++---- src/monopoly/statements/base.py | 33 ++++++++----------- src/monopoly/statements/debit_statement.py | 3 +- tests/conftest.py | 6 +--- 6 files changed, 41 insertions(+), 61 deletions(-) diff --git a/src/monopoly/generic/generic_handler.py b/src/monopoly/generic/generic_handler.py index c8305aae..84a335b7 100644 --- a/src/monopoly/generic/generic_handler.py +++ b/src/monopoly/generic/generic_handler.py @@ -1,11 +1,12 @@ import logging from functools import cached_property +from typing import Type from monopoly.banks import BankBase from monopoly.config import StatementConfig from monopoly.constants import EntryType, InternalBankNames from monopoly.handler import StatementHandler -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfPage from .generic import DatePatternAnalyzer @@ -29,13 +30,12 @@ def identifiers(self): class GenericStatementHandler(StatementHandler): - def __init__(self, parser: PdfParser): - pages = parser.get_pages() + def __init__(self, bank: Type[BankBase], pages: list[PdfPage]): self.analyzer = DatePatternAnalyzer(pages) - parser.bank.statement_configs = list( + bank.statement_configs = list( filter(None, [self.debit_config, self.credit_config]) ) - super().__init__(parser) + super().__init__(bank, pages) # override get_header and ignore passed config, since # the header line has already been found diff --git a/src/monopoly/handler.py b/src/monopoly/handler.py index b92b839b..dd326c7f 100644 --- a/src/monopoly/handler.py +++ b/src/monopoly/handler.py @@ -1,9 +1,11 @@ import logging -from functools import cached_property +from functools import lru_cache +from typing import Type +from monopoly.banks import BankBase from monopoly.config import StatementConfig from monopoly.constants import EntryType -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfPage from monopoly.statements import BaseStatement, CreditStatement, DebitStatement logger = logging.getLogger(__name__) @@ -16,43 +18,30 @@ class StatementHandler: based on the debit and credit config. """ - def __init__(self, parser: PdfParser): - self.parser = parser - self.bank = parser.bank - - @property - def transactions(self): - return self.statement.transactions - - @property - def statement_date(self): - return self.statement.statement_date + def __init__(self, bank: Type[BankBase], pages: list[PdfPage]): + self.bank = bank + self.pages = pages def get_header(self, config: StatementConfig) -> str | None: pattern = config.header_pattern - pages = self.parser.get_pages() - for page in pages: + for page in self.pages: for line in page.lines: if match := pattern.search(line): return match.group().lower() return None - def perform_safety_check(self): - self.statement.perform_safety_check() - - @cached_property - def statement(self) -> BaseStatement: - parser = self.parser - + @lru_cache + def get_statement(self) -> BaseStatement: + pages = self.pages for config in self.bank.statement_configs: if header := self.get_header(config): match config.statement_type: case EntryType.DEBIT: logger.debug("Statement type detected: %s", EntryType.DEBIT) - return DebitStatement(parser, config, header) + return DebitStatement(pages, config, header) case EntryType.CREDIT: logger.debug("Statement type detected: %s", EntryType.CREDIT) - return CreditStatement(parser, config, header) + return CreditStatement(pages, config, header) raise RuntimeError("Could not create statement object") diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index 96ecc8e2..5a7eb51e 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -12,7 +12,7 @@ from monopoly.generic import GenericStatementHandler from monopoly.generic.generic_handler import GenericBank from monopoly.handler import StatementHandler -from monopoly.pdf import PdfDocument, PdfParser +from monopoly.pdf import PdfDocument, PdfPage, PdfParser from monopoly.statements import BaseStatement from monopoly.statements.transaction import Transaction from monopoly.write import generate_name @@ -46,12 +46,12 @@ def __init__( self.bank = bank or self.detect_bank(self.document) @staticmethod - def create_handler(bank: Type[BankBase], parser: PdfParser) -> StatementHandler: + def create_handler(bank: Type[BankBase], pages: list[PdfPage]) -> StatementHandler: if issubclass(bank, GenericBank): logger.debug("Using generic statement handler") - return GenericStatementHandler(parser) + return GenericStatementHandler(bank, pages) logger.debug("Using statement handler with bank: %s", bank.__name__) - return StatementHandler(parser) + return StatementHandler(bank, pages) @staticmethod def detect_bank(document) -> Type[BankBase]: @@ -65,8 +65,9 @@ def extract(self, safety_check=True) -> BaseStatement: """Extracts transactions from the statement, and performs a safety check to make sure that total transactions add up""" parser = PdfParser(self.bank, self.document) - handler = self.create_handler(self.bank, parser) - statement = handler.statement + pages = parser.get_pages() + handler = self.create_handler(self.bank, pages) + statement = handler.get_statement() transactions = statement.get_transactions() if not transactions: diff --git a/src/monopoly/statements/base.py b/src/monopoly/statements/base.py index a88501f4..b11437d8 100644 --- a/src/monopoly/statements/base.py +++ b/src/monopoly/statements/base.py @@ -3,13 +3,12 @@ from abc import ABC from datetime import datetime from functools import cached_property, lru_cache -from pathlib import Path from dateparser import parse from monopoly.config import StatementConfig from monopoly.constants import Columns, SharedPatterns -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfPage from monopoly.statements.transaction import ( Transaction, TransactionGroupDict, @@ -28,17 +27,20 @@ class BaseStatement(ABC): """ statement_type = "base" + columns: list[str] = [ + Columns.DATE, + Columns.DESCRIPTION, + Columns.AMOUNT, + ] - def __init__(self, parser: PdfParser, config: StatementConfig, header: str): - self.pages = parser.get_pages() + def __init__( + self, + pages: list[PdfPage], + config: StatementConfig, + header: str, + ): self.config = config - self.columns: list[str] = [ - Columns.DATE, - Columns.DESCRIPTION, - Columns.AMOUNT, - ] - self.parser = parser - self.document = parser.document + self.pages = pages self.header = header @cached_property @@ -55,10 +57,6 @@ def subtotal_pattern(self) -> re.Pattern: rf"(?:sub\stotal.*?)\s+{SharedPatterns.AMOUNT}", re.IGNORECASE ) - @property - def bank(self): - return self.parser.bank - @property def pattern(self): return self.config.transaction_pattern @@ -166,10 +164,7 @@ def get_multiline_description( @property def failed_safety_message(self) -> str: - return ( - f"Safety check for {Path(self.document.name).stem} failed - " - "transactions may be inaccurate" - ) + return "Safety check for failed - transactions may be inaccurate" @cached_property def transactions(self): diff --git a/src/monopoly/statements/debit_statement.py b/src/monopoly/statements/debit_statement.py index 85c725e9..f408f8f8 100644 --- a/src/monopoly/statements/debit_statement.py +++ b/src/monopoly/statements/debit_statement.py @@ -87,8 +87,7 @@ def get_header_pos(self, column_name: str, page_number: int) -> int: 16 OCT item 123.12 ``` """ - pages = list(self.pages) - lines = pages[page_number].lines + lines = self.pages[page_number].lines for line in lines: header_start_pos = line.lower().find(column_name.lower()) if header_start_pos == -1: diff --git a/tests/conftest.py b/tests/conftest.py index d1b05dd4..651e498e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -62,16 +62,12 @@ def setup_statement_fixture( statement_cls: BaseStatement, statement_config, ): - mock_parser = MagicMock(spec=PdfParser) mock_page = Mock(spec=PdfPage) mock_page.lines = ["foo", "bar"] mock_page.raw_text = ["foo\nbar"] - mock_parser.get_pages.return_value = [mock_page] - document = MagicMock(spec=fitz.Document) document.name = "mock_document.pdf" - mock_parser.document = document - statement = statement_cls(parser=mock_parser, config=statement_config, header="foo") + statement = statement_cls(pages=[mock_page], config=statement_config, header="foo") yield statement From db743c16c6c764e32f7ebbfc90272ec312ad4f15 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 22:25:54 +0800 Subject: [PATCH 12/27] chore(generic): move GenericBank to generic __init__ --- src/monopoly/generic/__init__.py | 8 ++------ src/monopoly/pipeline.py | 3 +-- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/monopoly/generic/__init__.py b/src/monopoly/generic/__init__.py index fa9d1795..86e9ebd7 100644 --- a/src/monopoly/generic/__init__.py +++ b/src/monopoly/generic/__init__.py @@ -1,8 +1,4 @@ from .generic import DateMatch, DatePatternAnalyzer -from .generic_handler import GenericStatementHandler +from .generic_handler import GenericBank, GenericStatementHandler -__all__ = [ - "DatePatternAnalyzer", - "DateMatch", - "GenericStatementHandler", -] +__all__ = ["DatePatternAnalyzer", "DateMatch", "GenericStatementHandler", "GenericBank"] diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index 5a7eb51e..e6e99199 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -9,8 +9,7 @@ from monopoly.bank_detector import BankDetector from monopoly.banks import BankBase from monopoly.config import DateOrder -from monopoly.generic import GenericStatementHandler -from monopoly.generic.generic_handler import GenericBank +from monopoly.generic import GenericBank, GenericStatementHandler from monopoly.handler import StatementHandler from monopoly.pdf import PdfDocument, PdfPage, PdfParser from monopoly.statements import BaseStatement From fcae17064127fe2063470ab30ea29b8552bf7d11 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 22:26:37 +0800 Subject: [PATCH 13/27] chore(pipeline): import Transaction from statements namespace --- src/monopoly/pipeline.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index e6e99199..d307583d 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -12,8 +12,7 @@ from monopoly.generic import GenericBank, GenericStatementHandler from monopoly.handler import StatementHandler from monopoly.pdf import PdfDocument, PdfPage, PdfParser -from monopoly.statements import BaseStatement -from monopoly.statements.transaction import Transaction +from monopoly.statements import BaseStatement, Transaction from monopoly.write import generate_name logger = logging.getLogger(__name__) From 7bc4ed1e73f2bb6cf988db56364dcdcc7fcc22fc Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 22:28:18 +0800 Subject: [PATCH 14/27] chore: rename generic/generic_handler to generic/handler --- src/monopoly/generic/__init__.py | 2 +- src/monopoly/generic/{generic_handler.py => handler.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename src/monopoly/generic/{generic_handler.py => handler.py} (100%) diff --git a/src/monopoly/generic/__init__.py b/src/monopoly/generic/__init__.py index 86e9ebd7..8af5b96c 100644 --- a/src/monopoly/generic/__init__.py +++ b/src/monopoly/generic/__init__.py @@ -1,4 +1,4 @@ from .generic import DateMatch, DatePatternAnalyzer -from .generic_handler import GenericBank, GenericStatementHandler +from .handler import GenericBank, GenericStatementHandler __all__ = ["DatePatternAnalyzer", "DateMatch", "GenericStatementHandler", "GenericBank"] diff --git a/src/monopoly/generic/generic_handler.py b/src/monopoly/generic/handler.py similarity index 100% rename from src/monopoly/generic/generic_handler.py rename to src/monopoly/generic/handler.py From 1fedd41b5b16409a8a335000c6b1bfa67978ce89 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 22:59:40 +0800 Subject: [PATCH 15/27] refactor(pipeline): move bank detection logic to CLI --- src/monopoly/cli.py | 12 ++++++++++-- src/monopoly/generic/patterns.py | 4 ++-- src/monopoly/pipeline.py | 13 ++----------- tests/integration/banks/test_banks_debit.py | 2 +- tests/integration/banks/test_banks_generic_debit.py | 1 - tests/integration/test_pipeline.py | 2 +- 6 files changed, 16 insertions(+), 18 deletions(-) diff --git a/src/monopoly/cli.py b/src/monopoly/cli.py index 4b8c18b4..6bbe5606 100644 --- a/src/monopoly/cli.py +++ b/src/monopoly/cli.py @@ -123,10 +123,18 @@ def process_statement( information about the processed statement. If an error occurs during processing, returns a Result object with error information. """ - from monopoly.pipeline import Pipeline # pylint: disable=import-outside-toplevel + # pylint: disable=import-outside-toplevel, too-many-locals + from monopoly.bank_detector import BankDetector + from monopoly.generic import GenericBank + from monopoly.pdf import PdfDocument + from monopoly.pipeline import Pipeline try: - pipeline = Pipeline(file) + document = PdfDocument(file) + analyzer = BankDetector(document) + bank = analyzer.detect_bank() or GenericBank + + pipeline = Pipeline(file, bank=bank) statement = pipeline.extract(safety_check=safety_check) transactions = pipeline.transform(statement) diff --git a/src/monopoly/generic/patterns.py b/src/monopoly/generic/patterns.py index 03bbff70..4a4a56b3 100644 --- a/src/monopoly/generic/patterns.py +++ b/src/monopoly/generic/patterns.py @@ -144,8 +144,8 @@ def get_transaction_pattern(self) -> DatePattern: max_span_occurrences = 0 most_common_pattern = None - # Sort patterns so that those ending with "yy" come first - sorted_patterns = sorted(self, key=lambda p: not p.name.endswith("yy")) + # Sort patterns so that those ending with "yy" come last + sorted_patterns = sorted(self, key=lambda p: p.name.endswith("yy")) for pattern in sorted_patterns: if counter := pattern.span_occurrences: diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index d307583d..9eec4731 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -6,7 +6,6 @@ from dateparser import parse from pydantic import SecretStr -from monopoly.bank_detector import BankDetector from monopoly.banks import BankBase from monopoly.config import DateOrder from monopoly.generic import GenericBank, GenericStatementHandler @@ -26,11 +25,12 @@ def __init__( file_path: Optional[Path] = None, file_bytes: Optional[bytes] = None, passwords: Optional[list[SecretStr]] = None, - bank: Optional[Type[BankBase]] = None, + bank: Type[BankBase] = GenericBank, ): self.file_path = file_path self.file_bytes = file_bytes self.passwords = passwords + self.bank = bank if not any([self.file_path, self.file_bytes]): raise RuntimeError("Either `file_path` or `file_bytes` must be passed") @@ -41,7 +41,6 @@ def __init__( ) self.document = PdfDocument(file_path, file_bytes, passwords) - self.bank = bank or self.detect_bank(self.document) @staticmethod def create_handler(bank: Type[BankBase], pages: list[PdfPage]) -> StatementHandler: @@ -51,14 +50,6 @@ def create_handler(bank: Type[BankBase], pages: list[PdfPage]) -> StatementHandl logger.debug("Using statement handler with bank: %s", bank.__name__) return StatementHandler(bank, pages) - @staticmethod - def detect_bank(document) -> Type[BankBase]: - analyzer = BankDetector(document) - if bank := analyzer.detect_bank(): - return bank - logger.warning("Unable to detect bank, transactions may be inaccurate") - return GenericBank - def extract(self, safety_check=True) -> BaseStatement: """Extracts transactions from the statement, and performs a safety check to make sure that total transactions add up""" diff --git a/tests/integration/banks/test_banks_debit.py b/tests/integration/banks/test_banks_debit.py index dbc97044..012c6d0e 100644 --- a/tests/integration/banks/test_banks_debit.py +++ b/tests/integration/banks/test_banks_debit.py @@ -29,7 +29,7 @@ def test_bank_debit_statements( ): bank_name = bank.debit_config.bank_name test_directory = Path(__file__).parent / bank_name / "debit" - pipeline = Pipeline(test_directory / "input.pdf") + pipeline = Pipeline(test_directory / "input.pdf", bank=bank) statement: DebitStatement = pipeline.extract() # check raw data diff --git a/tests/integration/banks/test_banks_generic_debit.py b/tests/integration/banks/test_banks_generic_debit.py index d7d3e952..80ab2059 100644 --- a/tests/integration/banks/test_banks_generic_debit.py +++ b/tests/integration/banks/test_banks_generic_debit.py @@ -32,7 +32,6 @@ def test_bank_debit_statements( expected_debit_sum: float, expected_credit_sum: float, statement_date: datetime, - no_banks, ): bank_name = bank.debit_config.bank_name test_directory = Path(__file__).parent / bank_name / "debit" diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index 5b039ceb..321315bf 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -61,7 +61,7 @@ def test_pipeline_initialization_with_neither_raises_error(): def test_pipeline_bytes_etl(pdf_file_bytes): - pipeline = Pipeline(file_bytes=pdf_file_bytes) + pipeline = Pipeline(file_bytes=pdf_file_bytes, bank=ExampleBank) statement = pipeline.extract() transactions = pipeline.transform(statement) assert len(transactions) == 53 From 25a68bf7164ca74d67ace737db8774bbb4cd0adf Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 23:21:28 +0800 Subject: [PATCH 16/27] refactor(detector): move detector to banks namespace --- src/monopoly/banks/__init__.py | 7 +++-- .../{bank_detector.py => banks/detector.py} | 15 ++++++++--- .../{examples => banks}/example_bank.py | 3 ++- src/monopoly/cli.py | 4 +-- src/monopoly/examples/__init__.py | 3 --- src/monopoly/pdf.py | 4 +-- tests/conftest.py | 2 +- .../test_auto_detect_bank.py | 27 +++++++------------ .../test_check_matching_field.py | 2 +- .../test_get_identifier.py | 2 +- .../test_is_bank_identified.py | 2 +- .../test_pdf_properties_match.py | 2 +- 12 files changed, 37 insertions(+), 36 deletions(-) rename src/monopoly/{bank_detector.py => banks/detector.py} (94%) rename src/monopoly/{examples => banks}/example_bank.py (96%) delete mode 100644 src/monopoly/examples/__init__.py diff --git a/src/monopoly/banks/__init__.py b/src/monopoly/banks/__init__.py index e7a5b4bf..662b9448 100644 --- a/src/monopoly/banks/__init__.py +++ b/src/monopoly/banks/__init__.py @@ -1,16 +1,17 @@ import logging from typing import Type -from ..examples.example_bank import ExampleBank from .base import BankBase from .citibank import Citibank from .dbs import Dbs +from .detector import BankDetector +from .example_bank import ExampleBank from .hsbc import Hsbc from .maybank import Maybank from .ocbc import Ocbc from .standard_chartered import StandardChartered -banks: list[Type[BankBase]] = [ +banks: list[Type["BankBase"]] = [ Citibank, Dbs, ExampleBank, @@ -21,3 +22,5 @@ ] logger = logging.getLogger(__name__) + +__all__ = ["BankDetector", "BankBase", *banks] diff --git a/src/monopoly/bank_detector.py b/src/monopoly/banks/detector.py similarity index 94% rename from src/monopoly/bank_detector.py rename to src/monopoly/banks/detector.py index 75f15c6e..4e4cb8ac 100644 --- a/src/monopoly/bank_detector.py +++ b/src/monopoly/banks/detector.py @@ -1,12 +1,14 @@ import logging from dataclasses import Field, fields from functools import cached_property -from typing import Any, Type +from typing import TYPE_CHECKING, Any, Type -from monopoly.banks import BankBase, banks from monopoly.identifiers import Identifier, MetadataIdentifier, TextIdentifier from monopoly.pdf import PdfDocument +if TYPE_CHECKING: + from .base import BankBase + logger = logging.getLogger(__name__) @@ -29,11 +31,16 @@ def metadata_items(self) -> list[Any]: return identifiers - def detect_bank(self) -> Type[BankBase] | None: + def detect_bank( + self, banks: list[Type["BankBase"]] = None + ) -> Type["BankBase"] | None: """ Reads the encryption metadata or actual metadata (if the PDF is not encrypted), and checks for a bank based on unique identifiers. """ + if not banks: + banks = [] + logger.debug("Found PDF properties: %s", self.metadata_items) for bank in banks: @@ -43,7 +50,7 @@ def detect_bank(self) -> Type[BankBase] | None: def is_bank_identified( self, - bank: Type[BankBase], + bank: Type["BankBase"], ) -> bool: """ Checks if a bank is identified based on a list of metadata items. diff --git a/src/monopoly/examples/example_bank.py b/src/monopoly/banks/example_bank.py similarity index 96% rename from src/monopoly/examples/example_bank.py rename to src/monopoly/banks/example_bank.py index 6ac3d8fc..cc39acbf 100644 --- a/src/monopoly/examples/example_bank.py +++ b/src/monopoly/banks/example_bank.py @@ -1,10 +1,11 @@ from re import compile as regex -from monopoly.banks.base import BankBase from monopoly.config import StatementConfig from monopoly.constants import EntryType, InternalBankNames, SharedPatterns from monopoly.identifiers import TextIdentifier +from .base import BankBase + class ExampleBank(BankBase): """Dummy class to help with reading the example PDF statement""" diff --git a/src/monopoly/cli.py b/src/monopoly/cli.py index 6bbe5606..c58f7d17 100644 --- a/src/monopoly/cli.py +++ b/src/monopoly/cli.py @@ -124,7 +124,7 @@ def process_statement( returns a Result object with error information. """ # pylint: disable=import-outside-toplevel, too-many-locals - from monopoly.bank_detector import BankDetector + from monopoly.banks import BankDetector, banks from monopoly.generic import GenericBank from monopoly.pdf import PdfDocument from monopoly.pipeline import Pipeline @@ -132,7 +132,7 @@ def process_statement( try: document = PdfDocument(file) analyzer = BankDetector(document) - bank = analyzer.detect_bank() or GenericBank + bank = analyzer.detect_bank(banks) or GenericBank pipeline = Pipeline(file, bank=bank) statement = pipeline.extract(safety_check=safety_check) diff --git a/src/monopoly/examples/__init__.py b/src/monopoly/examples/__init__.py deleted file mode 100644 index 06d2f26c..00000000 --- a/src/monopoly/examples/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .example_bank import ExampleBank - -__all__ = ["ExampleBank"] diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index 0ad1fa3e..b39ab9f8 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -3,7 +3,7 @@ from functools import cached_property, lru_cache from io import BytesIO from pathlib import Path -from typing import Optional, Type +from typing import Optional import fitz import pdftotext @@ -114,7 +114,7 @@ def raw_text(self) -> str: class PdfParser: - def __init__(self, bank: Type[BankBase], document: PdfDocument): + def __init__(self, bank: BankBase, document: PdfDocument): """ Class responsible for parsing PDFs and returning raw text diff --git a/tests/conftest.py b/tests/conftest.py index 651e498e..9803db2d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,7 +4,7 @@ import fitz import pytest -from monopoly.bank_detector import BankDetector +from monopoly.banks.detector import BankDetector from monopoly.config import DateOrder, PdfConfig, StatementConfig from monopoly.constants import EntryType from monopoly.handler import StatementHandler diff --git a/tests/unit/test_bank_identifier/test_auto_detect_bank.py b/tests/unit/test_bank_identifier/test_auto_detect_bank.py index 3b9b6c7d..31c82a0b 100644 --- a/tests/unit/test_bank_identifier/test_auto_detect_bank.py +++ b/tests/unit/test_bank_identifier/test_auto_detect_bank.py @@ -2,8 +2,8 @@ import pytest -from monopoly.bank_detector import BankDetector from monopoly.banks.base import BankBase +from monopoly.banks.detector import BankDetector from monopoly.identifiers import MetadataIdentifier, TextIdentifier from monopoly.pdf import PdfDocument @@ -81,7 +81,7 @@ class MockBankWithOnlyTextIdentifier(BankBase): @patch.object(BankDetector, "metadata_items", new_callable=PropertyMock) def test_auto_detect_bank_identified( - mock_metadata_items, monkeypatch, metadata_analyzer: BankDetector + mock_metadata_items, metadata_analyzer: BankDetector ): mock_metadata_items.return_value = [ MetadataIdentifier( @@ -90,30 +90,26 @@ def test_auto_detect_bank_identified( ] mock_banks_list = [MockBankOne, MockBankTwo] - monkeypatch.setattr("monopoly.bank_detector.banks", mock_banks_list) - - bank = metadata_analyzer.detect_bank() + bank = metadata_analyzer.detect_bank(mock_banks_list) assert bank.__name__ == MockBankTwo.__name__ @patch.object(BankDetector, "metadata_items", new_callable=PropertyMock) def test_detect_bank_not_identified( - mock_metadata_items, monkeypatch, metadata_analyzer: BankDetector + mock_metadata_items, metadata_analyzer: BankDetector ): mock_metadata_items.return_value = [ MetadataIdentifier(creator="asdf", producer="qwerty") ] mock_banks_list = [MockBankThree] - monkeypatch.setattr("monopoly.bank_detector.banks", mock_banks_list) - - assert not metadata_analyzer.detect_bank() + assert not metadata_analyzer.detect_bank(mock_banks_list) @patch.object(PdfDocument, "raw_text", new_callable=PropertyMock) @patch.object(BankDetector, "metadata_items", new_callable=PropertyMock) def test_detect_bank_with_text_identifier( - mock_metadata_items, mock_raw_text, monkeypatch, metadata_analyzer: BankDetector + mock_metadata_items, mock_raw_text, metadata_analyzer: BankDetector ): mock_raw_text.return_value = "specific_string, other_specific_string" mock_metadata_items.return_value = [ @@ -121,9 +117,7 @@ def test_detect_bank_with_text_identifier( ] mock_banks_list = [MockBankTwo, MockBankWithMultipleTextIdentifier] - monkeypatch.setattr("monopoly.bank_detector.banks", mock_banks_list) - - bank = metadata_analyzer.detect_bank() + bank = metadata_analyzer.detect_bank(mock_banks_list) assert bank.__name__ == MockBankWithMultipleTextIdentifier.__name__ @@ -139,7 +133,7 @@ def test_detect_bank_with_not_matching_text_identifier( ] mock_banks_list = [MockBankTwo, MockBankWithMultipleTextIdentifier] - monkeypatch.setattr("monopoly.bank_detector.banks", mock_banks_list) + monkeypatch.setattr("monopoly.banks.banks", mock_banks_list) assert not metadata_analyzer.detect_bank() @@ -147,7 +141,7 @@ def test_detect_bank_with_not_matching_text_identifier( @patch.object(PdfDocument, "raw_text", new_callable=PropertyMock) @patch.object(BankDetector, "metadata_items", new_callable=PropertyMock) def test_detect_bank_with_only_text_identifier( - mock_metadata_items, mock_raw_text, monkeypatch, metadata_analyzer: BankDetector + mock_metadata_items, mock_raw_text, metadata_analyzer: BankDetector ): mock_raw_text.return_value = "foo baz bar" mock_metadata_items.return_value = [ @@ -158,7 +152,6 @@ def test_detect_bank_with_only_text_identifier( MockBankWithMultipleTextIdentifier, MockBankWithOnlyTextIdentifier, ] - monkeypatch.setattr("monopoly.bank_detector.banks", mock_banks_list) - bank = metadata_analyzer.detect_bank() + bank = metadata_analyzer.detect_bank(mock_banks_list) assert bank.__name__ == MockBankWithOnlyTextIdentifier.__name__ diff --git a/tests/unit/test_bank_identifier/test_check_matching_field.py b/tests/unit/test_bank_identifier/test_check_matching_field.py index 106c66eb..3fb72180 100644 --- a/tests/unit/test_bank_identifier/test_check_matching_field.py +++ b/tests/unit/test_bank_identifier/test_check_matching_field.py @@ -1,6 +1,6 @@ from unittest.mock import Mock -from monopoly.bank_detector import BankDetector +from monopoly.banks.detector import BankDetector from monopoly.identifiers import MetadataIdentifier diff --git a/tests/unit/test_bank_identifier/test_get_identifier.py b/tests/unit/test_bank_identifier/test_get_identifier.py index 5d2a2ebc..75d11e69 100644 --- a/tests/unit/test_bank_identifier/test_get_identifier.py +++ b/tests/unit/test_bank_identifier/test_get_identifier.py @@ -2,7 +2,7 @@ import pytest -from monopoly.bank_detector import BankDetector +from monopoly.banks.detector import BankDetector from monopoly.identifiers import MetadataIdentifier from monopoly.pdf import PdfDocument diff --git a/tests/unit/test_bank_identifier/test_is_bank_identified.py b/tests/unit/test_bank_identifier/test_is_bank_identified.py index d1873faf..ab9aff88 100644 --- a/tests/unit/test_bank_identifier/test_is_bank_identified.py +++ b/tests/unit/test_bank_identifier/test_is_bank_identified.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from unittest.mock import PropertyMock, patch -from monopoly.bank_detector import BankDetector +from monopoly.banks.detector import BankDetector @dataclass diff --git a/tests/unit/test_bank_identifier/test_pdf_properties_match.py b/tests/unit/test_bank_identifier/test_pdf_properties_match.py index 31365429..e326fe9c 100644 --- a/tests/unit/test_bank_identifier/test_pdf_properties_match.py +++ b/tests/unit/test_bank_identifier/test_pdf_properties_match.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from unittest.mock import PropertyMock, patch -from monopoly.bank_detector import BankDetector +from monopoly.banks.detector import BankDetector @dataclass From 22fe99e421853027718e5737919815e4f93b8ccd Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Tue, 3 Sep 2024 23:40:30 +0800 Subject: [PATCH 17/27] chore: import from pymupdf instead of fitz --- src/monopoly/pdf.py | 10 +++++----- tests/conftest.py | 4 ++-- tests/unit/test_safety_check.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index b39ab9f8..8f5be3db 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -5,12 +5,12 @@ from pathlib import Path from typing import Optional -import fitz import pdftotext from ocrmypdf import Verbosity, configure_logging, ocr from ocrmypdf.exceptions import PriorOcrFoundError, TaggedPDFError from pydantic import SecretStr from pydantic_settings import BaseSettings, SettingsConfigDict +from pymupdf import TEXTFLAGS_TEXT, Document, Page from monopoly.banks import BankBase @@ -61,7 +61,7 @@ class BadPasswordFormatError(Exception): """Exception raised passwords are not provided in a proper format""" -class PdfDocument(fitz.Document): +class PdfDocument(Document): """Handles logic related to the opening, unlocking, and storage of a PDF document.""" def __init__( @@ -181,7 +181,7 @@ def get_pages(self) -> list[PdfPage]: raise RuntimeError("Unable to retrieve pages") @staticmethod - def _remove_vertical_text(page: fitz.Page): + def _remove_vertical_text(page: Page): """Helper function to remove vertical text, based on writing direction (wdir). This helps avoid situations where the PDF is oddly parsed, due to vertical text @@ -199,12 +199,12 @@ def _remove_vertical_text(page: fitz.Page): If line["dir"] != (1, 0), the text of its spans is rotated. """ - for block in page.get_text("dict", flags=fitz.TEXTFLAGS_TEXT)["blocks"]: + for block in page.get_text("dict", flags=TEXTFLAGS_TEXT)["blocks"]: for line in block["lines"]: writing_direction = line["dir"] if writing_direction != (1, 0): page.add_redact_annot(line["bbox"]) - page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_NONE) + page.apply_redactions(images=0) return page @staticmethod diff --git a/tests/conftest.py b/tests/conftest.py index 9803db2d..5334fa06 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,8 @@ import os from unittest.mock import MagicMock, Mock, patch -import fitz import pytest +from pymupdf import Document from monopoly.banks.detector import BankDetector from monopoly.config import DateOrder, PdfConfig, StatementConfig @@ -65,7 +65,7 @@ def setup_statement_fixture( mock_page = Mock(spec=PdfPage) mock_page.lines = ["foo", "bar"] mock_page.raw_text = ["foo\nbar"] - document = MagicMock(spec=fitz.Document) + document = MagicMock(spec=Document) document.name = "mock_document.pdf" statement = statement_cls(pages=[mock_page], config=statement_config, header="foo") yield statement diff --git a/tests/unit/test_safety_check.py b/tests/unit/test_safety_check.py index 5e817a23..7e1add21 100644 --- a/tests/unit/test_safety_check.py +++ b/tests/unit/test_safety_check.py @@ -1,5 +1,5 @@ -import fitz import pytest +from pymupdf import Document from monopoly.banks import BankBase from monopoly.statements import CreditStatement, DebitStatement @@ -15,7 +15,7 @@ class MockProcessor(BankBase): def test_credit_safety_check(credit_statement: CreditStatement): - document = fitz.Document() + document = Document() page = document.new_page() text = "Page 1\n3\nfoo\n02 May\n2.27\n27 Apr\n2.67\ntotal amount 31.50" page.lines = text.split("\n") @@ -35,7 +35,7 @@ def test_credit_safety_check(credit_statement: CreditStatement): def test_debit_safety_check(debit_statement: DebitStatement): - document = fitz.Document() + document = Document() page = document.new_page() text = ( "Page 1\n3\nfoo\n02 May\n-2.5\n27 Apr\n2.67\ntotal credit 30.0 total debit 2.5" @@ -64,7 +64,7 @@ def test_debit_safety_check(debit_statement: DebitStatement): def test_debit_safety_check_failure(debit_statement: DebitStatement): - document = fitz.Document() + document = Document() page = document.new_page() text = "Page 1\n3\nfoo\n02 May\n-999\n27 Apr\n456\nrandom transaction 123" page.lines = text.split("\n") From 62ba9f2207c2682cf8d139140b2519bd3d0ee9a1 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Wed, 4 Sep 2024 00:03:25 +0800 Subject: [PATCH 18/27] refactor: remove unnecessary usage of pydantic dataclasses --- src/monopoly/cli.py | 4 ++-- src/monopoly/config.py | 4 +--- src/monopoly/generic/generic.py | 14 +++++++------- src/monopoly/identifiers.py | 7 ++++++- src/monopoly/statements/base.py | 5 ++++- tests/unit/generic/test_date_pattern_analyzer.py | 4 ++-- 6 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/monopoly/cli.py b/src/monopoly/cli.py index c58f7d17..3b19ce27 100644 --- a/src/monopoly/cli.py +++ b/src/monopoly/cli.py @@ -1,10 +1,10 @@ import traceback from concurrent.futures import ProcessPoolExecutor +from dataclasses import dataclass, field from pathlib import Path from typing import Collection, Iterable, Optional, TypedDict import click -from pydantic.dataclasses import Field, dataclass from tabulate import tabulate from tqdm import tqdm @@ -41,7 +41,7 @@ class Result: source_file_name: str target_file_name: Optional[str] = None - error_info: dict[str, str] = Field(default_factory=dict) + error_info: dict[str, str] = field(default_factory=dict) @dataclass diff --git a/src/monopoly/config.py b/src/monopoly/config.py index bfdc6a2a..51c87aa4 100644 --- a/src/monopoly/config.py +++ b/src/monopoly/config.py @@ -1,8 +1,6 @@ -from dataclasses import field +from dataclasses import dataclass, field from typing import Optional, Pattern -from pydantic.dataclasses import dataclass - from monopoly.constants import BankNames, EntryType, InternalBankNames from monopoly.enums import RegexEnum diff --git a/src/monopoly/generic/generic.py b/src/monopoly/generic/generic.py index 96db76d1..6a8db46f 100644 --- a/src/monopoly/generic/generic.py +++ b/src/monopoly/generic/generic.py @@ -70,7 +70,7 @@ def is_transaction_date_first(self) -> bool: return True @lru_cache - def create_transaction_pattern(self) -> str: + def create_transaction_pattern(self) -> re.Pattern: """ Create a regex pattern that will be used for date parsing by the generic statement handler. @@ -100,16 +100,16 @@ def create_transaction_pattern(self) -> str: if self.get_statement_type() == EntryType.CREDIT: pattern += SharedPatterns.AMOUNT_EXTENDED - return pattern + return re.compile(pattern, re.IGNORECASE) @lru_cache - def create_statement_date_pattern(self) -> str: + def create_statement_date_pattern(self) -> re.Pattern: """ Creates a regex pattern for the statement date based on the first statement date. """ statement_date = self.matcher.get_statement_date_pattern() - return f"({statement_date})" + return re.compile(f"({statement_date})") @lru_cache def get_statement_type(self) -> str: @@ -186,7 +186,7 @@ def check_if_multiline(self) -> bool: return average_line_distance > 2 @lru_cache - def create_previous_balance_regex(self) -> str | None: + def create_previous_balance_regex(self) -> re.Pattern | None: """Helper function to check for a previous balance line items. Makes the assumption that the previous balance line item, if it exists, will be the line before the first line item with a date. @@ -218,13 +218,13 @@ def create_previous_balance_regex(self) -> str | None: + SharedPatterns.AMOUNT ) logger.debug("Found words, generated pattern %s", pattern) - return pattern + return re.compile(pattern) return None @lru_cache def get_first_transaction_location(self): # uses the transaction pattern to find the first transaction - pattern = re.compile(self.create_transaction_pattern(), re.IGNORECASE) + pattern = self.create_transaction_pattern() for page_num, page in enumerate(self.pages): for line_num, line in enumerate(page.lines): diff --git a/src/monopoly/identifiers.py b/src/monopoly/identifiers.py index 258ea758..d3ffaf87 100644 --- a/src/monopoly/identifiers.py +++ b/src/monopoly/identifiers.py @@ -1,4 +1,4 @@ -from pydantic.dataclasses import dataclass +from dataclasses import dataclass @dataclass @@ -16,6 +16,11 @@ class MetadataIdentifier(Identifier): subject: str = "" creator: str = "" producer: str = "" + keywords: str = "" + creationDate: str = "" + modDate: str = "" + trapped: str = "" + encryption: dict = None @dataclass diff --git a/src/monopoly/statements/base.py b/src/monopoly/statements/base.py index b11437d8..7c789fe6 100644 --- a/src/monopoly/statements/base.py +++ b/src/monopoly/statements/base.py @@ -59,7 +59,10 @@ def subtotal_pattern(self) -> re.Pattern: @property def pattern(self): - return self.config.transaction_pattern + pattern = self.config.transaction_pattern + if isinstance(pattern, str): + pattern = re.compile(pattern) + return pattern @lru_cache def get_transactions(self) -> list[Transaction] | None: diff --git a/tests/unit/generic/test_date_pattern_analyzer.py b/tests/unit/generic/test_date_pattern_analyzer.py index 72d24505..09f758e4 100644 --- a/tests/unit/generic/test_date_pattern_analyzer.py +++ b/tests/unit/generic/test_date_pattern_analyzer.py @@ -286,7 +286,7 @@ def test_create_transaction_pattern_with_transaction_first( date_pattern_analyzer.matches = matches_with_transaction_posting_dates result = date_pattern_analyzer.create_transaction_pattern() - assert result == expected + assert result == re.compile(expected, re.IGNORECASE) def test_create_transaction_pattern_with_posting_first( @@ -305,7 +305,7 @@ def test_create_transaction_pattern_with_posting_first( + SharedPatterns.AMOUNT_EXTENDED ) result = date_pattern_analyzer.create_transaction_pattern() - assert result == expected + assert result == re.compile(expected, re.IGNORECASE) def test_get_statement_type_debit(date_pattern_analyzer: DatePatternAnalyzer): From b7af37bd730e182c2bd85b1cd805d065ddd9c1b5 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Wed, 4 Sep 2024 22:57:48 +0800 Subject: [PATCH 19/27] refactor(pdf): add metadata identifier attr to PdfDocument --- src/monopoly/banks/detector.py | 5 ++--- src/monopoly/pdf.py | 3 +++ tests/unit/test_bank_identifier/test_get_identifier.py | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/monopoly/banks/detector.py b/src/monopoly/banks/detector.py index 4e4cb8ac..e1526b06 100644 --- a/src/monopoly/banks/detector.py +++ b/src/monopoly/banks/detector.py @@ -3,7 +3,7 @@ from functools import cached_property from typing import TYPE_CHECKING, Any, Type -from monopoly.identifiers import Identifier, MetadataIdentifier, TextIdentifier +from monopoly.identifiers import Identifier, TextIdentifier from monopoly.pdf import PdfDocument if TYPE_CHECKING: @@ -22,8 +22,7 @@ def metadata_items(self) -> list[Any]: Retrieves encryption and metadata identifiers from a bank statement PDF """ identifiers: list[Identifier] = [] - if metadata := self.document.metadata: - metadata_identifier = MetadataIdentifier(**metadata) + if metadata_identifier := self.document.metadata_identifier: identifiers.append(metadata_identifier) if not identifiers: diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index 8f5be3db..a24e4854 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -13,6 +13,7 @@ from pymupdf import TEXTFLAGS_TEXT, Document, Page from monopoly.banks import BankBase +from monopoly.identifiers import MetadataIdentifier logger = logging.getLogger(__name__) @@ -80,6 +81,8 @@ def __init__( if self.is_encrypted: self._unlock_document() + self.metadata_identifier = MetadataIdentifier(**self.metadata) + def _unlock_document(self): """Attempt to unlock the document using the provided passwords.""" if not self.is_encrypted: diff --git a/tests/unit/test_bank_identifier/test_get_identifier.py b/tests/unit/test_bank_identifier/test_get_identifier.py index 75d11e69..80a230ce 100644 --- a/tests/unit/test_bank_identifier/test_get_identifier.py +++ b/tests/unit/test_bank_identifier/test_get_identifier.py @@ -19,10 +19,10 @@ def xref_get_key(self): class MockPdfDocument: def __init__(self, is_encrypted: bool, metadata: dict): self.is_encrypted = is_encrypted - self.metadata = metadata + self.metadata_identifier = MetadataIdentifier(**metadata) def open(self): - return MockDocument(self.is_encrypted, self.metadata) + return MockDocument(self.is_encrypted, self.metadata_identifier) @pytest.fixture From 190510dd71acd22ed24519fca8431e3d387d0fc9 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Wed, 4 Sep 2024 23:02:05 +0800 Subject: [PATCH 20/27] refactor(banks/base): fix type hint for identifiers --- src/monopoly/banks/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/monopoly/banks/base.py b/src/monopoly/banks/base.py index 8edf4f0c..4def5022 100644 --- a/src/monopoly/banks/base.py +++ b/src/monopoly/banks/base.py @@ -1,6 +1,7 @@ import logging from monopoly.config import PdfConfig, StatementConfig +from monopoly.identifiers import Identifier logger = logging.getLogger(__name__) @@ -15,6 +16,7 @@ class BankBase: statement_configs: list[StatementConfig] pdf_config: PdfConfig = PdfConfig() + identifiers: list[Identifier] def __init_subclass__(cls, **kwargs) -> None: if not hasattr(cls, "statement_configs"): From 4c676712a238c963a5058c8c6e80cead17500afb Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Wed, 4 Sep 2024 23:16:28 +0800 Subject: [PATCH 21/27] build(deps): move ocrmypdf to extras --- poetry.lock | 99 ++++++++++++++++++++++++++------------------------ pyproject.toml | 7 ++-- 2 files changed, 55 insertions(+), 51 deletions(-) diff --git a/poetry.lock b/poetry.lock index 97bcceb8..8c0c908a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -75,7 +75,7 @@ uvloop = ["uvloop (>=0.15.2)"] name = "cffi" version = "1.17.0" description = "Foreign Function Interface for Python calling C code." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "cffi-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f9338cc05451f1942d0d8203ec2c346c830f8e86469903d5126c1f0a13a2bcbb"}, @@ -154,7 +154,7 @@ pycparser = "*" name = "charset-normalizer" version = "3.3.2" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." -optional = false +optional = true python-versions = ">=3.7.0" files = [ {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, @@ -276,38 +276,38 @@ files = [ [[package]] name = "cryptography" -version = "43.0.0" +version = "43.0.1" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -optional = false +optional = true python-versions = ">=3.7" files = [ - {file = "cryptography-43.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:64c3f16e2a4fc51c0d06af28441881f98c5d91009b8caaff40cf3548089e9c74"}, - {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3dcdedae5c7710b9f97ac6bba7e1052b95c7083c9d0e9df96e02a1932e777895"}, - {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d9a1eca329405219b605fac09ecfc09ac09e595d6def650a437523fcd08dd22"}, - {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ea9e57f8ea880eeea38ab5abf9fbe39f923544d7884228ec67d666abd60f5a47"}, - {file = "cryptography-43.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:9a8d6802e0825767476f62aafed40532bd435e8a5f7d23bd8b4f5fd04cc80ecf"}, - {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cc70b4b581f28d0a254d006f26949245e3657d40d8857066c2ae22a61222ef55"}, - {file = "cryptography-43.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:4a997df8c1c2aae1e1e5ac49c2e4f610ad037fc5a3aadc7b64e39dea42249431"}, - {file = "cryptography-43.0.0-cp37-abi3-win32.whl", hash = "sha256:6e2b11c55d260d03a8cf29ac9b5e0608d35f08077d8c087be96287f43af3ccdc"}, - {file = "cryptography-43.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:31e44a986ceccec3d0498e16f3d27b2ee5fdf69ce2ab89b52eaad1d2f33d8778"}, - {file = "cryptography-43.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:7b3f5fe74a5ca32d4d0f302ffe6680fcc5c28f8ef0dc0ae8f40c0f3a1b4fca66"}, - {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ac1955ce000cb29ab40def14fd1bbfa7af2017cca696ee696925615cafd0dce5"}, - {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:299d3da8e00b7e2b54bb02ef58d73cd5f55fb31f33ebbf33bd00d9aa6807df7e"}, - {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:ee0c405832ade84d4de74b9029bedb7b31200600fa524d218fc29bfa371e97f5"}, - {file = "cryptography-43.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:cb013933d4c127349b3948aa8aaf2f12c0353ad0eccd715ca789c8a0f671646f"}, - {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:fdcb265de28585de5b859ae13e3846a8e805268a823a12a4da2597f1f5afc9f0"}, - {file = "cryptography-43.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2905ccf93a8a2a416f3ec01b1a7911c3fe4073ef35640e7ee5296754e30b762b"}, - {file = "cryptography-43.0.0-cp39-abi3-win32.whl", hash = "sha256:47ca71115e545954e6c1d207dd13461ab81f4eccfcb1345eac874828b5e3eaaf"}, - {file = "cryptography-43.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:0663585d02f76929792470451a5ba64424acc3cd5227b03921dab0e2f27b1709"}, - {file = "cryptography-43.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:2c6d112bf61c5ef44042c253e4859b3cbbb50df2f78fa8fae6747a7814484a70"}, - {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:844b6d608374e7d08f4f6e6f9f7b951f9256db41421917dfb2d003dde4cd6b66"}, - {file = "cryptography-43.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:51956cf8730665e2bdf8ddb8da0056f699c1a5715648c1b0144670c1ba00b48f"}, - {file = "cryptography-43.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:aae4d918f6b180a8ab8bf6511a419473d107df4dbb4225c7b48c5c9602c38c7f"}, - {file = "cryptography-43.0.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:232ce02943a579095a339ac4b390fbbe97f5b5d5d107f8a08260ea2768be8cc2"}, - {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5bcb8a5620008a8034d39bce21dc3e23735dfdb6a33a06974739bfa04f853947"}, - {file = "cryptography-43.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:08a24a7070b2b6804c1940ff0f910ff728932a9d0e80e7814234269f9d46d069"}, - {file = "cryptography-43.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e9c5266c432a1e23738d178e51c2c7a5e2ddf790f248be939448c0ba2021f9d1"}, - {file = "cryptography-43.0.0.tar.gz", hash = "sha256:b88075ada2d51aa9f18283532c9f60e72170041bba88d7f37e49cbb10275299e"}, + {file = "cryptography-43.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d"}, + {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062"}, + {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962"}, + {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277"}, + {file = "cryptography-43.0.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a"}, + {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042"}, + {file = "cryptography-43.0.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494"}, + {file = "cryptography-43.0.1-cp37-abi3-win32.whl", hash = "sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2"}, + {file = "cryptography-43.0.1-cp37-abi3-win_amd64.whl", hash = "sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d"}, + {file = "cryptography-43.0.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d"}, + {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806"}, + {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85"}, + {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c"}, + {file = "cryptography-43.0.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1"}, + {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa"}, + {file = "cryptography-43.0.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4"}, + {file = "cryptography-43.0.1-cp39-abi3-win32.whl", hash = "sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47"}, + {file = "cryptography-43.0.1-cp39-abi3-win_amd64.whl", hash = "sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb"}, + {file = "cryptography-43.0.1-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034"}, + {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d"}, + {file = "cryptography-43.0.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289"}, + {file = "cryptography-43.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84"}, + {file = "cryptography-43.0.1-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365"}, + {file = "cryptography-43.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96"}, + {file = "cryptography-43.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172"}, + {file = "cryptography-43.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2"}, + {file = "cryptography-43.0.1.tar.gz", hash = "sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d"}, ] [package.dependencies] @@ -320,7 +320,7 @@ nox = ["nox"] pep8test = ["check-sdist", "click", "mypy", "ruff"] sdist = ["build"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "cryptography-vectors (==43.0.0)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi", "cryptography-vectors (==43.0.1)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] test-randomorder = ["pytest-randomly"] [[package]] @@ -349,7 +349,7 @@ langdetect = ["langdetect"] name = "deprecated" version = "1.2.14" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." -optional = false +optional = true python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, @@ -366,7 +366,7 @@ dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] name = "deprecation" version = "2.1.0" description = "A library to handle automated deprecations" -optional = false +optional = true python-versions = "*" files = [ {file = "deprecation-2.1.0-py2.py3-none-any.whl", hash = "sha256:a10811591210e1fb0e768a8c25517cabeabcba6f0bf96564f8ff45189f90b14a"}, @@ -458,7 +458,7 @@ files = [ name = "img2pdf" version = "0.5.1" description = "Convert images to PDF via direct JPEG inclusion." -optional = false +optional = true python-versions = "*" files = [ {file = "img2pdf-0.5.1.tar.gz", hash = "sha256:73847e47242f4b5bd113c70049e03e03212936c2727cd2a8bf564229a67d0b95"}, @@ -500,7 +500,7 @@ colors = ["colorama (>=0.4.6)"] name = "lxml" version = "5.3.0" description = "Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:dd36439be765e2dde7660212b5275641edbc813e7b24668831a5c8ac91180656"}, @@ -654,7 +654,7 @@ source = ["Cython (>=3.0.11)"] name = "markdown-it-py" version = "3.0.0" description = "Python port of markdown-it. Markdown parsing, done right!" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, @@ -689,7 +689,7 @@ files = [ name = "mdurl" version = "0.1.2" description = "Markdown URL utilities" -optional = false +optional = true python-versions = ">=3.7" files = [ {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, @@ -769,7 +769,7 @@ files = [ name = "ocrmypdf" version = "16.5.0" description = "OCRmyPDF adds an OCR text layer to scanned PDF files, allowing them to be searched" -optional = false +optional = true python-versions = ">=3.10" files = [ {file = "ocrmypdf-16.5.0-py3-none-any.whl", hash = "sha256:9222b1b0818b65c891559b84efab2e84434c71149b3aaaa6dc654457e0b66b14"}, @@ -820,7 +820,7 @@ files = [ name = "pdfminer-six" version = "20240706" description = "PDF parser and analyzer" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pdfminer.six-20240706-py3-none-any.whl", hash = "sha256:f4f70e74174b4b3542fcb8406a210b6e2e27cd0f0b5fd04534a8cc0d8951e38c"}, @@ -850,7 +850,7 @@ files = [ name = "pi-heif" version = "0.18.0" description = "Python interface for libheif library" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pi_heif-0.18.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:3c09d22ed75200372b8102debf4ba69d8f63c595870505b9188d6c9a9b48e1f2"}, @@ -918,7 +918,7 @@ tests-min = ["defusedxml", "packaging", "pytest"] name = "pikepdf" version = "9.2.1" description = "Read and write PDFs with Python, powered by qpdf" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pikepdf-9.2.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:e863185d6abadab140a7c3e152d9227afe495cf97d4738efc280896660249180"}, @@ -984,7 +984,7 @@ test = ["attrs (>=20.2.0)", "coverage[toml]", "hypothesis (>=6.36)", "numpy (>=1 name = "pillow" version = "10.4.0" description = "Python Imaging Library (Fork)" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pillow-10.4.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:4d9667937cfa347525b319ae34375c37b9ee6b525440f3ef48542fcf66f2731e"}, @@ -1151,7 +1151,7 @@ files = [ name = "pycparser" version = "2.22" description = "C parser in Python" -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, @@ -1316,7 +1316,7 @@ files = [ name = "pygments" version = "2.18.0" description = "Pygments is a syntax highlighting package written in Python." -optional = false +optional = true python-versions = ">=3.8" files = [ {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, @@ -1630,7 +1630,7 @@ files = [ name = "rich" version = "13.8.0" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" -optional = false +optional = true python-versions = ">=3.7.0" files = [ {file = "rich-13.8.0-py3-none-any.whl", hash = "sha256:2e85306a063b9492dffc86278197a60cbece75bcb766022f3436f567cae11bdc"}, @@ -1847,7 +1847,7 @@ devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3) name = "wrapt" version = "1.16.0" description = "Module for decorators, wrappers and monkey patching." -optional = false +optional = true python-versions = ">=3.6" files = [ {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, @@ -1922,7 +1922,10 @@ files = [ {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, ] +[extras] +ocr = ["ocrmypdf"] + [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "bbc56e5218316ab7ff7fb98e940720bdd61fd61ced05f8eca70062637cb0d823" +content-hash = "125f0836d30bc1d518620cb38afa25a19b3f7447a19c77e7fa0e2335cfba9606" diff --git a/pyproject.toml b/pyproject.toml index 6e85b678..343e3082 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,8 +26,7 @@ tabulate = "^0.9.0" pydantic = "^2.5.2" dateparser = "^1.2.0" strenum = "^0.4.15" -ocrmypdf = "^16.5.0" - +ocrmypdf = { version = "^16.5.0", optional = true } [tool.poetry.group.dev.dependencies] black = ">=23.7,<25.0" @@ -44,9 +43,11 @@ types-tabulate = "^0.9.0.20240106" pytest-xdist = "^3.6.1" flake8 = "^7.0.0" ruff = ">=0.4.7,<0.7.0" +git-cliff = "^2.3.0" +[tool.poetry.extras] +ocr = ["ocrmypdf"] -git-cliff = "^2.3.0" [tool.taskipy.tasks] format = "isort . && black ." lint = "flake8 src && pylint src && ruff check src" From 67943c374702b0980ae01302b29f335f776a3e5b Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Wed, 4 Sep 2024 23:22:26 +0800 Subject: [PATCH 22/27] refactor(pdf): lazily import ocrmypdf --- src/monopoly/pdf.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index a24e4854..796b5b94 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -6,8 +6,6 @@ from typing import Optional import pdftotext -from ocrmypdf import Verbosity, configure_logging, ocr -from ocrmypdf.exceptions import PriorOcrFoundError, TaggedPDFError from pydantic import SecretStr from pydantic_settings import BaseSettings, SettingsConfigDict from pymupdf import TEXTFLAGS_TEXT, Document, Page @@ -212,6 +210,14 @@ def _remove_vertical_text(page: Page): @staticmethod def _apply_ocr(document: PdfDocument) -> PdfDocument: + # pylint: disable=import-outside-toplevel + try: + from ocrmypdf import Verbosity, configure_logging, ocr + from ocrmypdf.exceptions import PriorOcrFoundError, TaggedPDFError + except ImportError: + logger.warning("ocrmypdf not installed, skipping OCR") + return document + added_ocr = False try: logger.debug("Applying OCR") From ec356eab68f6b2d9bc8614d0e4ee1749ca5fde2c Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Wed, 4 Sep 2024 23:47:48 +0800 Subject: [PATCH 23/27] refactor(pdf): perform ocr based on metadata identifiers --- src/monopoly/banks/hsbc/hsbc.py | 36 ++++++++++++++++++--------------- src/monopoly/config.py | 8 +++++--- src/monopoly/identifiers.py | 15 +++++++++++++- src/monopoly/pdf.py | 16 ++++++++++----- tests/conftest.py | 6 +++++- 5 files changed, 55 insertions(+), 26 deletions(-) diff --git a/src/monopoly/banks/hsbc/hsbc.py b/src/monopoly/banks/hsbc/hsbc.py index 47d4fcf4..a75c1221 100644 --- a/src/monopoly/banks/hsbc/hsbc.py +++ b/src/monopoly/banks/hsbc/hsbc.py @@ -26,22 +26,26 @@ class Hsbc(BankBase): multiline_transactions=True, ) - pdf_config = PdfConfig(page_bbox=(0, 0, 379, 840), apply_ocr=True) - - identifiers = [ - [ - MetadataIdentifier( - title="PRJ_BEAGLE_ST_CNS_SGH_APP_Orchid", - author="Registered to: HSBCGLOB", - creator="OpenText Exstream", - ), - TextIdentifier("HSBC"), - ], - [ - MetadataIdentifier( - format="PDF 1.7", producer="OpenText Output Transformation Engine" - ) - ], + email_statement_identifier = [ + MetadataIdentifier( + title="PRJ_BEAGLE_ST_CNS_SGH_APP_Orchid", + author="Registered to: HSBCGLOB", + creator="OpenText Exstream", + ), + TextIdentifier("HSBC"), ] + web_and_mobile_statement_identifier = [ + MetadataIdentifier( + format="PDF 1.7", producer="OpenText Output Transformation Engine" + ) + ] + + pdf_config = PdfConfig( + page_bbox=(0, 0, 379, 840), + ocr_identifiers=web_and_mobile_statement_identifier, + ) + + identifiers = [email_statement_identifier, web_and_mobile_statement_identifier] + statement_configs = [credit_config] diff --git a/src/monopoly/config.py b/src/monopoly/config.py index 51c87aa4..29c3f51c 100644 --- a/src/monopoly/config.py +++ b/src/monopoly/config.py @@ -3,6 +3,7 @@ from monopoly.constants import BankNames, EntryType, InternalBankNames from monopoly.enums import RegexEnum +from monopoly.identifiers import MetadataIdentifier @dataclass @@ -65,10 +66,11 @@ class PdfConfig: - `page_bbox`: A tuple representing the bounding box range for every page. This is used to avoid weirdness like vertical text, and other PDF artifacts that may affect parsing. - - `apply_ocr`: Whether to attempt to apply OCR on the PDF. If the PDF already - has OCR, the original OCR text will be retained. + - `ocr_identifiers`: Applies OCR on PDFs with a specific metadata identifier. """ page_range: tuple[Optional[int], Optional[int]] = (None, None) page_bbox: Optional[tuple[float, float, float, float]] = None - apply_ocr: bool = False + ocr_identifiers: list[Optional[MetadataIdentifier]] = field( + default_factory=list[None] + ) diff --git a/src/monopoly/identifiers.py b/src/monopoly/identifiers.py index d3ffaf87..8262464c 100644 --- a/src/monopoly/identifiers.py +++ b/src/monopoly/identifiers.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, fields @dataclass @@ -22,6 +22,19 @@ class MetadataIdentifier(Identifier): trapped: str = "" encryption: dict = None + def matches(self, other: "MetadataIdentifier") -> bool: + """Check for partial matches on all string fields.""" + for field in fields(self): + self_value = getattr(self, field.name) + other_value = getattr(other, field.name) + + # Perform partial matching if both fields are non-empty strings + if isinstance(self_value, str) and isinstance(other_value, str): + if other_value and other_value not in self_value: + return False + + return True + @dataclass class TextIdentifier(Identifier): diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index 796b5b94..36506b0f 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -115,7 +115,11 @@ def raw_text(self) -> str: class PdfParser: - def __init__(self, bank: BankBase, document: PdfDocument): + def __init__( + self, + bank: BankBase, + document: PdfDocument, + ): """ Class responsible for parsing PDFs and returning raw text @@ -124,6 +128,7 @@ def __init__(self, bank: BankBase, document: PdfDocument): """ self.bank = bank self.document = document + self.metadata_identifier = document.metadata_identifier @property def pdf_config(self): @@ -138,8 +143,8 @@ def page_bbox(self): return self.pdf_config.page_bbox @cached_property - def apply_ocr(self): - return self.pdf_config.apply_ocr + def ocr_identifiers(self): + return self.pdf_config.ocr_identifiers @lru_cache def get_pages(self) -> list[PdfPage]: @@ -158,8 +163,9 @@ def get_pages(self) -> list[PdfPage]: page.set_cropbox(cropbox) page = self._remove_vertical_text(page) - if self.apply_ocr: - document = self._apply_ocr(document) + for identifier in self.ocr_identifiers: + if self.metadata_identifier.matches(identifier): + document = self._apply_ocr(document) # certain statements requsire garbage collection, so that duplicate objects # do not cause pdftotext to fail due to missing xrefs/null values diff --git a/tests/conftest.py b/tests/conftest.py index 5334fa06..7cc99d67 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,7 +54,11 @@ class MockBank: @pytest.fixture def parser(mock_bank): - parser = PdfParser(bank=mock_bank, document=None) + class MockDocument: + metadata_identifier = [None] + + document = MockDocument() + parser = PdfParser(bank=mock_bank, document=document) yield parser From b7e46387a87ae12d14d719295bf2e304fa504eaa Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Thu, 5 Sep 2024 00:01:07 +0800 Subject: [PATCH 24/27] chore: linting for ocr changes --- pyproject.toml | 4 +++- src/monopoly/banks/__init__.py | 2 +- src/monopoly/banks/base.py | 4 ++-- src/monopoly/banks/detector.py | 4 +--- src/monopoly/config.py | 4 +--- src/monopoly/identifiers.py | 9 +++------ src/monopoly/pdf.py | 10 ++++++---- .../unit/test_bank_identifier/test_auto_detect_bank.py | 2 +- 8 files changed, 18 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 343e3082..b19ee601 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,7 +87,9 @@ disable_error_code = [ [[tool.mypy.overrides]] module = [ - "fitz", + "pymupdf", + "ocrmypdf", + "ocrmypdf.exceptions", "pdftotext", "pdf2john", ] diff --git a/src/monopoly/banks/__init__.py b/src/monopoly/banks/__init__.py index 662b9448..b4abbbf3 100644 --- a/src/monopoly/banks/__init__.py +++ b/src/monopoly/banks/__init__.py @@ -23,4 +23,4 @@ logger = logging.getLogger(__name__) -__all__ = ["BankDetector", "BankBase", *banks] +__all__ = ["BankDetector", "BankBase", *[bank.__name__ for bank in banks]] diff --git a/src/monopoly/banks/base.py b/src/monopoly/banks/base.py index 4def5022..7120ba28 100644 --- a/src/monopoly/banks/base.py +++ b/src/monopoly/banks/base.py @@ -1,7 +1,7 @@ import logging +from typing import Any from monopoly.config import PdfConfig, StatementConfig -from monopoly.identifiers import Identifier logger = logging.getLogger(__name__) @@ -16,7 +16,7 @@ class BankBase: statement_configs: list[StatementConfig] pdf_config: PdfConfig = PdfConfig() - identifiers: list[Identifier] + identifiers: list[list[Any]] def __init_subclass__(cls, **kwargs) -> None: if not hasattr(cls, "statement_configs"): diff --git a/src/monopoly/banks/detector.py b/src/monopoly/banks/detector.py index e1526b06..3cf4295e 100644 --- a/src/monopoly/banks/detector.py +++ b/src/monopoly/banks/detector.py @@ -30,9 +30,7 @@ def metadata_items(self) -> list[Any]: return identifiers - def detect_bank( - self, banks: list[Type["BankBase"]] = None - ) -> Type["BankBase"] | None: + def detect_bank(self, banks: list[Type["BankBase"]]) -> Type["BankBase"] | None: """ Reads the encryption metadata or actual metadata (if the PDF is not encrypted), and checks for a bank based on unique identifiers. diff --git a/src/monopoly/config.py b/src/monopoly/config.py index 29c3f51c..365ab09e 100644 --- a/src/monopoly/config.py +++ b/src/monopoly/config.py @@ -71,6 +71,4 @@ class PdfConfig: page_range: tuple[Optional[int], Optional[int]] = (None, None) page_bbox: Optional[tuple[float, float, float, float]] = None - ocr_identifiers: list[Optional[MetadataIdentifier]] = field( - default_factory=list[None] - ) + ocr_identifiers: Optional[list[MetadataIdentifier]] = None diff --git a/src/monopoly/identifiers.py b/src/monopoly/identifiers.py index 8262464c..36fe04c4 100644 --- a/src/monopoly/identifiers.py +++ b/src/monopoly/identifiers.py @@ -1,4 +1,6 @@ -from dataclasses import dataclass, fields +from dataclasses import fields + +from pydantic.dataclasses import dataclass @dataclass @@ -16,11 +18,6 @@ class MetadataIdentifier(Identifier): subject: str = "" creator: str = "" producer: str = "" - keywords: str = "" - creationDate: str = "" - modDate: str = "" - trapped: str = "" - encryption: dict = None def matches(self, other: "MetadataIdentifier") -> bool: """Check for partial matches on all string fields.""" diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index 36506b0f..e1e2e8d7 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -3,16 +3,18 @@ from functools import cached_property, lru_cache from io import BytesIO from pathlib import Path -from typing import Optional +from typing import TYPE_CHECKING, Optional, Type import pdftotext from pydantic import SecretStr from pydantic_settings import BaseSettings, SettingsConfigDict from pymupdf import TEXTFLAGS_TEXT, Document, Page -from monopoly.banks import BankBase from monopoly.identifiers import MetadataIdentifier +if TYPE_CHECKING: + from monopoly.banks import BankBase + logger = logging.getLogger(__name__) @@ -117,7 +119,7 @@ def raw_text(self) -> str: class PdfParser: def __init__( self, - bank: BankBase, + bank: Type["BankBase"], document: PdfDocument, ): """ @@ -144,7 +146,7 @@ def page_bbox(self): @cached_property def ocr_identifiers(self): - return self.pdf_config.ocr_identifiers + return self.pdf_config.ocr_identifiers or [] @lru_cache def get_pages(self) -> list[PdfPage]: diff --git a/tests/unit/test_bank_identifier/test_auto_detect_bank.py b/tests/unit/test_bank_identifier/test_auto_detect_bank.py index 31c82a0b..98c7fc07 100644 --- a/tests/unit/test_bank_identifier/test_auto_detect_bank.py +++ b/tests/unit/test_bank_identifier/test_auto_detect_bank.py @@ -135,7 +135,7 @@ def test_detect_bank_with_not_matching_text_identifier( mock_banks_list = [MockBankTwo, MockBankWithMultipleTextIdentifier] monkeypatch.setattr("monopoly.banks.banks", mock_banks_list) - assert not metadata_analyzer.detect_bank() + assert not metadata_analyzer.detect_bank(mock_banks_list) @patch.object(PdfDocument, "raw_text", new_callable=PropertyMock) From 88daff35388a2587708f1231032aa07918cba477 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Thu, 5 Sep 2024 21:33:37 +0800 Subject: [PATCH 25/27] refactor(pipeline): move parser instantiation logic to CLI --- src/monopoly/cli.py | 6 ++++-- src/monopoly/examples/single_statement.py | 17 +++++++++++------ src/monopoly/pdf.py | 14 +++++++------- src/monopoly/pipeline.py | 6 ++---- tests/integration/banks/test_banks_credit.py | 6 +++++- tests/integration/banks/test_banks_debit.py | 6 +++++- .../banks/test_banks_generic_credit.py | 9 ++++++--- .../banks/test_banks_generic_debit.py | 6 +++++- tests/integration/test_pipeline.py | 14 +++++++++++--- 9 files changed, 56 insertions(+), 28 deletions(-) diff --git a/src/monopoly/cli.py b/src/monopoly/cli.py index 3b19ce27..72962dfd 100644 --- a/src/monopoly/cli.py +++ b/src/monopoly/cli.py @@ -126,16 +126,18 @@ def process_statement( # pylint: disable=import-outside-toplevel, too-many-locals from monopoly.banks import BankDetector, banks from monopoly.generic import GenericBank - from monopoly.pdf import PdfDocument + from monopoly.pdf import PdfDocument, PdfParser from monopoly.pipeline import Pipeline try: document = PdfDocument(file) analyzer = BankDetector(document) bank = analyzer.detect_bank(banks) or GenericBank + parser = PdfParser(bank, document) + pages = parser.get_pages() pipeline = Pipeline(file, bank=bank) - statement = pipeline.extract(safety_check=safety_check) + statement = pipeline.extract(pages, safety_check=safety_check) transactions = pipeline.transform(statement) if print_df: diff --git a/src/monopoly/examples/single_statement.py b/src/monopoly/examples/single_statement.py index 3e3656bd..a4a77207 100644 --- a/src/monopoly/examples/single_statement.py +++ b/src/monopoly/examples/single_statement.py @@ -1,3 +1,5 @@ +from monopoly.banks import ExampleBank +from monopoly.pdf import PdfParser from monopoly.pipeline import Pipeline @@ -6,28 +8,31 @@ def example(): a single bank statement You can pass in the bank class if you want to specify a specific bank, - or ignore the bank argument and let the Pipeline try to automatically - detect the bank. + or use the BankDetector class to try to detect the bank automatically. """ pipeline = Pipeline( - file_path="src/monopoly/examples/example_statement.pdf", - # bank=ExampleBank + file_path="src/monopoly/examples/example_statement.pdf", bank=ExampleBank ) + parser = PdfParser(pipeline.bank, pipeline.document) + pages = parser.get_pages() # This runs pdftotext on the PDF and # extracts transactions as raw text - statement = pipeline.extract() + statement = pipeline.extract(pages) # Dates are converted into an ISO 8601 date format transactions = pipeline.transform(statement) # Parsed transactions writen to a CSV file in the "example" directory - pipeline.load( + file_path = pipeline.load( transactions=transactions, statement=statement, output_directory="src/monopoly/examples", ) + with open(file_path) as file: + print(file.read()[0:248]) + if __name__ == "__main__": example() diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index e1e2e8d7..297aebd0 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -145,8 +145,12 @@ def page_bbox(self): return self.pdf_config.page_bbox @cached_property - def ocr_identifiers(self): - return self.pdf_config.ocr_identifiers or [] + def ocr_available(self): + if ids := self.pdf_config.ocr_identifiers: + for identifiers in ids: + if self.metadata_identifier.matches(identifiers): + return True + return False @lru_cache def get_pages(self) -> list[PdfPage]: @@ -165,10 +169,6 @@ def get_pages(self) -> list[PdfPage]: page.set_cropbox(cropbox) page = self._remove_vertical_text(page) - for identifier in self.ocr_identifiers: - if self.metadata_identifier.matches(identifier): - document = self._apply_ocr(document) - # certain statements requsire garbage collection, so that duplicate objects # do not cause pdftotext to fail due to missing xrefs/null values # however, setting `garbage=2` may cause issues with other statements @@ -217,7 +217,7 @@ def _remove_vertical_text(page: Page): return page @staticmethod - def _apply_ocr(document: PdfDocument) -> PdfDocument: + def apply_ocr(document: PdfDocument) -> PdfDocument: # pylint: disable=import-outside-toplevel try: from ocrmypdf import Verbosity, configure_logging, ocr diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index 9eec4731..38c0ce01 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -10,7 +10,7 @@ from monopoly.config import DateOrder from monopoly.generic import GenericBank, GenericStatementHandler from monopoly.handler import StatementHandler -from monopoly.pdf import PdfDocument, PdfPage, PdfParser +from monopoly.pdf import PdfDocument, PdfPage from monopoly.statements import BaseStatement, Transaction from monopoly.write import generate_name @@ -50,11 +50,9 @@ def create_handler(bank: Type[BankBase], pages: list[PdfPage]) -> StatementHandl logger.debug("Using statement handler with bank: %s", bank.__name__) return StatementHandler(bank, pages) - def extract(self, safety_check=True) -> BaseStatement: + def extract(self, pages: list[PdfPage], safety_check=True) -> BaseStatement: """Extracts transactions from the statement, and performs a safety check to make sure that total transactions add up""" - parser = PdfParser(self.bank, self.document) - pages = parser.get_pages() handler = self.create_handler(self.bank, pages) statement = handler.get_statement() transactions = statement.get_transactions() diff --git a/tests/integration/banks/test_banks_credit.py b/tests/integration/banks/test_banks_credit.py index 79b9c89d..1aca49c1 100644 --- a/tests/integration/banks/test_banks_credit.py +++ b/tests/integration/banks/test_banks_credit.py @@ -7,6 +7,7 @@ from monopoly.banks import Citibank, Dbs, Hsbc, Maybank, Ocbc, StandardChartered from monopoly.banks.base import BankBase +from monopoly.pdf import PdfParser from monopoly.pipeline import Pipeline from monopoly.statements import CreditStatement @@ -33,7 +34,10 @@ def test_bank_credit_statements( bank_name = bank.credit_config.bank_name test_directory = Path(__file__).parent / bank_name / "credit" pipeline = Pipeline(test_directory / "input.pdf", bank=bank) - statement: CreditStatement = pipeline.extract() + + parser = PdfParser(bank, pipeline.document) + pages = parser.get_pages() + statement: CreditStatement = pipeline.extract(pages) # check raw data expected_raw_transactions = read_transactions_from_csv(test_directory, "raw.csv") diff --git a/tests/integration/banks/test_banks_debit.py b/tests/integration/banks/test_banks_debit.py index 012c6d0e..3f2391ba 100644 --- a/tests/integration/banks/test_banks_debit.py +++ b/tests/integration/banks/test_banks_debit.py @@ -6,6 +6,7 @@ from test_utils.transactions import get_transactions_as_dict, read_transactions_from_csv from monopoly.banks import BankBase, Dbs, Maybank, Ocbc +from monopoly.pdf import PdfParser from monopoly.pipeline import Pipeline from monopoly.statements import DebitStatement @@ -30,7 +31,10 @@ def test_bank_debit_statements( bank_name = bank.debit_config.bank_name test_directory = Path(__file__).parent / bank_name / "debit" pipeline = Pipeline(test_directory / "input.pdf", bank=bank) - statement: DebitStatement = pipeline.extract() + + parser = PdfParser(bank, pipeline.document) + pages = parser.get_pages() + statement: DebitStatement = pipeline.extract(pages) # check raw data expected_raw_transactions = read_transactions_from_csv(test_directory, "raw.csv") diff --git a/tests/integration/banks/test_banks_generic_credit.py b/tests/integration/banks/test_banks_generic_credit.py index ab2b1e75..bc028051 100644 --- a/tests/integration/banks/test_banks_generic_credit.py +++ b/tests/integration/banks/test_banks_generic_credit.py @@ -5,9 +5,9 @@ from test_utils.skip import skip_if_encrypted from test_utils.transactions import get_transactions_as_dict, read_transactions_from_csv -from monopoly.banks import Citibank, Dbs, Maybank, Ocbc, StandardChartered -from monopoly.banks.base import BankBase +from monopoly.banks import BankBase, Citibank, Dbs, Maybank, Ocbc, StandardChartered from monopoly.constants import Columns +from monopoly.pdf import PdfParser from monopoly.pipeline import Pipeline from monopoly.statements import CreditStatement @@ -33,7 +33,10 @@ def test_bank_credit_statements( bank_name = bank.credit_config.bank_name test_directory = Path(__file__).parent / bank_name / "credit" pipeline = Pipeline(test_directory / "input.pdf") - statement: CreditStatement = pipeline.extract() + + parser = PdfParser(bank, pipeline.document) + pages = parser.get_pages() + statement: CreditStatement = pipeline.extract(pages) # check raw data expected_raw_transactions = read_transactions_from_csv(test_directory, "raw.csv") diff --git a/tests/integration/banks/test_banks_generic_debit.py b/tests/integration/banks/test_banks_generic_debit.py index 80ab2059..82a2ffe1 100644 --- a/tests/integration/banks/test_banks_generic_debit.py +++ b/tests/integration/banks/test_banks_generic_debit.py @@ -6,6 +6,7 @@ from test_utils.transactions import get_transactions_as_dict, read_transactions_from_csv from monopoly.banks import BankBase, Dbs, Maybank, Ocbc +from monopoly.pdf import PdfParser from monopoly.pipeline import Pipeline from monopoly.statements import DebitStatement @@ -36,7 +37,10 @@ def test_bank_debit_statements( bank_name = bank.debit_config.bank_name test_directory = Path(__file__).parent / bank_name / "debit" pipeline = Pipeline(test_directory / "input.pdf") - statement: DebitStatement = pipeline.extract() + + parser = PdfParser(bank, pipeline.document) + pages = parser.get_pages() + statement: DebitStatement = pipeline.extract(pages) # check raw data expected_raw_transactions = read_transactions_from_csv(test_directory, "raw.csv") diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index 321315bf..42fcb732 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -3,6 +3,7 @@ import pytest from monopoly.banks import Dbs, ExampleBank +from monopoly.pdf import PdfParser from monopoly.pipeline import Pipeline @@ -24,7 +25,9 @@ def test_pipeline_initialization_with_bytes(pdf_file_bytes): def test_pipeline_with_bank(): file_path = Path("src/monopoly/examples/example_statement.pdf") pipeline = Pipeline(file_path=file_path, bank=ExampleBank) - transactions = pipeline.extract().transactions + parser = PdfParser(pipeline.bank, pipeline.document) + pages = parser.get_pages() + transactions = pipeline.extract(pages).transactions assert len(transactions) == 53 assert transactions[0].description == "LAST MONTH'S BALANCE" @@ -32,8 +35,11 @@ def test_pipeline_with_bank(): def test_pipeline_with_bad_bank(): file_path = Path("src/monopoly/examples/example_statement.pdf") pipeline = Pipeline(file_path=file_path, bank=Dbs) + parser = PdfParser(pipeline.bank, pipeline.document) + pages = parser.get_pages() + with pytest.raises(ValueError, match="No transactions found"): - pipeline.extract() + pipeline.extract(pages) def test_pipeline_initialization_with_file_path(): @@ -62,6 +68,8 @@ def test_pipeline_initialization_with_neither_raises_error(): def test_pipeline_bytes_etl(pdf_file_bytes): pipeline = Pipeline(file_bytes=pdf_file_bytes, bank=ExampleBank) - statement = pipeline.extract() + parser = PdfParser(pipeline.bank, pipeline.document) + pages = parser.get_pages() + statement = pipeline.extract(pages) transactions = pipeline.transform(statement) assert len(transactions) == 53 From 502b0aabf31283f04e364539024774af445f213f Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Thu, 5 Sep 2024 22:39:58 +0800 Subject: [PATCH 26/27] refactor(pipeline): allow custom document to be passed --- src/monopoly/cli.py | 5 +- src/monopoly/examples/single_statement.py | 14 +++--- src/monopoly/pdf.py | 8 +++ src/monopoly/pipeline.py | 26 +++------- tests/conftest.py | 2 +- tests/integration/banks/test_banks_credit.py | 10 ++-- tests/integration/banks/test_banks_debit.py | 10 ++-- .../banks/test_banks_generic_credit.py | 10 ++-- .../banks/test_banks_generic_debit.py | 10 ++-- tests/integration/test_pdf_document.py | 15 ++++++ tests/integration/test_pipeline.py | 50 +++++++------------ 11 files changed, 76 insertions(+), 84 deletions(-) diff --git a/src/monopoly/cli.py b/src/monopoly/cli.py index 72962dfd..f1d065fc 100644 --- a/src/monopoly/cli.py +++ b/src/monopoly/cli.py @@ -134,10 +134,9 @@ def process_statement( analyzer = BankDetector(document) bank = analyzer.detect_bank(banks) or GenericBank parser = PdfParser(bank, document) - pages = parser.get_pages() + pipeline = Pipeline(parser) - pipeline = Pipeline(file, bank=bank) - statement = pipeline.extract(pages, safety_check=safety_check) + statement = pipeline.extract(safety_check=safety_check) transactions = pipeline.transform(statement) if print_df: diff --git a/src/monopoly/examples/single_statement.py b/src/monopoly/examples/single_statement.py index a4a77207..f2d3bd9e 100644 --- a/src/monopoly/examples/single_statement.py +++ b/src/monopoly/examples/single_statement.py @@ -1,5 +1,5 @@ from monopoly.banks import ExampleBank -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfDocument, PdfParser from monopoly.pipeline import Pipeline @@ -10,15 +10,13 @@ def example(): You can pass in the bank class if you want to specify a specific bank, or use the BankDetector class to try to detect the bank automatically. """ - pipeline = Pipeline( - file_path="src/monopoly/examples/example_statement.pdf", bank=ExampleBank - ) - parser = PdfParser(pipeline.bank, pipeline.document) - pages = parser.get_pages() + document = PdfDocument(file_path="src/monopoly/examples/example_statement.pdf") + parser = PdfParser(ExampleBank, document) + pipeline = Pipeline(parser) # This runs pdftotext on the PDF and # extracts transactions as raw text - statement = pipeline.extract(pages) + statement = pipeline.extract() # Dates are converted into an ISO 8601 date format transactions = pipeline.transform(statement) @@ -30,7 +28,7 @@ def example(): output_directory="src/monopoly/examples", ) - with open(file_path) as file: + with open(file_path, encoding="utf8") as file: print(file.read()[0:248]) diff --git a/src/monopoly/pdf.py b/src/monopoly/pdf.py index 297aebd0..3bd0278b 100644 --- a/src/monopoly/pdf.py +++ b/src/monopoly/pdf.py @@ -75,6 +75,14 @@ def __init__( self.file_bytes = file_bytes self.passwords = passwords or PdfPasswords().pdf_passwords + if not any([self.file_path, self.file_bytes]): + raise RuntimeError("Either `file_path` or `file_bytes` must be passed") + + if self.file_path and self.file_bytes: + raise RuntimeError( + "Only one of `file_path` or `file_bytes` should be passed" + ) + args = {"filename": self.file_path, "stream": self.file_bytes} super().__init__(**args) diff --git a/src/monopoly/pipeline.py b/src/monopoly/pipeline.py index 38c0ce01..4658e068 100644 --- a/src/monopoly/pipeline.py +++ b/src/monopoly/pipeline.py @@ -10,7 +10,7 @@ from monopoly.config import DateOrder from monopoly.generic import GenericBank, GenericStatementHandler from monopoly.handler import StatementHandler -from monopoly.pdf import PdfDocument, PdfPage +from monopoly.pdf import PdfPage, PdfParser from monopoly.statements import BaseStatement, Transaction from monopoly.write import generate_name @@ -22,25 +22,12 @@ class Pipeline: def __init__( self, - file_path: Optional[Path] = None, - file_bytes: Optional[bytes] = None, + parser: PdfParser, passwords: Optional[list[SecretStr]] = None, - bank: Type[BankBase] = GenericBank, ): - self.file_path = file_path - self.file_bytes = file_bytes self.passwords = passwords - self.bank = bank - - if not any([self.file_path, self.file_bytes]): - raise RuntimeError("Either `file_path` or `file_bytes` must be passed") - - if self.file_path and self.file_bytes: - raise RuntimeError( - "Only one of `file_path` or `file_bytes` should be passed" - ) - - self.document = PdfDocument(file_path, file_bytes, passwords) + pages = parser.get_pages() + self.handler = self.create_handler(parser.bank, pages) @staticmethod def create_handler(bank: Type[BankBase], pages: list[PdfPage]) -> StatementHandler: @@ -50,11 +37,10 @@ def create_handler(bank: Type[BankBase], pages: list[PdfPage]) -> StatementHandl logger.debug("Using statement handler with bank: %s", bank.__name__) return StatementHandler(bank, pages) - def extract(self, pages: list[PdfPage], safety_check=True) -> BaseStatement: + def extract(self, safety_check=True) -> BaseStatement: """Extracts transactions from the statement, and performs a safety check to make sure that total transactions add up""" - handler = self.create_handler(self.bank, pages) - statement = handler.get_statement() + statement = self.handler.get_statement() transactions = statement.get_transactions() if not transactions: diff --git a/tests/conftest.py b/tests/conftest.py index 7cc99d67..71f9cf20 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,7 +21,7 @@ def mock_env(): @pytest.fixture def pdf_document(): - yield PdfDocument() + yield PdfDocument(file_path="src/monopoly/examples/example_statement.pdf") @pytest.fixture diff --git a/tests/integration/banks/test_banks_credit.py b/tests/integration/banks/test_banks_credit.py index 1aca49c1..46796f2e 100644 --- a/tests/integration/banks/test_banks_credit.py +++ b/tests/integration/banks/test_banks_credit.py @@ -7,7 +7,7 @@ from monopoly.banks import Citibank, Dbs, Hsbc, Maybank, Ocbc, StandardChartered from monopoly.banks.base import BankBase -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfDocument, PdfParser from monopoly.pipeline import Pipeline from monopoly.statements import CreditStatement @@ -33,11 +33,11 @@ def test_bank_credit_statements( ): bank_name = bank.credit_config.bank_name test_directory = Path(__file__).parent / bank_name / "credit" - pipeline = Pipeline(test_directory / "input.pdf", bank=bank) - parser = PdfParser(bank, pipeline.document) - pages = parser.get_pages() - statement: CreditStatement = pipeline.extract(pages) + document = PdfDocument(test_directory / "input.pdf") + parser = PdfParser(bank, document) + pipeline = Pipeline(parser) + statement: CreditStatement = pipeline.extract() # check raw data expected_raw_transactions = read_transactions_from_csv(test_directory, "raw.csv") diff --git a/tests/integration/banks/test_banks_debit.py b/tests/integration/banks/test_banks_debit.py index 3f2391ba..bd46a05e 100644 --- a/tests/integration/banks/test_banks_debit.py +++ b/tests/integration/banks/test_banks_debit.py @@ -6,7 +6,7 @@ from test_utils.transactions import get_transactions_as_dict, read_transactions_from_csv from monopoly.banks import BankBase, Dbs, Maybank, Ocbc -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfDocument, PdfParser from monopoly.pipeline import Pipeline from monopoly.statements import DebitStatement @@ -30,11 +30,11 @@ def test_bank_debit_statements( ): bank_name = bank.debit_config.bank_name test_directory = Path(__file__).parent / bank_name / "debit" - pipeline = Pipeline(test_directory / "input.pdf", bank=bank) - parser = PdfParser(bank, pipeline.document) - pages = parser.get_pages() - statement: DebitStatement = pipeline.extract(pages) + document = PdfDocument(test_directory / "input.pdf") + parser = PdfParser(bank, document) + pipeline = Pipeline(parser) + statement: DebitStatement = pipeline.extract() # check raw data expected_raw_transactions = read_transactions_from_csv(test_directory, "raw.csv") diff --git a/tests/integration/banks/test_banks_generic_credit.py b/tests/integration/banks/test_banks_generic_credit.py index bc028051..346ff06b 100644 --- a/tests/integration/banks/test_banks_generic_credit.py +++ b/tests/integration/banks/test_banks_generic_credit.py @@ -7,7 +7,7 @@ from monopoly.banks import BankBase, Citibank, Dbs, Maybank, Ocbc, StandardChartered from monopoly.constants import Columns -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfDocument, PdfParser from monopoly.pipeline import Pipeline from monopoly.statements import CreditStatement @@ -32,11 +32,11 @@ def test_bank_credit_statements( ): bank_name = bank.credit_config.bank_name test_directory = Path(__file__).parent / bank_name / "credit" - pipeline = Pipeline(test_directory / "input.pdf") - parser = PdfParser(bank, pipeline.document) - pages = parser.get_pages() - statement: CreditStatement = pipeline.extract(pages) + document = PdfDocument(test_directory / "input.pdf") + parser = PdfParser(bank, document) + pipeline = Pipeline(parser) + statement: CreditStatement = pipeline.extract() # check raw data expected_raw_transactions = read_transactions_from_csv(test_directory, "raw.csv") diff --git a/tests/integration/banks/test_banks_generic_debit.py b/tests/integration/banks/test_banks_generic_debit.py index 82a2ffe1..63015da0 100644 --- a/tests/integration/banks/test_banks_generic_debit.py +++ b/tests/integration/banks/test_banks_generic_debit.py @@ -6,7 +6,7 @@ from test_utils.transactions import get_transactions_as_dict, read_transactions_from_csv from monopoly.banks import BankBase, Dbs, Maybank, Ocbc -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfDocument, PdfParser from monopoly.pipeline import Pipeline from monopoly.statements import DebitStatement @@ -36,11 +36,11 @@ def test_bank_debit_statements( ): bank_name = bank.debit_config.bank_name test_directory = Path(__file__).parent / bank_name / "debit" - pipeline = Pipeline(test_directory / "input.pdf") - parser = PdfParser(bank, pipeline.document) - pages = parser.get_pages() - statement: DebitStatement = pipeline.extract(pages) + document = PdfDocument(test_directory / "input.pdf") + parser = PdfParser(bank, document) + pipeline = Pipeline(parser) + statement: DebitStatement = pipeline.extract() # check raw data expected_raw_transactions = read_transactions_from_csv(test_directory, "raw.csv") diff --git a/tests/integration/test_pdf_document.py b/tests/integration/test_pdf_document.py index 40b30bec..67e51ece 100644 --- a/tests/integration/test_pdf_document.py +++ b/tests/integration/test_pdf_document.py @@ -14,6 +14,21 @@ fixture_directory = Path(__file__).parent / "fixtures" +def test_document_initialization_with_both_raises_error(): + file_path = Path("src/monopoly/examples/example_statement.pdf") + with raises( + RuntimeError, match="Only one of `file_path` or `file_bytes` should be passed" + ): + PdfDocument(file_path=file_path, file_bytes=b"123") + + +def test_document_initialization_with_neither_raises_error(): + with raises( + RuntimeError, match="Either `file_path` or `file_bytes` must be passed" + ): + PdfDocument() + + def test_can_open_file_stream(): with open(fixture_directory / "4_pages_blank.pdf", "rb") as file: pdf_document = PdfDocument(file_bytes=file.read()) diff --git a/tests/integration/test_pipeline.py b/tests/integration/test_pipeline.py index 42fcb732..34821f9c 100644 --- a/tests/integration/test_pipeline.py +++ b/tests/integration/test_pipeline.py @@ -3,7 +3,7 @@ import pytest from monopoly.banks import Dbs, ExampleBank -from monopoly.pdf import PdfParser +from monopoly.pdf import PdfDocument, PdfParser from monopoly.pipeline import Pipeline @@ -14,18 +14,11 @@ def pdf_file_bytes(): yield f.read() -def test_pipeline_initialization_with_bytes(pdf_file_bytes): - try: - pipeline = Pipeline(file_bytes=pdf_file_bytes) - assert pipeline is not None - except RuntimeError as e: - pytest.fail(f"Pipeline initialization failed with RuntimeError: {e}") - - def test_pipeline_with_bank(): file_path = Path("src/monopoly/examples/example_statement.pdf") - pipeline = Pipeline(file_path=file_path, bank=ExampleBank) - parser = PdfParser(pipeline.bank, pipeline.document) + document = PdfDocument(file_path) + parser = PdfParser(ExampleBank, document) + pipeline = Pipeline(parser) pages = parser.get_pages() transactions = pipeline.extract(pages).transactions assert len(transactions) == 53 @@ -34,8 +27,9 @@ def test_pipeline_with_bank(): def test_pipeline_with_bad_bank(): file_path = Path("src/monopoly/examples/example_statement.pdf") - pipeline = Pipeline(file_path=file_path, bank=Dbs) - parser = PdfParser(pipeline.bank, pipeline.document) + document = PdfDocument(file_path) + parser = PdfParser(Dbs, document) + pipeline = Pipeline(parser) pages = parser.get_pages() with pytest.raises(ValueError, match="No transactions found"): @@ -45,30 +39,22 @@ def test_pipeline_with_bad_bank(): def test_pipeline_initialization_with_file_path(): file_path = Path("src/monopoly/examples/example_statement.pdf") try: - pipeline = Pipeline(file_path=file_path) - assert pipeline is not None + document = PdfDocument(file_path) + parser = PdfParser(ExampleBank, document) + pipeline = Pipeline(parser) + pages = parser.get_pages() + statement = pipeline.extract(pages) + transactions = pipeline.transform(statement) + assert len(transactions) == 53 except RuntimeError as e: pytest.fail(f"Pipeline initialization failed with RuntimeError: {e}") -def test_pipeline_initialization_with_both_raises_error(pdf_file_bytes): - file_path = Path("src/monopoly/examples/example_statement.pdf") - with pytest.raises( - RuntimeError, match="Only one of `file_path` or `file_bytes` should be passed" - ): - Pipeline(file_path=file_path, file_bytes=pdf_file_bytes) - - -def test_pipeline_initialization_with_neither_raises_error(): - with pytest.raises( - RuntimeError, match="Either `file_path` or `file_bytes` must be passed" - ): - Pipeline() - - def test_pipeline_bytes_etl(pdf_file_bytes): - pipeline = Pipeline(file_bytes=pdf_file_bytes, bank=ExampleBank) - parser = PdfParser(pipeline.bank, pipeline.document) + document = PdfDocument(file_bytes=pdf_file_bytes) + parser = PdfParser(ExampleBank, document) + pipeline = Pipeline(parser) + pages = parser.get_pages() statement = pipeline.extract(pages) transactions = pipeline.transform(statement) From dd6ee2639f277c306f1d1a8f212885fab1fff96e Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Thu, 5 Sep 2024 23:04:49 +0800 Subject: [PATCH 27/27] docs(README): add note about OCR feature --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 496ea002..941da49f 100644 --- a/README.md +++ b/README.md @@ -72,7 +72,7 @@ python3 src/monopoly/examples/single_statement.py ## Features - Parses PDFs using predefined configuration classes per bank. - Handles locked PDFs with credentials passed via environment variables. -- Supports a variety of date/number formats and determines if a transaction is debit or credit. +- Supports adding OCR for image-based bank statements. - Provides a generic parser that can be used without any predefined configuration (caveat emptor). - Includes a safety check (enabled by default) that validates totals for debit or credit statements.