diff --git a/monopoly/main.py b/monopoly/main.py index cd230435..4d19196f 100644 --- a/monopoly/main.py +++ b/monopoly/main.py @@ -38,8 +38,7 @@ def process_bank_statement(message: Message, banks: dict): with message.save(attachment) as file: processor: StatementProcessor = bank_class(file_path=file) - pages = processor.get_pages() - statement = processor.extract(pages) + statement = processor.extract() transformed_df = processor.transform(statement) processor.load(transformed_df, statement, upload_to_cloud=True) diff --git a/monopoly/processor.py b/monopoly/processor.py index 419c31f3..7f5acd2f 100644 --- a/monopoly/processor.py +++ b/monopoly/processor.py @@ -6,7 +6,7 @@ from monopoly.config import PdfConfig, StatementConfig, settings from monopoly.constants import StatementFields -from monopoly.pdf import PdfPage, PdfParser +from monopoly.pdf import PdfParser from monopoly.statement import Statement from monopoly.storage import upload_to_cloud_storage, write_to_csv @@ -24,7 +24,9 @@ def __init__( super().__init__(file_path=self.file_path, config=pdf_config) - def extract(self, pages: list[PdfPage]) -> Statement: + def extract(self) -> Statement: + parser = PdfParser(self.file_path, self.pdf_config) + pages = parser.get_pages() statement = Statement(pages, self.statement_config) if not statement.transactions: diff --git a/tests/integration/banks/citibank/test_citibank_extract.py b/tests/integration/banks/citibank/test_citibank_extract.py index 657f4802..360af79c 100644 --- a/tests/integration/banks/citibank/test_citibank_extract.py +++ b/tests/integration/banks/citibank/test_citibank_extract.py @@ -6,8 +6,7 @@ def test_citibank_extract_unprotected_pdf(citibank: Citibank): - pages = citibank.get_pages() - raw_df = citibank.extract(pages).df + raw_df = citibank.extract().df expected_df = pd.read_csv("tests/integration/fixtures/citibank/expected.csv") assert_frame_equal(raw_df, expected_df) diff --git a/tests/integration/banks/hsbc/test_hsbc_extract.py b/tests/integration/banks/hsbc/test_hsbc_extract.py index 8a461266..25e0d29a 100644 --- a/tests/integration/banks/hsbc/test_hsbc_extract.py +++ b/tests/integration/banks/hsbc/test_hsbc_extract.py @@ -6,8 +6,7 @@ def test_hsbc_extract_unprotected_pdf(hsbc: Hsbc): - pages = hsbc.get_pages() - raw_df = hsbc.extract(pages).df + raw_df = hsbc.extract().df expected_df = pd.read_csv("tests/integration/fixtures/hsbc/expected.csv") assert_frame_equal(raw_df, expected_df) diff --git a/tests/integration/banks/ocbc/test_ocbc_extract.py b/tests/integration/banks/ocbc/test_ocbc_extract.py index bbfe3c07..acf3be83 100644 --- a/tests/integration/banks/ocbc/test_ocbc_extract.py +++ b/tests/integration/banks/ocbc/test_ocbc_extract.py @@ -6,8 +6,7 @@ def test_ocbc_extract_unprotected_pdf(ocbc: Ocbc): - pages = ocbc.get_pages() - raw_df = ocbc.extract(pages).df + raw_df = ocbc.extract().df expected_df = pd.read_csv("tests/integration/fixtures/ocbc/expected.csv")