Skip to content

Commit

Permalink
refactor(banks): revert to having extract() handle pages
Browse files Browse the repository at this point in the history
  • Loading branch information
benjamin-awd committed Oct 21, 2023
1 parent 444437f commit 5460c83
Show file tree
Hide file tree
Showing 8 changed files with 12 additions and 14 deletions.
2 changes: 2 additions & 0 deletions monopoly/examples/example_bank.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,5 @@ class MonopolyBank(BankBase):
statement_date_pattern=r"\d{2}\-\d{2}\-\d{4}",
statement_date_format=r"%d-%m-%Y",
)
+
+pdf_config = None
3 changes: 1 addition & 2 deletions monopoly/examples/multiple_statements.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ def ocbc_example():
bank = Ocbc(
file_path=file_path,
)
-pages = bank.get_pages()
-statement = bank.extract(pages)
+statement = bank.extract()
transformed_df = bank.transform(statement)
bank.load(transformed_df, statement)

Expand Down
3 changes: 1 addition & 2 deletions monopoly/examples/single_statement.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@ def example():

# This runs Tesseract on the PDF and
# extracts transactions as raw text
-pages = bank.get_pages()
-statement = bank.extract(pages)
+statement = bank.extract()

# Dates are converted into an ISO 8601 date format
transformed_df = bank.transform(statement)
Expand Down
3 changes: 1 addition & 2 deletions monopoly/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@ def process_bank_statement(message: Message, banks: dict):

with message.save(attachment) as file:
processor: StatementProcessor = bank_class(file_path=file)
-pages = processor.get_pages()
-statement = processor.extract(pages)
+statement = processor.extract()
transformed_df = processor.transform(statement)
processor.load(transformed_df, statement, upload_to_cloud=True)

Expand Down
6 changes: 4 additions & 2 deletions monopoly/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from monopoly.config import PdfConfig, StatementConfig, settings
from monopoly.constants import StatementFields
-from monopoly.pdf import PdfPage, PdfParser
+from monopoly.pdf import PdfParser
from monopoly.statement import Statement
from monopoly.storage import upload_to_cloud_storage, write_to_csv

Expand All @@ -24,7 +24,9 @@ def __init__(

super().__init__(file_path=self.file_path, config=pdf_config)

-def extract(self, pages: list[PdfPage]) -> Statement:
+def extract(self) -> Statement:
+parser = PdfParser(self.file_path, self.pdf_config)
+pages = parser.get_pages()
statement = Statement(pages, self.statement_config)

if not statement.transactions:
Expand Down
3 changes: 1 addition & 2 deletions tests/integration/banks/citibank/test_citibank_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@


def test_citibank_extract_unprotected_pdf(citibank: Citibank):
-pages = citibank.get_pages()
-raw_df = citibank.extract(pages).df
+raw_df = citibank.extract().df
expected_df = pd.read_csv("tests/integration/fixtures/citibank/expected.csv")

assert_frame_equal(raw_df, expected_df)
Expand Down
3 changes: 1 addition & 2 deletions tests/integration/banks/hsbc/test_hsbc_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@


def test_hsbc_extract_unprotected_pdf(hsbc: Hsbc):
-pages = hsbc.get_pages()
-raw_df = hsbc.extract(pages).df
+raw_df = hsbc.extract().df
expected_df = pd.read_csv("tests/integration/fixtures/hsbc/expected.csv")

assert_frame_equal(raw_df, expected_df)
Expand Down
3 changes: 1 addition & 2 deletions tests/integration/banks/ocbc/test_ocbc_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@


def test_ocbc_extract_unprotected_pdf(ocbc: Ocbc):
-pages = ocbc.get_pages()
-raw_df = ocbc.extract(pages).df
+raw_df = ocbc.extract().df

expected_df = pd.read_csv("tests/integration/fixtures/ocbc/expected.csv")

Expand Down

0 comments on commit 5460c83

Please sign in to comment.