Skip to content

Commit

Permalink
chore(base): add boundary check for transactions
Browse files Browse the repository at this point in the history
  • Loading branch information
benjamin-awd committed Sep 8, 2024
1 parent bc3fc8f commit 8625a27
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/monopoly/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,16 @@ class StatementConfig:
- `transaction_date_order` represents the datetime format that a specific bank uses
for transactions. For example, "DMY" will parse 01/02/2024 as 1 Feb 2024.
Defaults to DMY.
- `statement_date_format` represents the datetime format that a specific bank uses
to represent a statement date.
- `multiline_transactions` controls whether Monopoly tries to concatenate
transactions that are split across two lines
- `header_pattern` is a regex pattern that is used to find the 'header' line
of a statement, and determine if it is a debit or credit card statement.
- `transaction_bound` causes transactions whose amount starts at or beyond a certain
character offset in the line to be ignored. For example, if `transaction_bound` = 5:
"01 NOV BALANCE B/F 190.77" (will be ignored)
"01 NOV YA KUN KAYA TOAST 12.00" (will be kept)
"""

bank_name: BankNames | InternalBankNames
Expand All @@ -51,6 +57,7 @@ class StatementConfig:
transaction_date_order: DateOrder = field(default_factory=lambda: DateOrder("DMY"))
statement_date_order: DateOrder = field(default_factory=lambda: DateOrder("DMY"))
multiline_transactions: bool = False
transaction_bound: Optional[int] = None
has_withdraw_deposit_column: bool = False
prev_balance_pattern: Optional[Pattern[str] | RegexEnum] = None

Expand Down
10 changes: 10 additions & 0 deletions src/monopoly/statements/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ def get_transactions(self) -> list[Transaction] | None:
for page_num, page in enumerate(self.pages):
for line_num, line in enumerate(page.lines):
if match := self.pattern.search(line):
if self._check_bound(match):
continue

groupdict = TransactionGroupDict(**match.groupdict())
transaction_match = TransactionMatch(
groupdict, match, page_number=page_num
Expand All @@ -91,6 +94,13 @@ def get_transactions(self) -> list[Transaction] | None:
post_processed_transactions = self.post_process_transactions(transactions)
return post_processed_transactions

def _check_bound(self, match: re.Match):
    """Report whether a matched transaction lies past the configured bound.

    Returns True when `self.config.transaction_bound` is set (truthy) and
    the amount group of `match` starts at or after that offset, meaning the
    caller should ignore the match. Returns False otherwise.
    """
    limit = self.config.transaction_bound
    # A missing (or zero) bound disables the check entirely.
    if not limit:
        return False

    amount_start, _ = match.span(Columns.AMOUNT)
    if amount_start < limit:
        return False

    logger.debug("Transaction exists beyond boundary, ignoring")
    return True

def pre_process_match(
self, transaction_match: TransactionMatch
) -> TransactionMatch:
Expand Down
21 changes: 21 additions & 0 deletions tests/unit/test_statement_process_line.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,27 @@ def test_get_transactions(statement: BaseStatement):
assert transactions == expected


def test_check_bound(statement: BaseStatement):
    """Check that `transaction_bound` filters out matches past the bound.

    Two transaction lines are fed in with a bound of 30; only the first
    line is expected to come back as a transaction.
    """
    pattern = Ocbc.credit_config.transaction_pattern
    statement.config.transaction_pattern = pattern
    statement.config.transaction_bound = 30

    statement.pages = [
        PdfPage("19/06 YA KUN KAYA TOAST 3.20\n20/06 FAIRPRICE FINEST 9.90")
    ]
    expected = [
        Transaction(
            transaction_date="19/06",
            description="YA KUN KAYA TOAST",
            amount=-3.2,
            suffix=None,
        ),
    ]
    try:
        transactions = statement.get_transactions()
    finally:
        # Reset the bound even if parsing raises, so the setting cannot leak
        # into other tests (presumably the config object is shared between
        # tests - confirm against the `statement` fixture).
        statement.config.transaction_bound = None
    assert transactions == expected


def test_get_multiline_transactions(statement: BaseStatement):
pattern = Hsbc.credit_config.transaction_pattern
statement.config.multiline_transactions = True
Expand Down

0 comments on commit 8625a27

Please sign in to comment.