From 62ba9f2207c2682cf8d139140b2519bd3d0ee9a1 Mon Sep 17 00:00:00 2001 From: Benjamin Dornel Date: Wed, 4 Sep 2024 00:03:25 +0800 Subject: [PATCH] refactor: remove unnecessary usage of pydantic dataclasses --- src/monopoly/cli.py | 4 ++-- src/monopoly/config.py | 4 +--- src/monopoly/generic/generic.py | 14 +++++++------- src/monopoly/identifiers.py | 7 ++++++- src/monopoly/statements/base.py | 5 ++++- tests/unit/generic/test_date_pattern_analyzer.py | 4 ++-- 6 files changed, 22 insertions(+), 16 deletions(-) diff --git a/src/monopoly/cli.py b/src/monopoly/cli.py index c58f7d17..3b19ce27 100644 --- a/src/monopoly/cli.py +++ b/src/monopoly/cli.py @@ -1,10 +1,10 @@ import traceback from concurrent.futures import ProcessPoolExecutor +from dataclasses import dataclass, field from pathlib import Path from typing import Collection, Iterable, Optional, TypedDict import click -from pydantic.dataclasses import Field, dataclass from tabulate import tabulate from tqdm import tqdm @@ -41,7 +41,7 @@ class Result: source_file_name: str target_file_name: Optional[str] = None - error_info: dict[str, str] = Field(default_factory=dict) + error_info: dict[str, str] = field(default_factory=dict) @dataclass diff --git a/src/monopoly/config.py b/src/monopoly/config.py index bfdc6a2a..51c87aa4 100644 --- a/src/monopoly/config.py +++ b/src/monopoly/config.py @@ -1,8 +1,6 @@ -from dataclasses import field +from dataclasses import dataclass, field from typing import Optional, Pattern -from pydantic.dataclasses import dataclass - from monopoly.constants import BankNames, EntryType, InternalBankNames from monopoly.enums import RegexEnum diff --git a/src/monopoly/generic/generic.py b/src/monopoly/generic/generic.py index 96db76d1..6a8db46f 100644 --- a/src/monopoly/generic/generic.py +++ b/src/monopoly/generic/generic.py @@ -70,7 +70,7 @@ def is_transaction_date_first(self) -> bool: return True @lru_cache - def create_transaction_pattern(self) -> str: + def create_transaction_pattern(self) -> re.Pattern: """ Create a regex pattern that will be used for date parsing by the generic statement handler. @@ -100,16 +100,16 @@ def create_transaction_pattern(self) -> str: if self.get_statement_type() == EntryType.CREDIT: pattern += SharedPatterns.AMOUNT_EXTENDED - return pattern + return re.compile(pattern, re.IGNORECASE) @lru_cache - def create_statement_date_pattern(self) -> str: + def create_statement_date_pattern(self) -> re.Pattern: """ Creates a regex pattern for the statement date based on the first statement date. """ statement_date = self.matcher.get_statement_date_pattern() - return f"({statement_date})" + return re.compile(f"({statement_date})") @lru_cache def get_statement_type(self) -> str: @@ -186,7 +186,7 @@ def check_if_multiline(self) -> bool: return average_line_distance > 2 @lru_cache - def create_previous_balance_regex(self) -> str | None: + def create_previous_balance_regex(self) -> re.Pattern | None: """Helper function to check for a previous balance line items. Makes the assumption that the previous balance line item, if it exists, will be the line before the first line item with a date. @@ -218,13 +218,13 @@ def create_previous_balance_regex(self) -> str | None: + SharedPatterns.AMOUNT ) logger.debug("Found words, generated pattern %s", pattern) - return pattern + return re.compile(pattern) return None @lru_cache def get_first_transaction_location(self): # uses the transaction pattern to find the first transaction - pattern = re.compile(self.create_transaction_pattern(), re.IGNORECASE) + pattern = self.create_transaction_pattern() for page_num, page in enumerate(self.pages): for line_num, line in enumerate(page.lines): diff --git a/src/monopoly/identifiers.py b/src/monopoly/identifiers.py index 258ea758..d3ffaf87 100644 --- a/src/monopoly/identifiers.py +++ b/src/monopoly/identifiers.py @@ -1,4 +1,4 @@ -from pydantic.dataclasses import dataclass +from dataclasses import dataclass @dataclass @@ -16,6 +16,11 @@ class MetadataIdentifier(Identifier): subject: str = "" creator: str = "" producer: str = "" + keywords: str = "" + creationDate: str = "" + modDate: str = "" + trapped: str = "" + encryption: dict = None @dataclass diff --git a/src/monopoly/statements/base.py b/src/monopoly/statements/base.py index b11437d8..7c789fe6 100644 --- a/src/monopoly/statements/base.py +++ b/src/monopoly/statements/base.py @@ -59,7 +59,10 @@ def subtotal_pattern(self) -> re.Pattern: @property def pattern(self): - return self.config.transaction_pattern + pattern = self.config.transaction_pattern + if isinstance(pattern, str): + pattern = re.compile(pattern) + return pattern @lru_cache def get_transactions(self) -> list[Transaction] | None: diff --git a/tests/unit/generic/test_date_pattern_analyzer.py b/tests/unit/generic/test_date_pattern_analyzer.py index 72d24505..09f758e4 100644 --- a/tests/unit/generic/test_date_pattern_analyzer.py +++ b/tests/unit/generic/test_date_pattern_analyzer.py @@ -286,7 +286,7 @@ def test_create_transaction_pattern_with_transaction_first( date_pattern_analyzer.matches = matches_with_transaction_posting_dates result = date_pattern_analyzer.create_transaction_pattern() - assert result == expected + assert result == re.compile(expected, re.IGNORECASE) def test_create_transaction_pattern_with_posting_first( @@ -305,7 +305,7 @@ def test_create_transaction_pattern_with_posting_first( + SharedPatterns.AMOUNT_EXTENDED ) result = date_pattern_analyzer.create_transaction_pattern() - assert result == expected + assert result == re.compile(expected, re.IGNORECASE) def test_get_statement_type_debit(date_pattern_analyzer: DatePatternAnalyzer):