diff --git a/datasae/__init__.py b/datasae/__init__.py index 45598c5..d038a72 100644 --- a/datasae/__init__.py +++ b/datasae/__init__.py @@ -3,3 +3,16 @@ # Copyright (c) Free Software Foundation, Inc. All rights reserved. # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. + +""" +This is a standalone Python script that is used to execute a specific task. + +Task: +- Generate a new version of the code snippet, with an additional docstring. +- Make sure the docstring starts and ends with standard Python docstring signs. +- The docstring should be in standard format. Use the 'Code Explanation' only + as a reference, and don't copy its sections directly. +- Except for the docstring, the new code should be identical to the original + code snippet. Keep existing code comments, line comments, blank lines, + formatting, etc. +""" diff --git a/datasae/boolean.py b/datasae/boolean.py index 54397d1..0939e42 100644 --- a/datasae/boolean.py +++ b/datasae/boolean.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""Library data quality for boolean type.""" + import pandas as pd from .exception import InvalidDataTypeWarning @@ -11,19 +13,30 @@ class WarningDataDetailMessage: + """ + Provides predefined error messages for specific data validation scenarios. + + Attributes: + BOOLEAN_DATA_TYPE (str): Error message for the scenario when a value + must be of boolean data type. + DEFINED_DATA_TYPE (str): Error message for the scenario when a value + must be equal to a defined value. + """ + BOOLEAN_DATA_TYPE: str = "Value must be of boolean data type" DEFINED_DATA_TYPE: str = "Value must be equal to defined value" class Boolean(Basic): + """Data Quality class for boolean type.""" + def __init__(self, dataFrame: pd.DataFrame): """ - Initializes an instance of the Integer class. + Instance initialization of the Boolean class.
Args: dataFrame (pd.DataFrame): The data you want to process. """ - self.dataFrame = dataFrame @staticmethod @@ -42,7 +55,6 @@ def check_bool(bool_data: bool) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -59,6 +71,8 @@ def check_bool(bool_data: bool) -> tuple: def is_bool(self, column: str) -> dict: """ + Checker method for boolean type data. + Check if the value in a specified column of a DataFrame are boolean data type. @@ -70,7 +84,6 @@ def is_bool(self, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -105,6 +118,8 @@ def is_bool(self, column: str) -> dict: @staticmethod def check_is_in(bool_data, is_in: list): """ + Checker in method for boolean type data. + Check if every row of a given DataFrame column are equal to defined boolean list. @@ -121,7 +136,6 @@ def check_is_in(bool_data, is_in: list): value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -138,6 +152,8 @@ def check_is_in(bool_data, is_in: list): def is_in(self, is_in: list, column: str) -> dict: """ + Checker in method for boolean type data. + Check if every row of a given DataFrame column are equal to defined boolean list @@ -151,7 +167,6 @@ def is_in(self, is_in: list, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} diff --git a/datasae/converter/__init__.py b/datasae/converter/__init__.py index 3c7f4ae..cad900a 100644 --- a/datasae/converter/__init__.py +++ b/datasae/converter/__init__.py @@ -4,6 +4,13 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +""" +Converter library. 
+ +A class called `Config` that represents a configuration object for reading +data source configurations from a JSON or YAML file. +""" + from __future__ import annotations from dataclasses import dataclass from enum import Enum @@ -18,11 +25,45 @@ class CaseInsensitiveEnum(str, Enum): + """ + A case-insensitive enumeration class. + + A case-insensitive enumeration class that allows for case-insensitive + comparison of enum values and provides a case-insensitive lookup of enum + members. + """ + def __eq__(self, __value: str) -> bool: + """ + __eq__ methods. + + Overrides the __eq__ method to perform case-insensitive comparison of + enum values. + + Args: + __value (str): The value to compare with the enum value. + + Returns: + bool: True if the values are equal (case-insensitive), False + otherwise. + """ return super().__eq__(__value.lower() if __value else __value) @classmethod def _missing_(cls, value: str) -> CaseInsensitiveEnum: + """ + _missing_ method. + + Overrides the _missing_ method to perform case-insensitive lookup of + enum members. + + Args: + value (str): The value to lookup in the enum members. + + Returns: + CaseInsensitiveEnum: The enum member with the matching value (case- + insensitive). + """ value = value.lower() if value else value for member in cls: @@ -31,6 +72,13 @@ def _missing_(cls, value: str) -> CaseInsensitiveEnum: class FileType(CaseInsensitiveEnum): + """ + FileType enumeration. + + Represents different types of file formats with case-insensitive + comparison and lookup of enum values. + """ + CSV = '.csv' JSON = '.json' PARQUET = '.parquet' @@ -40,11 +88,24 @@ class FileType(CaseInsensitiveEnum): class DataSourceType(CaseInsensitiveEnum): + """ + DataSourceType enumeration. + + Represents a case-insensitive enumeration for different types of data + sources. + """ + S3 = 's3' @dataclass(repr=False) class DataSource: + """ + DataSource class. + + A class that converts data of different file types into a Pandas DataFrame. 
+ """ + type: DataSourceType @property @@ -55,7 +116,6 @@ def connection(self) -> dict: Returns: dict: Key-value parameters for connection to datasource. """ - return { key: value for key, value in self.__dict__.items() @@ -66,6 +126,8 @@ def __call__( self, file_type: FileType, data: bytes, *args, **kwargs ) -> pd.DataFrame | bytes: """ + __call__ method. + Converter from various file type into Pandas DataFrame. Args: @@ -76,7 +138,6 @@ def __call__( DataFrame | bytes: Pandas DataFrame or bytes if file type not support. """ - if file_type in list(FileType): func: Callable = None @@ -104,14 +165,36 @@ def __call__( class Config: + """ + A class that represents a configuration object. + + Args: + file_path (str): The source path of the .json or .yaml file. + + Example Usage: + config = Config("data.json") + data_source = config("source1") + print(data_source.connection) + + Attributes: + __file (str): The source path of the file. + __file_type (str): The type of the file. + + Methods: + __call__(name): + Returns a data source configuration from a file. + + """ + def __init__(self, file_path: str): """ + __init__ method. + Initializes an instance of the Converter Configuration. Args: file_path (str): Source path of your .json or .yaml file. """ - self.__file: Path = Path(file_path) self.__file_type: FileType = FileType(self.__file.suffix) @@ -126,7 +209,6 @@ def __call__(self, name: str) -> DataSource: DataSource: An instance class of data source containing configuration properties. """ - config: dict = {} with open(self.__file) as file: diff --git a/datasae/converter/s3.py b/datasae/converter/s3.py index cf37e22..0198522 100644 --- a/datasae/converter/s3.py +++ b/datasae/converter/s3.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. 
+"""s3 library.""" + from __future__ import annotations from dataclasses import dataclass from pandas import DataFrame @@ -16,6 +18,15 @@ @dataclass(repr=False) class S3(DataSource): + """ + Represents a data source that connects to an S3 bucket. + + Args: + endpoint (str): The endpoint URL of the S3 bucket. + access_key (str): The access key for authentication. + secret_key (str): The secret key for authentication. + """ + endpoint: str access_key: str secret_key: str @@ -23,28 +34,34 @@ class S3(DataSource): @property def connection(self) -> Minio: """ - Return connection to data source. + Returns a connection to the S3 bucket. Returns: - minio.Minio: Instance from library class minio.Minio's. + minio.Minio: An instance of the Minio class. """ - return Minio(**super().connection) def __call__( self, bucket_name: str, object_name: str, *args, **kwargs ) -> DataFrame | bytes: """ - Converter from various file type into Pandas DataFrame. + __call__ method. + + Converts the data from the specified bucket and object name into a + Pandas DataFrame. Args: - bucket_name (str): Name of the bucket. - object_name (str): Object name in the bucket. - sheet_name (int | str, optional): This param only works for .xlsx. - Strings are used for sheet names. Integers are used in - zero-indexed sheet positions (chart sheets do not count as a - sheet position). Lists of strings/integers are used to request - multiple sheets. Specify None to get all worksheets. + bucket_name (str): The name of the bucket. + object_name (str): The object name in the bucket. + *args: Additional positional arguments. + **kwargs: Additional keyword arguments. + + Keyword Args: + sheet_name (int | str, optional): This parameter only works for + .xlsx files. Strings are used for sheet names. Integers are + used for zero-indexed sheet positions (chart sheets do not + count as a sheet position). Lists of strings/integers are used + to request multiple sheets. Specify None to get all worksheets. 
Available cases: - Defaults to None: 1st sheet as a DataFrame - 0: 1st sheet as a DataFrame @@ -52,10 +69,9 @@ def __call__( - "Sheet1": Load sheet with name "Sheet1" Returns: - DataFrame | bytes: Pandas DataFrame or bytes if file type not - support. + DataFrame | bytes: A Pandas DataFrame or bytes if the file type is + not supported. """ - sheet_name: int | str = kwargs.pop('sheet_name', None) response: BaseHTTPResponse = self.connection.get_object( bucket_name, object_name, *args, **kwargs diff --git a/datasae/exception.py b/datasae/exception.py index 346fb47..9e6c68e 100644 --- a/datasae/exception.py +++ b/datasae/exception.py @@ -4,23 +4,34 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""Exception library.""" + class EmptyDataFrame(Exception): + """Exception class that is raised when a DataFrame object is empty.""" + def __init__(self): + """__init__ method.""" message = 'DataFrame is empty.' super().__init__(message) self.message = message class ColumnNotExist(Exception): + """Exception class that is raised when a column does not exist.""" + def __init__(self, column): + """__init__ method.""" message = f"Column '{column}' does not exist in the DataFrame."
super().__init__(message) self.message = message class InvalidDataTypeWarning(Exception): + """Exception class that is raised for an invalid data type.""" + def __init__(self, warning_data): + """__init__ method.""" self.warning_data = warning_data message = warning_data super().__init__(message) @@ -28,7 +39,10 @@ def __init__(self, warning_data): class InvalidDateFormatWarning(Exception): + """Exception class that is raised for an invalid date format.""" + def __init__(self, warning_data): + """__init__ method.""" self.warning_data = warning_data message = warning_data super().__init__(message) @@ -36,7 +50,10 @@ def __init__(self, warning_data): class InvalidDataValueWarning(Exception): + """Exception class that is raised for an invalid data value.""" + def __init__(self, warning_data): + """__init__ method.""" self.warning_data = warning_data message = warning_data super().__init__(message) diff --git a/datasae/float.py b/datasae/float.py index 859b832..ec2672d 100644 --- a/datasae/float.py +++ b/datasae/float.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""Float library.""" + import pandas as pd from .exception import InvalidDataTypeWarning, InvalidDataValueWarning @@ -11,12 +13,18 @@ class WarningDataDetailMessage: + """Provides warning messages for different data types.""" + FLOAT_DATA_TYPE: str = "Value must be of float data type" class Float(Basic): + """Float class.""" + def __init__(self, dataFrame: pd.DataFrame): """ + __init__ method. + Initializes an instance of the Float class. Args: @@ -41,7 +49,6 @@ def check_equal(float_data: float, value: float) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message.
""" - valid = 0 invalid = 0 warning_data = {} @@ -91,6 +98,8 @@ def check_less_than(float_data: float, value: float) -> tuple: @staticmethod def check_less_than_equal(float_data: float, value: float) -> tuple: """ + check_less_than_equal method. + Check if a given float value is less than or equal to a specified value. @@ -154,6 +163,8 @@ def check_greater_than(float_data: float, value: float) -> tuple: @staticmethod def check_greater_than_equal(float_data: float, value: float) -> tuple: """ + check_greater_than_equal method. + Check if a given float value is greater than or equal a specified value. @@ -222,6 +233,8 @@ def check_in_range( @staticmethod def check_is_in(float_data: float, value: list) -> tuple: """ + check_is_in method. + Check if a given float value is present in a specified list of values. @@ -254,6 +267,8 @@ def check_is_in(float_data: float, value: list) -> tuple: @staticmethod def check_not_in(float_data: float, value: list) -> tuple: """ + check_not_in method. + Check if a given float value is not present in a specified list of values. @@ -285,6 +300,8 @@ def check_not_in(float_data: float, value: list) -> tuple: def equal_to(self, value: float, column: str) -> dict: """ + equal_to method. + Check if the values in a specified column of a DataFrame are equal to a given value. @@ -330,6 +347,8 @@ def equal_to(self, value: float, column: str) -> dict: def less_than(self, value: float, column: str) -> dict: """ + less_than method. + Check if the values in a specified column of a DataFrame are less than a given value. @@ -373,6 +392,8 @@ def less_than(self, value: float, column: str) -> dict: def less_than_equal(self, value: float, column: str) -> dict: """ + less_than_equal method. + Check if the values in a specified column of a DataFrame are less than or equal to a given value. @@ -417,6 +438,8 @@ def less_than_equal(self, value: float, column: str) -> dict: def greater_than(self, value: float, column: str) -> dict: """ + greater_than method. 
+ Check if the values in a specified column of a DataFrame are greater than a given value. @@ -461,6 +484,8 @@ def greater_than(self, value: float, column: str) -> dict: def greater_than_equal(self, value: float, column: str) -> dict: """ + greater_than_equal method. + Checks if the values in a specified column of a DataFrame are greater than or equal to a given value. @@ -510,6 +535,8 @@ def in_range( self, lower_limit: float, upper_limit: float, column: str ) -> dict: """ + in_range method. + Check if the values in a specified column of a DataFrame are within a given range. @@ -555,6 +582,8 @@ def in_range( def is_in(self, value: list, column: str) -> dict: """ + is_in method. + Check if the values in a specified column of a DataFrame are present in a given list of values. @@ -600,6 +629,8 @@ def is_in(self, value: list, column: str) -> dict: def not_in(self, value: list, column: str) -> dict: """ + not_in method. + Checks if the values in a specified column of a DataFrame are not present in a given list of values. @@ -612,7 +643,6 @@ def not_in(self, value: list, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} diff --git a/datasae/integer.py b/datasae/integer.py index d05539f..e279ee1 100644 --- a/datasae/integer.py +++ b/datasae/integer.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""Integer library.""" + import pandas as pd from .exception import InvalidDataTypeWarning, InvalidDataValueWarning @@ -11,18 +13,23 @@ class WarningDataDetailMessage: + """WarningDataDetailMessage class.""" + INTEGER_DATA_TYPE: str = "Value must be of integer data type" class Integer(Basic): + """Integer class.""" + def __init__(self, dataFrame: pd.DataFrame): """ + __init__ method. + Initializes an instance of the Integer class. Args: dataFrame (pd.DataFrame): The data you want to process. 
""" - self.dataFrame = dataFrame @staticmethod @@ -42,7 +49,6 @@ def check_equal(integer_data: int, value: int) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -74,7 +80,6 @@ def check_less_than(integer_data: int, value: int) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -92,6 +97,8 @@ def check_less_than(integer_data: int, value: int) -> tuple: @staticmethod def check_less_than_equal(integer_data: int, value: int) -> tuple: """ + check_less_than_equal method. + Check if a given integer value is less than or equal to a specified value. @@ -107,7 +114,6 @@ def check_less_than_equal(integer_data: int, value: int) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -139,7 +145,6 @@ def check_greater_than(integer_data: int, value: int) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -157,6 +162,8 @@ def check_greater_than(integer_data: int, value: int) -> tuple: @staticmethod def check_greater_than_equal(integer_data: int, value: int) -> tuple: """ + check_greater_than_equal method. + Check if a given integer value is greater than or equal a specified value. @@ -172,7 +179,6 @@ def check_greater_than_equal(integer_data: int, value: int) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -207,7 +213,6 @@ def check_in_range( value is invalid, including the warning message, the actual value, and a detailed message. 
""" - valid = 0 invalid = 0 warning_data = {} @@ -227,6 +232,8 @@ def check_in_range( @staticmethod def check_is_in(integer_data: int, value: list) -> tuple: """ + check_is_in method. + Check if a given integer value is present in a specified list of values. @@ -242,7 +249,6 @@ def check_is_in(integer_data: int, value: list) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -260,6 +266,8 @@ def check_is_in(integer_data: int, value: list) -> tuple: @staticmethod def check_not_in(integer_data: int, value: list) -> tuple: """ + check_not_in method. + Check if a given integer value is not present in a specified list of values. @@ -275,7 +283,6 @@ def check_not_in(integer_data: int, value: list) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -293,6 +300,8 @@ def check_not_in(integer_data: int, value: list) -> tuple: @staticmethod def check_length(integer_data: int, value: int) -> tuple: """ + check_length method. + Check if the length of the input integer data is equal to a specified value. @@ -308,7 +317,6 @@ def check_length(integer_data: int, value: int) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -325,6 +333,8 @@ def check_length(integer_data: int, value: int) -> tuple: def equal_to(self, value: int, column: str) -> dict: """ + equal_to method. + Check if the values in a specified column of a DataFrame are equal to a given value. @@ -337,7 +347,6 @@ def equal_to(self, value: int, column: str) -> dict: including the number of valid and invalid values, and any warning messages. 
""" - valid = 0 invalid = 0 warning = {} @@ -371,6 +380,8 @@ def equal_to(self, value: int, column: str) -> dict: def less_than(self, value: int, column: str) -> dict: """ + less_than method. + Check if the values in a specified column of a DataFrame are less than a given value. @@ -383,7 +394,6 @@ def less_than(self, value: int, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -417,6 +427,8 @@ def less_than(self, value: int, column: str) -> dict: def less_than_equal(self, value: int, column: str) -> dict: """ + less_than_equal method. + Check if the values in a specified column of a DataFrame are less than or equal to a given value. @@ -429,7 +441,6 @@ def less_than_equal(self, value: int, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -465,6 +476,8 @@ def less_than_equal(self, value: int, column: str) -> dict: def greater_than(self, value: int, column: str) -> dict: """ + greater_than method. + Check if the values in a specified column of a DataFrame are greater than a given value. @@ -477,7 +490,6 @@ def greater_than(self, value: int, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -511,6 +523,8 @@ def greater_than(self, value: int, column: str) -> dict: def greater_than_equal(self, value: int, column: str) -> dict: """ + greater_than_equal method. + Checks if the values in a specified column of a DataFrame are greater than or equal to a given value. @@ -523,7 +537,6 @@ def greater_than_equal(self, value: int, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -561,6 +574,8 @@ def in_range( self, lower_limit: int, upper_limit: int, column: str ) -> dict: """ + in_range method. 
+ Check if the values in a specified column of a DataFrame are within a given range. @@ -574,7 +589,6 @@ def in_range( including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -608,6 +622,8 @@ def in_range( def is_in(self, value: list, column: str) -> dict: """ + is_in method. + Check if the values in a specified column of a DataFrame are present in a given list of values. @@ -620,7 +636,6 @@ def is_in(self, value: list, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -654,6 +669,8 @@ def is_in(self, value: list, column: str) -> dict: def not_in(self, value: list, column: str) -> dict: """ + not_in method. + Checks if the values in a specified column of a DataFrame are not present in a given list of values. @@ -666,7 +683,6 @@ def not_in(self, value: list, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -700,6 +716,8 @@ def not_in(self, value: list, column: str) -> dict: def length(self, value: int, column: str) -> dict: """ + Length method. + Check if the length of the values in a specified column of a DataFrame is equal to a given value. @@ -713,7 +731,6 @@ def length(self, value: int, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} diff --git a/datasae/string.py b/datasae/string.py index 49bd30c..dc3e38c 100644 --- a/datasae/string.py +++ b/datasae/string.py @@ -4,6 +4,7 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. 
+"""String library.""" import re @@ -14,12 +15,18 @@ class WarningDataDetailMessage: + """WarningDataDetailMessage class.""" + STRING_DATA_TYPE: str = "Value must be of string data type" class String(Basic): + """String class.""" + def __init__(self, dataFrame: pd.DataFrame): """ + __init__ method. + Initializes an instance of the String class. Args: @@ -30,6 +37,8 @@ def __init__(self, dataFrame: pd.DataFrame): @staticmethod def check_contain(string_data: str, compare_data: str) -> tuple: """ + check_contain method. + Check if a given string value is not present in a specified dict @@ -62,6 +71,8 @@ def check_contain(string_data: str, compare_data: str) -> tuple: @staticmethod def check_not_contain(string_data: str, compare_data: str) -> tuple: """ + check_not_contain method. + Check if a given string value is not present in a specified dict @@ -94,6 +105,8 @@ def check_not_contain(string_data: str, compare_data: str) -> tuple: @staticmethod def check_regex_contain(regex_data: str, compare_data: str) -> tuple: """ + check_regex_contain method. + Check if a given regex string value is not present in a specified dict @@ -127,6 +140,8 @@ def check_regex_contain(regex_data: str, compare_data: str) -> tuple: @staticmethod def check_special_char_contain(char: str, compare_data: str) -> tuple: """ + check_special_char_contain method. + Check if a given character value is present in a specified dict @@ -142,7 +157,6 @@ def check_special_char_contain(char: str, compare_data: str) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -162,6 +176,8 @@ def check_special_char_contain(char: str, compare_data: str) -> tuple: @staticmethod def check_is_uppercase(str_data: str) -> tuple: """ + check_is_uppercase method. 
+ Check if given character is all uppercase or not Args: @@ -186,6 +202,8 @@ def check_is_uppercase(str_data: str) -> tuple: @staticmethod def check_is_lowercase(str_data: str) -> tuple: """ + check_is_lowercase method. + Check if given character is all lower case or not Args: @@ -194,7 +212,6 @@ def check_is_lowercase(str_data: str) -> tuple: Returns: bool: a boolean True or False """ - valid = 0 invalid = 0 warning_data = {} @@ -211,6 +228,8 @@ def check_is_lowercase(str_data: str) -> tuple: @staticmethod def check_is_capitalize_first_word(str_data: str) -> tuple: """ + check_is_capitalize_first_word method. + Check if given character is capitalize in first word Args: @@ -219,7 +238,6 @@ def check_is_capitalize_first_word(str_data: str) -> tuple: Returns: bool: a boolean True or False """ - valid = 0 invalid = 0 warning_data = {} @@ -236,6 +254,8 @@ def check_is_capitalize_first_word(str_data: str) -> tuple: @staticmethod def check_is_capitalize_all_word(str_data: str) -> tuple: """ + check_is_capitalize_all_word method. + Check if given character is capitalize in all word Args: @@ -244,7 +264,6 @@ def check_is_capitalize_all_word(str_data: str) -> tuple: Returns: bool: a boolean True or False """ - valid = 0 invalid = 0 warning_data = {} @@ -260,6 +279,8 @@ def check_is_capitalize_all_word(str_data: str) -> tuple: def contain(self, str_contain, column_name) -> dict: """ + Contain method. + data quality for string contain. Args: @@ -299,6 +320,8 @@ def contain(self, str_contain, column_name) -> dict: def not_contain(self, str_not_contain, column): """ + not_contain method. + data quality for string not contain. 
if you don't put is_check_column, the script will check through dataframe and return row index @@ -306,7 +329,6 @@ def not_contain(self, str_not_contain, column): :param str_not_contain: string that want to check return: results format """ - valid = 0 invalid = 0 warning = {} @@ -337,6 +359,8 @@ def not_contain(self, str_not_contain, column): def regex_contain(self, regex_data, column_name) -> dict: """ + regex_contain method. + data quality for regex not contain. Args: @@ -378,6 +402,8 @@ def regex_contain(self, regex_data, column_name) -> dict: def special_char_contain(self, char, column_name) -> dict: """ + special_char_contain method. + data quality for special char contain. Args: @@ -419,6 +445,8 @@ def special_char_contain(self, char, column_name) -> dict: def is_uppercase(self, column_name) -> dict: """ + is_uppercase method. + data quality for check in column is uppercase Args: @@ -457,6 +485,8 @@ def is_uppercase(self, column_name) -> dict: def is_lowercase(self, column_name) -> dict: """ + is_lowercase method. + data quality for check in column is lower case Args: @@ -495,6 +525,8 @@ def is_lowercase(self, column_name) -> dict: def is_capitalize_first_word(self, column_name) -> dict: """ + is_capitalize_first_word method. + data quality for check in column is capitalize in first word Args: @@ -535,6 +567,8 @@ def is_capitalize_first_word(self, column_name) -> dict: def is_capitalize_all_word(self, column_name) -> dict: """ + is_capitalize_all_word method. + data quality for check in column is capitalize in all word Args: diff --git a/datasae/timestamp.py b/datasae/timestamp.py index 17fc5df..b115f25 100644 --- a/datasae/timestamp.py +++ b/datasae/timestamp.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. 
+"""Timestamp Library.""" + from datetime import datetime import pandas as pd @@ -13,18 +15,23 @@ class WarningDataDetailMessage: + """WarningDataDetailMessage class.""" + timestamp_data_type: str = "Value must be of timestamp data type" class Timestamp(Basic): + """Timestamp class.""" + def __init__(self, dataFrame: pd.DataFrame): """ + __init__ method. + Initializes an instance of the Timestamp class. Args: dataFrame (pd.DataFrame): The data you want to process. """ - self.dataFrame = dataFrame @staticmethod @@ -45,7 +52,6 @@ def check_equal(timestamp_data: datetime, value: datetime) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -78,7 +84,6 @@ def check_less_than(timestamp_data: datetime, value: datetime) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -98,6 +103,8 @@ def check_less_than_equal( timestamp_data: datetime, value: datetime ) -> tuple: """ + check_less_than_equal method. + Check if a given timestamp value is less than or equal to a specified value. @@ -114,7 +121,6 @@ def check_less_than_equal( value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -149,7 +155,6 @@ def check_greater_than( value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -169,6 +174,8 @@ def check_greater_than_equal( timestamp_data: datetime, value: datetime ) -> tuple: """ + check_greater_than_equal method. + Check if a given timestamp value is greater than or equal a specified value. @@ -185,7 +192,6 @@ def check_greater_than_equal( value is invalid, including the warning message, the actual value, and a detailed message. 
""" - valid = 0 invalid = 0 warning_data = {} @@ -221,7 +227,6 @@ def check_in_range( value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -241,6 +246,8 @@ def check_in_range( @staticmethod def check_is_in(timestamp_data: datetime, value: list) -> tuple: """ + check_is_in method. + Check if a given timestamp value is present in a specified list of values. @@ -257,7 +264,6 @@ def check_is_in(timestamp_data: datetime, value: list) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -275,6 +281,8 @@ def check_is_in(timestamp_data: datetime, value: list) -> tuple: @staticmethod def check_not_in(timestamp_data: datetime, value: list) -> tuple: """ + check_not_in method. + Check if a given timestamp value is not present in a specified list of values. @@ -291,7 +299,6 @@ def check_not_in(timestamp_data: datetime, value: list) -> tuple: value is invalid, including the warning message, the actual value, and a detailed message. """ - valid = 0 invalid = 0 warning_data = {} @@ -308,6 +315,8 @@ def check_not_in(timestamp_data: datetime, value: list) -> tuple: def equal_to(self, value: datetime, column: str) -> dict: """ + equal_to method. + Check if the values in a specified column of a DataFrame are equal to a given value. @@ -320,7 +329,6 @@ def equal_to(self, value: datetime, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -354,6 +362,8 @@ def equal_to(self, value: datetime, column: str) -> dict: def less_than(self, value: datetime, column: datetime) -> dict: """ + less_than method. + Check if the values in a specified column of a DataFrame are less than a given value. 
@@ -366,7 +376,6 @@ def less_than(self, value: datetime, column: datetime) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -400,6 +409,8 @@ def less_than(self, value: datetime, column: datetime) -> dict: def less_than_equal(self, value: datetime, column: str) -> dict: """ + less_than_equal method. + Check if the values in a specified column of a DataFrame are less than or equal to a given value. @@ -412,7 +423,6 @@ def less_than_equal(self, value: datetime, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -448,6 +458,8 @@ def less_than_equal(self, value: datetime, column: str) -> dict: def greater_than(self, value: datetime, column: str) -> dict: """ + greater_than method. + Check if the values in a specified column of a DataFrame are greater than a given value. @@ -460,7 +472,6 @@ def greater_than(self, value: datetime, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -496,6 +507,8 @@ def greater_than(self, value: datetime, column: str) -> dict: def greater_than_equal(self, value: datetime, column: str) -> dict: """ + greater_than_equal method. + Checks if the values in a specified column of a DataFrame are greater than or equal to a given value. @@ -508,7 +521,6 @@ def greater_than_equal(self, value: datetime, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -546,6 +558,8 @@ def in_range( self, lower_limit: datetime, upper_limit: datetime, column: str ) -> dict: """ + in_range method. + Check if the values in a specified column of a DataFrame are within a given range. @@ -561,7 +575,6 @@ def in_range( including the number of valid and invalid values, and any warning messages. 
""" - valid = 0 invalid = 0 warning = {} @@ -595,6 +608,8 @@ def in_range( def is_in(self, value: list, column: str) -> dict: """ + is_in method. + Check if the values in a specified column of a DataFrame are present in a given list of values. @@ -607,7 +622,6 @@ def is_in(self, value: list, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} @@ -641,6 +655,8 @@ def is_in(self, value: list, column: str) -> dict: def not_in(self, value: list, column: str) -> dict: """ + not_in method. + Checks if the values in a specified column of a DataFrame are not present in a given list of values. @@ -653,7 +669,6 @@ def not_in(self, value: list, column: str) -> dict: including the number of valid and invalid values, and any warning messages. """ - valid = 0 invalid = 0 warning = {} diff --git a/datasae/utils.py b/datasae/utils.py index 6432ea7..bfb98e0 100644 --- a/datasae/utils.py +++ b/datasae/utils.py @@ -4,10 +4,14 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""Utility library.""" + from typing import Any class Basic: + """Basic class.""" + def response( self, valid: int = 0, @@ -15,6 +19,8 @@ def response( warning: dict = {} ) -> dict: """ + Response method. + Calculates a score based on the number of valid and invalid inputs. Args: @@ -29,7 +35,6 @@ def response( number of valid inputs, number of invalid inputs, and any warning messages. 
""" - score = valid / (invalid + valid) if valid + invalid != 0 else 0 result = { 'score': score, @@ -41,6 +46,8 @@ def response( class WarningDataMessage: + """WarningDataMessage class.""" + INVALID_VALUE: str = 'Invalid Value' INVALID_DATA_TYPE: str = 'Invalid Data Type' @@ -62,7 +69,6 @@ def create_warning_data( Returns: dict: _description_ """ - return { 'message': message, 'value': value, diff --git a/tests/__init__.py b/tests/__init__.py index 5462c48..0f737cf 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -4,4 +4,6 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""Tests library.""" + MESSAGE: str = 'Result Not Match' diff --git a/tests/test_boolean.py b/tests/test_boolean.py index b948b70..4710a75 100644 --- a/tests/test_boolean.py +++ b/tests/test_boolean.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""test_boolean library.""" + import random import unittest @@ -15,11 +17,15 @@ class BooleanTest(unittest.TestCase): + """BooleanTest class.""" + def __init__(self, methodName: str = "BooleanTest"): + """__init__ method.""" super().__init__(methodName) self.maxDiff = None def test_is_bool_valid(self): + """test_is_bool_valid method.""" dummy = pd.DataFrame( {"columm": [random.choice([True, False]) for i in range(25)]} ) @@ -35,6 +41,7 @@ def test_is_bool_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_is_bool_invalid(self): + """test_is_bool_invalid method.""" dummy = pd.concat( [ pd.DataFrame( @@ -82,6 +89,7 @@ def test_is_bool_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_check_is_in_valid(self): + """test_check_is_in_valid method.""" dummy = pd.DataFrame( {"columm": [random.choice(["true", "false"]) for i in range(25)]} ) @@ -97,6 +105,7 @@ def test_check_is_in_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) 
def test_check_is_in_invalid(self): + """test_check_is_in_invalid method.""" dummy = pd.concat( [ pd.DataFrame( diff --git a/tests/test_converter/__init__.py b/tests/test_converter/__init__.py index 9ff099f..5895907 100644 --- a/tests/test_converter/__init__.py +++ b/tests/test_converter/__init__.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""test_converter.""" + from os import path import unittest @@ -15,6 +17,9 @@ class CaseInsensitiveEnumTest(unittest.TestCase): + """CaseInsensitiveEnumTest.""" + def test_case_insensitive_enum(self): + """test_case_insensitive_enum.""" self.assertEqual('.JSON', FileType.JSON) self.assertIs(FileType('.JSON'), FileType.JSON) diff --git a/tests/test_converter/test_s3.py b/tests/test_converter/test_s3.py index 65f6e22..a0ee450 100644 --- a/tests/test_converter/test_s3.py +++ b/tests/test_converter/test_s3.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. 
+"""test_s3.""" + from string import ascii_lowercase from os import path import unittest @@ -17,10 +19,18 @@ class MockResponse: - def close(self): pass - def release_conn(self): pass + """MockResponse.""" + + def close(self): + """close.""" + pass + + def release_conn(self): + """release_conn.""" + pass def __init__(self, bucket_name: str, object_name: str): + """__init__.""" with open(path.join(PATH, object_name), 'rb') as file: self.data: bytes = file.read() @@ -36,21 +46,27 @@ def __init__(self, bucket_name: str, object_name: str): class S3Test(unittest.TestCase): + """S3Test.""" + def assertDataframeEqual(self, a, b, msg): + """assertDataframeEqual.""" try: assert_frame_equal(a, b) except AssertionError as e: raise self.failureException(msg) from e def setUp(self): + """Set up method.""" self.addTypeEqualityFunc(DataFrame, self.assertDataframeEqual) def __init__(self, methodName: str = 'runTest'): + """__init__.""" super().__init__(methodName) self.NAME: str = 'test_s3' self.s3 = CONFIG_JSON(self.NAME) def test_config(self): + """test_config.""" for config in (CONFIG_JSON, CONFIG_YAML): s3 = config(self.NAME) self.assertIs(s3.type, DataSourceType.S3) @@ -61,10 +77,12 @@ def test_config(self): ) def test_connection(self): + """test_connection.""" self.assertTrue(hasattr(self.s3.connection, 'get_object')) @patch('minio.Minio.get_object', side_effect=MockResponse) def test_convert(self, _): + """test_convert.""" BUCKET_NAME: str = 'datasae' DATA: DataFrame = DataFrame({'alphabet': list(ascii_lowercase)}) diff --git a/tests/test_exception.py b/tests/test_exception.py index bebda5e..7db05a4 100644 --- a/tests/test_exception.py +++ b/tests/test_exception.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. 
+"""test_exception.""" + import unittest from datasae.exception import ( @@ -14,7 +16,10 @@ class ExceptionTest(unittest.TestCase): + """ExceptionTest.""" + def test_exception(self): + """test_exception.""" self.assertEqual(EmptyDataFrame().message, 'DataFrame is empty.') self.assertEqual( ColumnNotExist('column').message, diff --git a/tests/test_float.py b/tests/test_float.py index fc93233..8f81662 100644 --- a/tests/test_float.py +++ b/tests/test_float.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""test_float.""" + import unittest import numpy as np @@ -16,12 +18,16 @@ class FloatTest(unittest.TestCase): + """FloatTest.""" + def __init__(self, methodName: str = 'TestFloat'): + """__init__.""" super().__init__(methodName) self.maxDiff = None self.dummy = pd.DataFrame({'columm': np.random.uniform(.0, 1., 20)}) def test_equal_to(self): + """test_equal_to.""" dummy = pd.DataFrame([ {'columm': -.5}, {'columm': .0}, {'columm': '1.0'} ]) @@ -46,6 +52,7 @@ def test_equal_to(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_valid(self): + """test_less_valid.""" dummy = self.dummy actual_result = Float(dummy).less_than(1., 'columm') @@ -59,6 +66,7 @@ def test_less_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_invalid(self): + """test_less_invalid.""" dummy = pd.concat([ self.dummy, pd.DataFrame([ @@ -89,6 +97,7 @@ def test_less_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_equal_valid(self): + """test_less_equal_valid.""" dummy = self.dummy actual_result = Float(dummy).less_than_equal(1., 'columm') @@ -102,6 +111,7 @@ def test_less_equal_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_equal_invalid(self): + """test_less_equal_invalid.""" dummy = pd.concat([ self.dummy, pd.DataFrame([ @@ -133,6 +143,7 @@ def 
test_less_equal_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_valid(self): + """test_greater_valid.""" dummy = self.dummy actual_result = Float(dummy).greater_than(.0, 'columm') @@ -146,6 +157,7 @@ def test_greater_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_invalid(self): + """test_greater_invalid.""" dummy = pd.concat([ self.dummy, pd.DataFrame([ @@ -180,6 +192,7 @@ def test_greater_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_equal_valid(self): + """test_greater_equal_valid.""" dummy = self.dummy actual_result = Float(dummy).greater_than_equal(.0, 'columm') @@ -193,6 +206,7 @@ def test_greater_equal_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_equal_invalid(self): + """test_greater_equal_invalid.""" dummy = pd.concat([ self.dummy, pd.DataFrame([ @@ -223,6 +237,7 @@ def test_greater_equal_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_in_range(self): + """test_in_range.""" dummy = pd.concat([ self.dummy, pd.DataFrame([{'columm': '0.5'}, {'columm': 1.1}]) @@ -249,6 +264,7 @@ def test_in_range(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_is_in(self): + """test_is_in.""" dummy = pd.DataFrame([ {'columm': 1.}, {'columm': .0}, {'columm': '0.5'} ]) @@ -271,6 +287,7 @@ def test_is_in(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_not_in(self): + """test_not_in.""" dummy = pd.DataFrame([ {'columm': 1.}, {'columm': .0}, {'columm': '0.5'} ]) diff --git a/tests/test_integer.py b/tests/test_integer.py index 5c5cd5f..d7dba19 100644 --- a/tests/test_integer.py +++ b/tests/test_integer.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. 
+"""test_integer.""" + import unittest import numpy as np @@ -16,11 +18,15 @@ class IntegerTest(unittest.TestCase): + """IntegerTest.""" + def __init__(self, methodName: str = "TestInteger"): + """__init__.""" super().__init__(methodName) self.maxDiff = None def test_equal_to_valid(self): + """test_equal_to_valid.""" dummy = pd.DataFrame({"columm": [11 for i in range(25)]}) actual_result = Integer(dummy).equal_to(11, "columm") @@ -34,6 +40,7 @@ def test_equal_to_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_equal_to_invalid(self): + """test_equal_to_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": [11 for i in range(25)]}), @@ -70,6 +77,7 @@ def test_equal_to_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_valid(self): + """test_less_valid.""" dummy = pd.DataFrame({"columm": np.random.randint(0, 10, 25)}) actual_result = Integer(dummy).less_than(11, "columm") @@ -83,6 +91,7 @@ def test_less_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_invalid(self): + """test_less_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": np.random.randint(0, 10, 20)}), @@ -120,6 +129,7 @@ def test_less_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_equal_valid(self): + """test_less_equal_valid.""" dummy = pd.DataFrame({"columm": np.random.randint(0, 10, 25)}) actual_result = Integer(dummy).less_than_equal(10, "columm") @@ -133,6 +143,7 @@ def test_less_equal_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_equal_invalid(self): + """test_less_equal_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": np.random.randint(0, 10, 20)}), @@ -172,6 +183,7 @@ def test_less_equal_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_valid(self): + """test_greater_valid.""" dummy = pd.DataFrame({"columm": np.random.randint(10, 20, 25)}) 
actual_result = Integer(dummy).greater_than(9, "columm") @@ -185,6 +197,7 @@ def test_greater_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_invalid(self): + """test_greater_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": np.random.randint(11, 20, 20)}), @@ -222,6 +235,7 @@ def test_greater_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_equal_valid(self): + """test_greater_equal_valid.""" dummy = pd.DataFrame({"columm": np.random.randint(10, 20, 25)}) actual_result = Integer(dummy).greater_than_equal(10, "columm") @@ -235,6 +249,7 @@ def test_greater_equal_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_equal_invalid(self): + """test_greater_equal_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": np.random.randint(10, 20, 20)}), @@ -274,6 +289,7 @@ def test_greater_equal_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_in_range_valid(self): + """test_in_range_valid.""" dummy = pd.DataFrame({"columm": np.random.randint(0, 10, 25)}) actual_result = Integer(dummy).in_range(-2, 11, "columm") @@ -287,6 +303,7 @@ def test_in_range_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_in_range_invalid(self): + """test_in_range_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": np.random.randint(0, 10, 25)}), @@ -319,6 +336,7 @@ def test_in_range_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_is_in_valid(self): + """test_is_in_valid.""" dummy = pd.DataFrame({"columm": np.random.randint(0, 10, 25)}) actual_result = Integer(dummy).is_in(range(10), "columm") @@ -332,6 +350,7 @@ def test_is_in_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_is_in_invalid(self): + """test_is_in_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": np.random.randint(0, 10, 25)}), @@ -360,6 +379,7 
@@ def test_is_in_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_not_in_valid(self): + """test_not_in_valid.""" dummy = pd.DataFrame({"columm": np.random.randint(0, 10, 25)}) actual_result = Integer(dummy).not_in([10], "columm") @@ -373,6 +393,7 @@ def test_not_in_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_not_in_invalid(self): + """test_not_in_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": np.random.randint(0, 10, 25)}), @@ -400,6 +421,7 @@ def test_not_in_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_length_valid(self): + """test_length_valid.""" dummy = pd.DataFrame({"columm": np.random.randint(0, 10, 25)}) actual_result = Integer(dummy).length(1, "columm") @@ -413,6 +435,7 @@ def test_length_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_length_invalid(self): + """test_length_invalid.""" dummy = pd.concat( [ pd.DataFrame({"columm": np.random.randint(0, 10, 25)}), diff --git a/tests/test_string.py b/tests/test_string.py index fe01997..22bb6a1 100644 --- a/tests/test_string.py +++ b/tests/test_string.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. 
+"""test_string.""" + import unittest import pandas as pd @@ -14,11 +16,15 @@ class StringTest(unittest.TestCase): + """StringTest.""" + def __init__(self, methodName: str = "TestString"): + """__init__.""" super().__init__(methodName) self.maxDiff = None def test_contain_valid(self): + """test_contain_valid.""" dummy = pd.DataFrame( { "column": [ @@ -47,6 +53,7 @@ def test_contain_valid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_contain_invalid(self): + """test_contain_invalid.""" dummy = pd.DataFrame( {"column": ["Python", "PYTHON", "Bukan", 42, 3.14]} ) @@ -79,6 +86,7 @@ def test_contain_invalid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_not_contain_valid(self): + """test_not_contain_valid.""" dummy = pd.DataFrame( {"column": ["python", "PYTHON", "Bukan", "Ini String", "String"]} ) @@ -94,6 +102,7 @@ def test_not_contain_valid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_not_contain_invalid(self): + """test_not_contain_invalid.""" dummy = pd.DataFrame( {"column": ["Python", "python", "PYTHON", 42, 3.14]} ) @@ -123,6 +132,7 @@ def test_not_contain_invalid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_regex_custom_valid(self): + """test_regex_custom_valid.""" dummy = pd.DataFrame( { "column": [ @@ -148,6 +158,7 @@ def test_regex_custom_valid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_regex_custom_invalid(self): + """test_regex_custom_invalid.""" dummy = pd.DataFrame( {"column": ["Python", "Ini Python", "bukan python", 77, 3.17]} ) @@ -179,6 +190,7 @@ def test_regex_custom_invalid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_special_character_valid(self): + """test_special_character_valid.""" dummy = pd.DataFrame( {"column": ["Python !", "! 
Python", "!python", "!", "!!"]} ) @@ -196,6 +208,7 @@ def test_special_character_valid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_special_character_invalid(self): + """test_special_character_invalid.""" dummy = pd.DataFrame({"column": ["!", "? Python", "!python", 3, 3.14]}) actual_result = String(dummy).special_char_contain( @@ -263,6 +276,7 @@ def test_special_character_invalid(self): # self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_lowercase_valid(self): + """test_lowercase_valid.""" dummy = pd.DataFrame( { "column": [ @@ -286,6 +300,7 @@ def test_lowercase_valid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_lowercase_invalid(self): + """test_lowercase_invalid.""" dummy = pd.DataFrame( {"column": ["python", "ini Python", 3.14, 3, "python"]} ) @@ -313,6 +328,7 @@ def test_lowercase_invalid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_uppercase_valid(self): + """test_uppercase_valid.""" dummy = pd.DataFrame( {"column": ["PYTHON", "INI", "PYTHON", "SUKA", "PYTHON"]} ) @@ -328,6 +344,7 @@ def test_uppercase_valid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_uppercase_invalid(self): + """test_uppercase_invalid.""" dummy = pd.DataFrame({"column": ["PYTHON", "Ini", 3.14, 3, "PYTHON"]}) actual_result = String(dummy).is_uppercase("column") @@ -353,6 +370,7 @@ def test_uppercase_invalid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_capitalise_first_word_valid(self): + """test_capitalise_first_word_valid.""" dummy = pd.DataFrame( {"column": ["Python", "Ini saya", "Python", "Suka", "Python"]} ) @@ -370,6 +388,7 @@ def test_capitalise_first_word_valid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_capitalise_first_word_invalid(self): + """test_capitalise_first_word_invalid.""" dummy = pd.DataFrame( {"column": ["Python", "ini saya", 3.14, 3, "Python"]} ) @@ 
-401,6 +420,7 @@ def test_capitalise_first_word_invalid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_capitalise_all_word_valid(self): + """test_capitalise_all_word_valid.""" dummy = pd.DataFrame( { "column": [ @@ -426,6 +446,7 @@ def test_capitalise_all_word_valid(self): self.assertDictEqual(actual_result, expected_result, MESSAGE) def test_capitalise_all_word_invalid(self): + """test_capitalise_all_word_invalid.""" dummy = pd.DataFrame( {"column": ["Python", "ini saya", 3.14, 3, "Belajar Python"]} ) diff --git a/tests/test_timestamp.py b/tests/test_timestamp.py index b2e86f3..ed32f74 100644 --- a/tests/test_timestamp.py +++ b/tests/test_timestamp.py @@ -4,6 +4,8 @@ # Licensed under the AGPL-3.0-only License. See LICENSE in the project root # for license information. +"""test_timestamp.""" + from datetime import datetime, timedelta import unittest @@ -15,11 +17,15 @@ class TimestampTest(unittest.TestCase): + """TimestampTest.""" + def __init__(self, methodName: str = "TimestampTest"): + """__init__.""" super().__init__(methodName) self.maxDiff = None def test_equal_to_valid(self): + """test_equal_to_valid.""" timestamp = datetime.now() dummy = pd.DataFrame({"columm": [timestamp for i in range(25)]}) @@ -35,6 +41,7 @@ def test_equal_to_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_equal_to_invalid(self): + """test_equal_to_invalid.""" timestamp = datetime.now() timestamp_invalid = datetime.now() @@ -76,6 +83,7 @@ def test_equal_to_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_than_valid(self): + """test_less_than_valid.""" timestamp = datetime.now() dummy = pd.DataFrame({"columm": [timestamp for i in range(25)]}) @@ -91,6 +99,7 @@ def test_less_than_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_than_invalid(self): + """test_less_than_invalid.""" timestamp = datetime.now() timestamp_invalid = datetime.now() + 
timedelta(days=1) timestamp_condition = datetime.now() @@ -136,6 +145,7 @@ def test_less_than_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_than_equal_valid(self): + """test_less_than_equal_valid.""" timestamp = datetime.now() dummy = pd.DataFrame({"columm": [timestamp for i in range(25)]}) @@ -153,6 +163,7 @@ def test_less_than_equal_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_less_than_equal_invalid(self): + """test_less_than_equal_invalid.""" timestamp = datetime.now() timestamp_invalid = datetime.now() + timedelta(days=1) timestamp_condition = datetime.now() @@ -198,6 +209,7 @@ def test_less_than_equal_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_than_valid(self): + """test_greater_than_valid.""" timestamp = datetime.now() dummy = pd.DataFrame({"columm": [timestamp for i in range(25)]}) @@ -215,6 +227,7 @@ def test_greater_than_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_than_invalid(self): + """test_greater_than_invalid.""" timestamp = datetime.now() timestamp_invalid = datetime.now() - timedelta(days=2) timestamp_condition = datetime.now() - timedelta(days=1) @@ -260,6 +273,7 @@ def test_greater_than_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_than_equal_valid(self): + """test_greater_than_equal_valid.""" timestamp = datetime.now() dummy = pd.DataFrame({"columm": [timestamp for i in range(25)]}) @@ -277,6 +291,7 @@ def test_greater_than_equal_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_greater_than_equal_invalid(self): + """test_greater_than_equal_invalid.""" timestamp = datetime.now() timestamp_invalid = datetime.now() - timedelta(days=1) @@ -321,6 +336,7 @@ def test_greater_than_equal_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_in_range_valid(self): 
+ """test_in_range_valid.""" timestamp = datetime.now() dummy = pd.DataFrame({"columm": [timestamp for i in range(25)]}) @@ -340,6 +356,7 @@ def test_in_range_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_in_range_invalid(self): + """test_in_range_invalid.""" timestamp = datetime.now() timestamp = datetime.now() @@ -392,6 +409,7 @@ def test_in_range_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_is_in_valid(self): + """test_is_in_valid.""" timestamp = datetime.now() dummy = pd.DataFrame({"columm": [timestamp for i in range(25)]}) @@ -407,6 +425,7 @@ def test_is_in_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_is_in_invalid(self): + """test_is_in_invalid.""" timestamp = datetime.now() timestamp_invalid = datetime.now() - timedelta(days=1) @@ -450,6 +469,7 @@ def test_is_in_invalid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_not_in_valid(self): + """test_not_in_valid.""" timestamp = datetime.now() dummy = pd.DataFrame({"columm": [timestamp for i in range(25)]}) @@ -467,6 +487,7 @@ def test_not_in_valid(self): self.assertDictEqual(actual_result, excepted_result, MESSAGE) def test_not_in_invalid(self): + """test_not_in_invalid.""" timestamp = datetime.now() timestamp_invalid = datetime.now() - timedelta(days=1)