Skip to content

Commit

Permalink
fix: Filling empty docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
pipinfitriadi committed Oct 24, 2023
1 parent f310478 commit a2560ec
Show file tree
Hide file tree
Showing 19 changed files with 437 additions and 71 deletions.
13 changes: 13 additions & 0 deletions datasae/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,16 @@
# Copyright (c) Free Software Foundation, Inc. All rights reserved.
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""
This is a standalone Python script that is used to execute a specific task.
Task:
- Generate a new version of the code snippet, with an additional docstring.
- Make sure the docstring starts and ends with standard Python docstring signs.
- The docstring should be in standard format. Use the 'Code Explanation' only
as a reference, and don't copy its sections directly.
- Except for the docstring, the new code should be identical to the original
code snippet. Keep existing code comments, line comments, blank lines,
formatting, etc.
"""
27 changes: 21 additions & 6 deletions datasae/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,39 @@
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""Library data quality for boolean type."""

import pandas as pd

from .exception import InvalidDataTypeWarning
from .utils import Basic, create_warning_data, WarningDataMessage


class WarningDataDetailMessage:
    """Predefined warning messages for boolean data-validation scenarios.

    Attributes:
        BOOLEAN_DATA_TYPE (str): Message used when a value is not of
            boolean data type.
        DEFINED_DATA_TYPE (str): Message used when a value is not equal
            to a defined value.
    """

    BOOLEAN_DATA_TYPE: str = "Value must be of boolean data type"
    DEFINED_DATA_TYPE: str = "Value must be equal to defined value"


class Boolean(Basic):
"""Data Quality class for boolean type."""

def __init__(self, dataFrame: pd.DataFrame):
    """Initialize the Boolean data-quality checker.

    NOTE(review): the previous docstring said "Integer class"; this is
    the ``Boolean`` class — corrected.

    Args:
        dataFrame (pd.DataFrame): The data you want to process.
    """

    self.dataFrame = dataFrame

@staticmethod
Expand All @@ -42,7 +55,6 @@ def check_bool(bool_data: bool) -> tuple:
value is invalid, including the warning message,
the actual value, and a detailed message.
"""

valid = 0
invalid = 0
warning_data = {}
Expand All @@ -59,6 +71,8 @@ def check_bool(bool_data: bool) -> tuple:

def is_bool(self, column: str) -> dict:
"""
Checker method for boolean type data.
Check if the value in a specified column of a DataFrame
are boolean data type.
Expand All @@ -70,7 +84,6 @@ def is_bool(self, column: str) -> dict:
including the number of valid and invalid values,
and any warning messages.
"""

valid = 0
invalid = 0
warning = {}
Expand Down Expand Up @@ -105,6 +118,8 @@ def is_bool(self, column: str) -> dict:
@staticmethod
def check_is_in(bool_data, is_in: list):
"""
Checker in method for boolean type data.
Check if every row of a given DataFrame column are equal to
defined boolean list.
Expand All @@ -121,7 +136,6 @@ def check_is_in(bool_data, is_in: list):
value is invalid, including the warning message,
the actual value, and a detailed message.
"""

valid = 0
invalid = 0
warning_data = {}
Expand All @@ -138,6 +152,8 @@ def check_is_in(bool_data, is_in: list):

def is_in(self, is_in: list, column: str) -> dict:
"""
Checker in method for boolean type data.
Check if every row of a given DataFrame column are equal to
defined boolean list
Expand All @@ -151,7 +167,6 @@ def is_in(self, is_in: list, column: str) -> dict:
including the number of valid and invalid values,
and any warning messages.
"""

valid = 0
invalid = 0
warning = {}
Expand Down
90 changes: 86 additions & 4 deletions datasae/converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""
Converter library.
A class called `Config` that represents a configuration object for reading
data source configurations from a JSON or YAML file.
"""

from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
Expand All @@ -18,11 +25,45 @@


class CaseInsensitiveEnum(str, Enum):
"""
A case-insensitive enumeration class.
A case-insensitive enumeration class that allows for case-insensitive
comparison of enum values and provides a case-insensitive lookup of enum
members.
"""

def __eq__(self, __value: str) -> bool:
    """Compare this enum value to ``__value`` case-insensitively.

    Overrides ``str.__eq__`` so that the comparison value is lowercased
    before delegating to the parent implementation; falsy values
    (e.g. ``None`` or ``""``) are passed through unchanged.

    Args:
        __value (str): The value to compare with the enum value.

    Returns:
        bool: True if the values are equal (case-insensitive), False
            otherwise.
    """
    return super().__eq__(__value.lower() if __value else __value)

@classmethod
def _missing_(cls, value: str) -> CaseInsensitiveEnum:
"""
_missing_ method.
Overrides the _missing_ method to perform case-insensitive lookup of
enum members.
Args:
value (str): The value to lookup in the enum members.
Returns:
CaseInsensitiveEnum: The enum member with the matching value (case-
insensitive).
"""
value = value.lower() if value else value

for member in cls:
Expand All @@ -31,6 +72,13 @@ def _missing_(cls, value: str) -> CaseInsensitiveEnum:


class FileType(CaseInsensitiveEnum):
"""
FileType enumeration.
Represents different types of file formats with case-insensitive
comparison and lookup of enum values.
"""

CSV = '.csv'
JSON = '.json'
PARQUET = '.parquet'
Expand All @@ -40,11 +88,24 @@ class FileType(CaseInsensitiveEnum):


class DataSourceType(CaseInsensitiveEnum):
    """Case-insensitive enumeration of supported data-source types.

    Attributes:
        S3: An S3 object-storage data source (value ``'s3'``).
    """

    S3 = 's3'


@dataclass(repr=False)
class DataSource:
"""
DataSource class.
A class that converts data of different file types into a Pandas DataFrame.
"""

type: DataSourceType

@property
Expand All @@ -55,7 +116,6 @@ def connection(self) -> dict:
Returns:
dict: Key-value parameters for connection to datasource.
"""

return {
key: value
for key, value in self.__dict__.items()
Expand All @@ -66,6 +126,8 @@ def __call__(
self, file_type: FileType, data: bytes, *args, **kwargs
) -> pd.DataFrame | bytes:
"""
__call__ method.
Converter from various file type into Pandas DataFrame.
Args:
Expand All @@ -76,7 +138,6 @@ def __call__(
DataFrame | bytes: Pandas DataFrame or bytes if file type not
support.
"""

if file_type in list(FileType):
func: Callable = None

Expand Down Expand Up @@ -104,14 +165,36 @@ def __call__(


class Config:
"""
A class that represents a configuration object.
Args:
file_path (str): The source path of the .json or .yaml file.
Example Usage:
config = Config("data.json")
data_source = config("source1")
print(data_source.connection)
Attributes:
__file (str): The source path of the file.
__file_type (str): The type of the file.
Methods:
__call__(name):
Returns a data source configuration from a file.
"""

def __init__(self, file_path: str):
    """Initialize the converter configuration from a config file path.

    The file's suffix is used to resolve its ``FileType``; a suffix
    outside the ``FileType`` enum raises ``ValueError``.

    Args:
        file_path (str): Source path of your .json or .yaml file.
    """

    self.__file: Path = Path(file_path)
    self.__file_type: FileType = FileType(self.__file.suffix)

Expand All @@ -126,7 +209,6 @@ def __call__(self, name: str) -> DataSource:
DataSource: An instance class of data source containing
configuration properties.
"""

config: dict = {}

with open(self.__file) as file:
Expand Down
44 changes: 30 additions & 14 deletions datasae/converter/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""s3 library."""

from __future__ import annotations
from dataclasses import dataclass
from pandas import DataFrame
Expand All @@ -16,46 +18,60 @@

@dataclass(repr=False)
class S3(DataSource):
"""
Represents a data source that connects to an S3 bucket.
Args:
endpoint (str): The endpoint URL of the S3 bucket.
access_key (str): The access key for authentication.
secret_key (str): The secret key for authentication.
"""

endpoint: str
access_key: str
secret_key: str

@property
def connection(self) -> Minio:
    """Return a client connected to the S3 bucket.

    Builds the client from this data source's connection parameters
    (endpoint, access key, secret key) via the parent class.

    Returns:
        minio.Minio: An instance of the Minio class.
    """

    return Minio(**super().connection)

def __call__(
self, bucket_name: str, object_name: str, *args, **kwargs
) -> DataFrame | bytes:
"""
Converter from various file type into Pandas DataFrame.
__call__ method.
Converts the data from the specified bucket and object name into a
Pandas DataFrame.
Args:
bucket_name (str): Name of the bucket.
object_name (str): Object name in the bucket.
sheet_name (int | str, optional): This param only works for .xlsx.
Strings are used for sheet names. Integers are used in
zero-indexed sheet positions (chart sheets do not count as a
sheet position). Lists of strings/integers are used to request
multiple sheets. Specify None to get all worksheets.
bucket_name (str): The name of the bucket.
object_name (str): The object name in the bucket.
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
Keyword Args:
sheet_name (int | str, optional): This parameter only works for
.xlsx files. Strings are used for sheet names. Integers are
used for zero-indexed sheet positions (chart sheets do not
count as a sheet position). Lists of strings/integers are used
to request multiple sheets. Specify None to get all worksheets.
Available cases:
- Defaults to None: 1st sheet as a DataFrame
- 0: 1st sheet as a DataFrame
- 1: 2nd sheet as a DataFrame
- "Sheet1": Load sheet with name "Sheet1"
Returns:
DataFrame | bytes: Pandas DataFrame or bytes if file type not
support.
DataFrame | bytes: A Pandas DataFrame or bytes if the file type is
not supported.
"""

sheet_name: int | str = kwargs.pop('sheet_name', None)
response: BaseHTTPResponse = self.connection.get_object(
bucket_name, object_name, *args, **kwargs
Expand Down
Loading

0 comments on commit a2560ec

Please sign in to comment.