Skip to content

Commit

Permalink
fix: Filling empty docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
pipinfitriadi committed Oct 24, 2023
1 parent f310478 commit a2560ec
Show file tree
Hide file tree
Showing 19 changed files with 437 additions and 71 deletions.
13 changes: 13 additions & 0 deletions datasae/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,16 @@
# Copyright (c) Free Software Foundation, Inc. All rights reserved.
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""
This is a standalone Python script that is used to execute a specific task.
Task:
- Generate a new version of the code snippet, with an additional docstring.
- Make sure the docstring starts and ends with standard Python docstring signs.
- The docstring should be in standard format. Use the 'Code Explanation' only
as a reference, and don't copy its sections directly.
- Except for the docstring, the new code should be identical to the original
code snippet. Keep existing code comments, line comments, blank lines,
formatting, etc.
"""
27 changes: 21 additions & 6 deletions datasae/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,39 @@
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""Library data quality for boolean type."""

import pandas as pd

from .exception import InvalidDataTypeWarning
from .utils import Basic, create_warning_data, WarningDataMessage


class WarningDataDetailMessage:
    """Predefined warning messages for boolean data-validation scenarios.

    Attributes:
        BOOLEAN_DATA_TYPE (str): Message used when a value is not of
            boolean data type.
        DEFINED_DATA_TYPE (str): Message used when a value is not equal
            to a defined value.
    """

    BOOLEAN_DATA_TYPE: str = "Value must be of boolean data type"
    DEFINED_DATA_TYPE: str = "Value must be equal to defined value"


class Boolean(Basic):
"""Data Quality class for boolean type."""

def __init__(self, dataFrame: pd.DataFrame):
    """Initialize the Boolean data-quality checker.

    NOTE(review): the previous docstring said "Integer class"; this is
    the ``Boolean`` class — corrected.

    Args:
        dataFrame (pd.DataFrame): The data you want to process.
    """

    self.dataFrame = dataFrame

@staticmethod
Expand All @@ -42,7 +55,6 @@ def check_bool(bool_data: bool) -> tuple:
value is invalid, including the warning message,
the actual value, and a detailed message.
"""

valid = 0
invalid = 0
warning_data = {}
Expand All @@ -59,6 +71,8 @@ def check_bool(bool_data: bool) -> tuple:

def is_bool(self, column: str) -> dict:
"""
Checker method for boolean type data.
Check if the value in a specified column of a DataFrame
are boolean data type.
Expand All @@ -70,7 +84,6 @@ def is_bool(self, column: str) -> dict:
including the number of valid and invalid values,
and any warning messages.
"""

valid = 0
invalid = 0
warning = {}
Expand Down Expand Up @@ -105,6 +118,8 @@ def is_bool(self, column: str) -> dict:
@staticmethod
def check_is_in(bool_data, is_in: list):
"""
Checker in method for boolean type data.
Check if every row of a given DataFrame column are equal to
defined boolean list.
Expand All @@ -121,7 +136,6 @@ def check_is_in(bool_data, is_in: list):
value is invalid, including the warning message,
the actual value, and a detailed message.
"""

valid = 0
invalid = 0
warning_data = {}
Expand All @@ -138,6 +152,8 @@ def check_is_in(bool_data, is_in: list):

def is_in(self, is_in: list, column: str) -> dict:
"""
Checker in method for boolean type data.
Check if every row of a given DataFrame column are equal to
defined boolean list
Expand All @@ -151,7 +167,6 @@ def is_in(self, is_in: list, column: str) -> dict:
including the number of valid and invalid values,
and any warning messages.
"""

valid = 0
invalid = 0
warning = {}
Expand Down
90 changes: 86 additions & 4 deletions datasae/converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""
Converter library.
A class called `Config` that represents a configuration object for reading
data source configurations from a JSON or YAML file.
"""

from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
Expand All @@ -18,11 +25,45 @@


class CaseInsensitiveEnum(str, Enum):
"""
A case-insensitive enumeration class.
A case-insensitive enumeration class that allows for case-insensitive
comparison of enum values and provides a case-insensitive lookup of enum
members.
"""

def __eq__(self, __value: str) -> bool:
    """Compare this enum value to ``__value`` case-insensitively.

    Overrides ``str.__eq__`` so that the comparison value is lowercased
    before delegating to the parent implementation; falsy values
    (e.g. ``None`` or ``""``) are passed through unchanged.

    Args:
        __value (str): The value to compare with the enum value.

    Returns:
        bool: True if the values are equal (case-insensitive), False
            otherwise.
    """
    return super().__eq__(__value.lower() if __value else __value)

@classmethod
def _missing_(cls, value: str) -> CaseInsensitiveEnum:
"""
_missing_ method.
Overrides the _missing_ method to perform case-insensitive lookup of
enum members.
Args:
value (str): The value to lookup in the enum members.
Returns:
CaseInsensitiveEnum: The enum member with the matching value (case-
insensitive).
"""
value = value.lower() if value else value

for member in cls:
Expand All @@ -31,6 +72,13 @@ def _missing_(cls, value: str) -> CaseInsensitiveEnum:


class FileType(CaseInsensitiveEnum):
"""
FileType enumeration.
Represents different types of file formats with case-insensitive
comparison and lookup of enum values.
"""

CSV = '.csv'
JSON = '.json'
PARQUET = '.parquet'
Expand All @@ -40,11 +88,24 @@ class FileType(CaseInsensitiveEnum):


class DataSourceType(CaseInsensitiveEnum):
    """Case-insensitive enumeration of supported data-source types.

    Attributes:
        S3: An S3 object-storage data source (value ``'s3'``).
    """

    S3 = 's3'


@dataclass(repr=False)
class DataSource:
"""
DataSource class.
A class that converts data of different file types into a Pandas DataFrame.
"""

type: DataSourceType

@property
Expand All @@ -55,7 +116,6 @@ def connection(self) -> dict:
Returns:
dict: Key-value parameters for connection to datasource.
"""

return {
key: value
for key, value in self.__dict__.items()
Expand All @@ -66,6 +126,8 @@ def __call__(
self, file_type: FileType, data: bytes, *args, **kwargs
) -> pd.DataFrame | bytes:
"""
__call__ method.
Converter from various file type into Pandas DataFrame.
Args:
Expand All @@ -76,7 +138,6 @@ def __call__(
DataFrame | bytes: Pandas DataFrame or bytes if file type not
support.
"""

if file_type in list(FileType):
func: Callable = None

Expand Down Expand Up @@ -104,14 +165,36 @@ def __call__(


class Config:
"""
A class that represents a configuration object.
Args:
file_path (str): The source path of the .json or .yaml file.
Example Usage:
config = Config("data.json")
data_source = config("source1")
print(data_source.connection)
Attributes:
__file (str): The source path of the file.
__file_type (str): The type of the file.
Methods:
__call__(name):
Returns a data source configuration from a file.
"""

def __init__(self, file_path: str):
    """Initialize the converter configuration from a config file path.

    The file's suffix is used to resolve its ``FileType``; a suffix
    outside the ``FileType`` enum raises ``ValueError``.

    Args:
        file_path (str): Source path of your .json or .yaml file.
    """

    self.__file: Path = Path(file_path)
    self.__file_type: FileType = FileType(self.__file.suffix)

Expand All @@ -126,7 +209,6 @@ def __call__(self, name: str) -> DataSource:
DataSource: An instance class of data source containing
configuration properties.
"""

config: dict = {}

with open(self.__file) as file:
Expand Down
44 changes: 30 additions & 14 deletions datasae/converter/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""s3 library."""

from __future__ import annotations
from dataclasses import dataclass
from pandas import DataFrame
Expand All @@ -16,46 +18,60 @@

@dataclass(repr=False)
class S3(DataSource):
"""
Represents a data source that connects to an S3 bucket.
Args:
endpoint (str): The endpoint URL of the S3 bucket.
access_key (str): The access key for authentication.
secret_key (str): The secret key for authentication.
"""

endpoint: str
access_key: str
secret_key: str

@property
def connection(self) -> Minio:
    """Return a client connected to the S3 bucket.

    Builds the client from this data source's connection parameters
    (endpoint, access key, secret key) via the parent class.

    Returns:
        minio.Minio: An instance of the Minio class.
    """

    return Minio(**super().connection)

def __call__(
self, bucket_name: str, object_name: str, *args, **kwargs
) -> DataFrame | bytes:
"""
Converter from various file type into Pandas DataFrame.
__call__ method.
Converts the data from the specified bucket and object name into a
Pandas DataFrame.
Args:
bucket_name (str): Name of the bucket.
object_name (str): Object name in the bucket.
sheet_name (int | str, optional): This param only works for .xlsx.
Strings are used for sheet names. Integers are used in
zero-indexed sheet positions (chart sheets do not count as a
sheet position). Lists of strings/integers are used to request
multiple sheets. Specify None to get all worksheets.
bucket_name (str): The name of the bucket.
object_name (str): The object name in the bucket.
*args: Additional positional arguments.
**kwargs: Additional keyword arguments.
Keyword Args:
sheet_name (int | str, optional): This parameter only works for
.xlsx files. Strings are used for sheet names. Integers are
used for zero-indexed sheet positions (chart sheets do not
count as a sheet position). Lists of strings/integers are used
to request multiple sheets. Specify None to get all worksheets.
Available cases:
- Defaults to None: 1st sheet as a DataFrame
- 0: 1st sheet as a DataFrame
- 1: 2nd sheet as a DataFrame
- "Sheet1": Load sheet with name "Sheet1"
Returns:
DataFrame | bytes: Pandas DataFrame or bytes if file type not
support.
DataFrame | bytes: A Pandas DataFrame or bytes if the file type is
not supported.
"""

sheet_name: int | str = kwargs.pop('sheet_name', None)
response: BaseHTTPResponse = self.connection.get_object(
bucket_name, object_name, *args, **kwargs
Expand Down
Loading

0 comments on commit a2560ec

Please sign in to comment.