Merge pull request #12 from jabardigitalservice/feature/pep-0257

Feature/pep 0257
jabardigitalservice · Oct 25, 2023 · 5a72827 · 5a72827
2 parents 570cafa + a2560ec
commit 5a72827
Show file tree

Hide file tree

Showing 22 changed files with 462 additions and 87 deletions.
diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml
@@ -11,18 +11,6 @@ on:
       - '[0-9]+.[0-9]+.[0-9]+-[a-z]+'
       - '[0-9]+.[0-9]+.[0-9]+-[a-z]+.[0-9]+'
 jobs:
-  lint:
-    name: Linter Test
-    runs-on: ubuntu-latest
-    container:
-      image: pipelinecomponents/flake8
-    steps:
-      - name: Checkout
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-      - name: Lint with flake8
-        run: flake8
   venv:
     name: Virtual Environment
     runs-on: ubuntu-latest
@@ -44,7 +32,25 @@ jobs:
           python -m venv venv/
           . venv/bin/activate
           pip install --upgrade pip
-          pip install build 'coverage[toml]' pdoc3
+          pip install build 'coverage[toml]' pdoc3 flake8 flake8-docstrings
+  lint:
+    name: Linter Test
+    needs: venv
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Cache venv
+        uses: actions/cache@v3
+        with:
+          path: venv/
+          key: python-venv
+      - name: Lint with flake8
+        run: |
+          . venv/bin/activate
+          python -m flake8 --exclude build/,venv/
   unittest:
     name: Unit Test
     needs: venv

diff --git a/.vscode/extensions.json b/.vscode/extensions.json
@@ -1,5 +1,7 @@
 {
     "recommendations": [
-        "ms-python.flake8"
+        "ms-python.flake8",
+        "njpwerner.autodocstring",
+        "codium.codium"
     ]
 }
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
@@ -22,7 +22,8 @@
                 "build",
                 "'coverage[toml]'",
                 "pdoc3",
-                "flake8"
+                "flake8",
+                "flake8-docstrings"
             ]
         },
         {
@@ -53,7 +54,7 @@
             "label": "Python: Package test linter",
             "type": "shell",
             "command": "${command:python.interpreterPath}",
-            "args": ["-m", "flake8", "--exclude", "venv/"]
+            "args": ["-m", "flake8", "--exclude", "build/,venv/"]
         },
         {
             "label": "Python: Package test report",

diff --git a/datasae/__init__.py b/datasae/__init__.py
@@ -3,3 +3,16 @@
 # Copyright (c) Free Software Foundation, Inc. All rights reserved.
 # Licensed under the AGPL-3.0-only License. See LICENSE in the project root
 # for license information.
+
+"""
+datasae package.
+
+A data quality library for pandas DataFrames.
+
+Submodules include:
+- `datasae.boolean`: data quality checks for boolean columns, reporting the
+    number of valid and invalid values together with warning messages.
+- `datasae.converter`: a `Config` object that reads data source
+    configurations from a JSON or YAML file, and `DataSource` converters
+    that turn supported file types into a Pandas DataFrame.
+"""
diff --git a/datasae/boolean.py b/datasae/boolean.py
@@ -4,26 +4,39 @@
 # Licensed under the AGPL-3.0-only License. See LICENSE in the project root
 # for license information.
 
+"""Library data quality for boolean type."""
+
 import pandas as pd
 
 from .exception import InvalidDataTypeWarning
 from .utils import Basic, create_warning_data, WarningDataMessage
 
 
 class WarningDataDetailMessage:
+    """
+    Provides predefined error messages for specific data validation scenarios.
+
+    Attributes:
+        BOOLEAN_DATA_TYPE (str): Error message for the scenario when a value
+            must be of boolean data type.
+        DEFINED_DATA_TYPE (str): Error message for the scenario when a value
+            must be equal to a defined value.
+    """
+
     BOOLEAN_DATA_TYPE: str = "Value must be of boolean data type"
     DEFINED_DATA_TYPE: str = "Value must be equal to defined value"
 
 
 class Boolean(Basic):
+    """Data Quality class for boolean type."""
+
     def __init__(self, dataFrame: pd.DataFrame):
         """
-        Initializes an instance of the Integer class.
+        Initialize an instance of the Boolean class.
 
         Args:
             dataFrame (pd.DataFrame): The data you want to process.
         """
-
         self.dataFrame = dataFrame
 
     @staticmethod
@@ -42,7 +55,6 @@ def check_bool(bool_data: bool) -> tuple:
                     value is invalid, including the warning message,
                     the actual value, and a detailed message.
         """
-
         valid = 0
         invalid = 0
         warning_data = {}
@@ -59,6 +71,8 @@ def check_bool(bool_data: bool) -> tuple:
 
     def is_bool(self, column: str) -> dict:
         """
+        Checker method for boolean type data.
+
         Check if the value in a specified column of a DataFrame
             are boolean data type.
 
@@ -70,7 +84,6 @@ def is_bool(self, column: str) -> dict:
                 including the number of valid and invalid values,
                 and any warning messages.
         """
-
         valid = 0
         invalid = 0
         warning = {}
@@ -105,6 +118,8 @@ def is_bool(self, column: str) -> dict:
     @staticmethod
     def check_is_in(bool_data, is_in: list):
         """
+        Membership checker method for boolean type data.
+
         Check if every row of a given DataFrame column are equal to
             defined boolean list.
 
@@ -121,7 +136,6 @@ def check_is_in(bool_data, is_in: list):
                     value is invalid, including the warning message,
                     the actual value, and a detailed message.
         """
-
         valid = 0
         invalid = 0
         warning_data = {}
@@ -138,6 +152,8 @@ def check_is_in(bool_data, is_in: list):
 
     def is_in(self, is_in: list, column: str) -> dict:
         """
+        Membership checker method for boolean type data.
+
         Check if every row of a given DataFrame column are equal to
             defined boolean list
 
@@ -151,7 +167,6 @@ def is_in(self, is_in: list, column: str) -> dict:
                 including the number of valid and invalid values,
                 and any warning messages.
         """
-
         valid = 0
         invalid = 0
         warning = {}

diff --git a/datasae/converter/__init__.py b/datasae/converter/__init__.py
@@ -4,6 +4,13 @@
 # Licensed under the AGPL-3.0-only License. See LICENSE in the project root
 # for license information.
 
+"""
+Converter library.
+
+A class called `Config` that represents a configuration object for reading
+data source configurations from a JSON or YAML file.
+"""
+
 from __future__ import annotations
 from dataclasses import dataclass
 from enum import Enum
@@ -18,11 +25,45 @@
 
 
 class CaseInsensitiveEnum(str, Enum):
+    """
+    A case-insensitive enumeration class.
+
+    A case-insensitive enumeration class that allows for case-insensitive
+    comparison of enum values and provides a case-insensitive lookup of enum
+    members.
+    """
+
     def __eq__(self, __value: str) -> bool:
+        """
+        __eq__ method.
+
+        Overrides the __eq__ method to perform case-insensitive comparison of
+        enum values.
+
+        Args:
+            __value (str): The value to compare with the enum value.
+
+        Returns:
+            bool: True if the values are equal (case-insensitive), False
+                otherwise.
+        """
         return super().__eq__(__value.lower() if __value else __value)
 
     @classmethod
     def _missing_(cls, value: str) -> CaseInsensitiveEnum:
+        """
+        _missing_ method.
+
+        Overrides the _missing_ method to perform case-insensitive lookup of
+            enum members.
+
+        Args:
+            value (str): The value to lookup in the enum members.
+
+        Returns:
+            CaseInsensitiveEnum: The enum member with the matching value (case-
+                insensitive).
+        """
         value = value.lower() if value else value
 
         for member in cls:
@@ -31,6 +72,13 @@ def _missing_(cls, value: str) -> CaseInsensitiveEnum:
 
 
 class FileType(CaseInsensitiveEnum):
+    """
+    FileType enumeration.
+
+    Represents different types of file formats with case-insensitive
+    comparison and lookup of enum values.
+    """
+
     CSV = '.csv'
     JSON = '.json'
     PARQUET = '.parquet'
@@ -40,11 +88,24 @@ class FileType(CaseInsensitiveEnum):
 
 
 class DataSourceType(CaseInsensitiveEnum):
+    """
+    DataSourceType enumeration.
+
+    Represents a case-insensitive enumeration for different types of data
+    sources.
+    """
+
     S3 = 's3'
 
 
 @dataclass(repr=False)
 class DataSource:
+    """
+    DataSource class.
+
+    A class that converts data of different file types into a Pandas DataFrame.
+    """
+
     type: DataSourceType
 
     @property
@@ -55,7 +116,6 @@ def connection(self) -> dict:
         Returns:
             dict: Key-value parameters for connection to datasource.
         """
-
         return {
             key: value
             for key, value in self.__dict__.items()
@@ -66,6 +126,8 @@ def __call__(
         self, file_type: FileType, data: bytes, *args, **kwargs
     ) -> pd.DataFrame | bytes:
         """
+        __call__ method.
+
         Converter from various file type into Pandas DataFrame.
 
         Args:
@@ -76,7 +138,6 @@ def __call__(
             DataFrame | bytes: Pandas DataFrame or bytes if file type not
                 support.
         """
-
         if file_type in list(FileType):
             func: Callable = None
 
@@ -104,14 +165,36 @@ def __call__(
 
 
 class Config:
+    """
+    A class that represents a configuration object.
+
+    Args:
+        file_path (str): The source path of the .json or .yaml file.
+
+    Example Usage:
+        config = Config("data.json")
+        data_source = config("source1")
+        print(data_source.connection)
+
+    Attributes:
+        __file (Path): The source path of the file.
+        __file_type (FileType): The type of the file.
+
+    Methods:
+        __call__(name):
+            Returns a data source configuration from a file.
+
+    """
+
     def __init__(self, file_path: str):
         """
+        __init__ method.
+
         Initializes an instance of the Converter Configuration.
 
         Args:
             file_path (str): Source path of your .json or .yaml file.
         """
-
         self.__file: Path = Path(file_path)
         self.__file_type: FileType = FileType(self.__file.suffix)
 
@@ -126,7 +209,6 @@ def __call__(self, name: str) -> DataSource:
             DataSource: An instance class of data source containing
                 configuration properties.
         """
-
         config: dict = {}
 
         with open(self.__file) as file: