Skip to content

Commit

Permalink
Merge pull request #12 from jabardigitalservice/feature/pep-0257
Browse files Browse the repository at this point in the history
Feature/pep 0257
  • Loading branch information
agistyaanugrah authored Oct 25, 2023
2 parents 570cafa + a2560ec commit 5a72827
Show file tree
Hide file tree
Showing 22 changed files with 462 additions and 87 deletions.
32 changes: 19 additions & 13 deletions .github/workflows/python.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,6 @@ on:
- '[0-9]+.[0-9]+.[0-9]+-[a-z]+'
- '[0-9]+.[0-9]+.[0-9]+-[a-z]+.[0-9]+'
jobs:
lint:
name: Linter Test
runs-on: ubuntu-latest
container:
image: pipelinecomponents/flake8
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Lint with flake8
run: flake8
venv:
name: Virtual Environment
runs-on: ubuntu-latest
Expand All @@ -44,7 +32,25 @@ jobs:
python -m venv venv/
. venv/bin/activate
pip install --upgrade pip
pip install build 'coverage[toml]' pdoc3
pip install build 'coverage[toml]' pdoc3 flake8 flake8-docstrings
lint:
name: Linter Test
needs: venv
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Cache venv
uses: actions/cache@v3
with:
path: venv/
key: python-venv
- name: Lint with flake8
run: |
. venv/bin/activate
python -m flake8 --exclude build/,venv/
unittest:
name: Unit Test
needs: venv
Expand Down
4 changes: 3 additions & 1 deletion .vscode/extensions.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
{
"recommendations": [
"ms-python.flake8"
"ms-python.flake8",
"njpwerner.autodocstring",
"codium.codium"
]
}
5 changes: 3 additions & 2 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
"build",
"'coverage[toml]'",
"pdoc3",
"flake8"
"flake8",
"flake8-docstrings"
]
},
{
Expand Down Expand Up @@ -53,7 +54,7 @@
"label": "Python: Package test linter",
"type": "shell",
"command": "${command:python.interpreterPath}",
"args": ["-m", "flake8", "--exclude", "venv/"]
"args": ["-m", "flake8", "--exclude", "build/,venv/"]
},
{
"label": "Python: Package test report",
Expand Down
13 changes: 13 additions & 0 deletions datasae/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,16 @@
# Copyright (c) Free Software Foundation, Inc. All rights reserved.
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""
datasae package.

Data quality checks for pandas DataFrames (for example ``datasae.boolean``)
and converters that load configured data sources into DataFrames
(``datasae.converter``).
"""
27 changes: 21 additions & 6 deletions datasae/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,39 @@
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""Library data quality for boolean type."""

import pandas as pd

from .exception import InvalidDataTypeWarning
from .utils import Basic, create_warning_data, WarningDataMessage


class WarningDataDetailMessage:
    """
    Predefined detail messages for boolean data validation.

    Attributes:
        BOOLEAN_DATA_TYPE (str): Detail message for a value that must be of
            boolean data type.
        DEFINED_DATA_TYPE (str): Detail message for a value that must be
            equal to a defined value.
    """

    BOOLEAN_DATA_TYPE: str = "Value must be of boolean data type"
    DEFINED_DATA_TYPE: str = "Value must be equal to defined value"


class Boolean(Basic):
"""Data Quality class for boolean type."""

def __init__(self, dataFrame: pd.DataFrame):
    """
    Initialize an instance of the Boolean class.

    Args:
        dataFrame (pd.DataFrame): The data you want to process.
    """
    # Keep a reference to the caller's frame on the instance.
    self.dataFrame = dataFrame

@staticmethod
Expand All @@ -42,7 +55,6 @@ def check_bool(bool_data: bool) -> tuple:
value is invalid, including the warning message,
the actual value, and a detailed message.
"""

valid = 0
invalid = 0
warning_data = {}
Expand All @@ -59,6 +71,8 @@ def check_bool(bool_data: bool) -> tuple:

def is_bool(self, column: str) -> dict:
"""
Checker method for boolean type data.
Check whether the values in a specified column of a DataFrame
are of boolean data type.
Expand All @@ -70,7 +84,6 @@ def is_bool(self, column: str) -> dict:
including the number of valid and invalid values,
and any warning messages.
"""

valid = 0
invalid = 0
warning = {}
Expand Down Expand Up @@ -105,6 +118,8 @@ def is_bool(self, column: str) -> dict:
@staticmethod
def check_is_in(bool_data, is_in: list):
"""
Checker in method for boolean type data.
Check if every row of a given DataFrame column is equal to the
defined boolean list.
Expand All @@ -121,7 +136,6 @@ def check_is_in(bool_data, is_in: list):
value is invalid, including the warning message,
the actual value, and a detailed message.
"""

valid = 0
invalid = 0
warning_data = {}
Expand All @@ -138,6 +152,8 @@ def check_is_in(bool_data, is_in: list):

def is_in(self, is_in: list, column: str) -> dict:
"""
Checker in method for boolean type data.
Check if every row of a given DataFrame column is equal to the
defined boolean list.
Expand All @@ -151,7 +167,6 @@ def is_in(self, is_in: list, column: str) -> dict:
including the number of valid and invalid values,
and any warning messages.
"""

valid = 0
invalid = 0
warning = {}
Expand Down
90 changes: 86 additions & 4 deletions datasae/converter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@
# Licensed under the AGPL-3.0-only License. See LICENSE in the project root
# for license information.

"""
Converter library.
A class called `Config` that represents a configuration object for reading
data source configurations from a JSON or YAML file.
"""

from __future__ import annotations
from dataclasses import dataclass
from enum import Enum
Expand All @@ -18,11 +25,45 @@


class CaseInsensitiveEnum(str, Enum):
"""
A case-insensitive enumeration class.
A case-insensitive enumeration class that allows for case-insensitive
comparison of enum values and provides a case-insensitive lookup of enum
members.
"""

def __eq__(self, __value: str) -> bool:
"""
__eq__ methods.
Overrides the __eq__ method to perform case-insensitive comparison of
enum values.
Args:
__value (str): The value to compare with the enum value.
Returns:
bool: True if the values are equal (case-insensitive), False
otherwise.
"""
return super().__eq__(__value.lower() if __value else __value)

@classmethod
def _missing_(cls, value: str) -> CaseInsensitiveEnum:
"""
_missing_ method.
Overrides the _missing_ method to perform case-insensitive lookup of
enum members.
Args:
value (str): The value to lookup in the enum members.
Returns:
CaseInsensitiveEnum: The enum member with the matching value (case-
insensitive).
"""
value = value.lower() if value else value

for member in cls:
Expand All @@ -31,6 +72,13 @@ def _missing_(cls, value: str) -> CaseInsensitiveEnum:


class FileType(CaseInsensitiveEnum):
"""
FileType enumeration.
Represents different types of file formats with case-insensitive
comparison and lookup of enum values.
"""

CSV = '.csv'
JSON = '.json'
PARQUET = '.parquet'
Expand All @@ -40,11 +88,24 @@ class FileType(CaseInsensitiveEnum):


class DataSourceType(CaseInsensitiveEnum):
    """
    Supported data source types.

    Members inherit the case-insensitive comparison and lookup provided by
    ``CaseInsensitiveEnum``.
    """

    S3 = 's3'


@dataclass(repr=False)
class DataSource:
"""
DataSource class.
A class that converts data of different file types into a Pandas DataFrame.
"""

type: DataSourceType

@property
Expand All @@ -55,7 +116,6 @@ def connection(self) -> dict:
Returns:
dict: Key-value parameters for connection to datasource.
"""

return {
key: value
for key, value in self.__dict__.items()
Expand All @@ -66,6 +126,8 @@ def __call__(
self, file_type: FileType, data: bytes, *args, **kwargs
) -> pd.DataFrame | bytes:
"""
__call__ method.
Converter from various file type into Pandas DataFrame.
Args:
Expand All @@ -76,7 +138,6 @@ def __call__(
DataFrame | bytes: Pandas DataFrame or bytes if file type not
support.
"""

if file_type in list(FileType):
func: Callable = None

Expand Down Expand Up @@ -104,14 +165,36 @@ def __call__(


class Config:
"""
A class that represents a configuration object.
Args:
file_path (str): The source path of the .json or .yaml file.
Example Usage:
config = Config("data.json")
data_source = config("source1")
print(data_source.connection)
Attributes:
__file (Path): The source path of the file.
__file_type (FileType): The type of the file.
Methods:
__call__(name):
Returns a data source configuration from a file.
"""

def __init__(self, file_path: str):
    """
    Set up the converter configuration from a file path.

    The file type is derived from the suffix of the given path.

    Args:
        file_path (str): Source path of your .json or .yaml file.
    """
    source = Path(file_path)
    self.__file: Path = source
    # The suffix (e.g. ".json") is resolved to a FileType member.
    self.__file_type: FileType = FileType(source.suffix)

Expand All @@ -126,7 +209,6 @@ def __call__(self, name: str) -> DataSource:
DataSource: An instance class of data source containing
configuration properties.
"""

config: dict = {}

with open(self.__file) as file:
Expand Down
Loading

0 comments on commit 5a72827

Please sign in to comment.