Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Update CI configuration for PyPy compatibility #47

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# CI workflow: runs the tox test suite across an OS x Python matrix, and runs
# the `check` and `docs` tox environments once each on Ubuntu.
name: CI

# Trigger on pushes to, and pull requests targeting, main or master.
on:
  push:
    branches: [ main, master ]
  pull_request:
    branches: [ main, master ]

jobs:
  test:
    name: ${{ matrix.python-version }} / ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining matrix combinations finish when one of them fails,
      # so a single broken interpreter/OS pair does not hide other results.
      fail-fast: false
      matrix:
        os: [ubuntu-22.04, windows-latest, macos-latest]
        # Versions are quoted so YAML does not parse "3.10" as the float 3.1.
        python-version: [
          "3.8",
          "3.9",
          "3.10",
          "3.11",
          "3.12",
          "pypy-3.9",
          "pypy-3.10"
        ]

    steps:
      # v4 runs on Node 20; v3 relied on the deprecated Node 16 runtime.
      - uses: actions/checkout@v4

      # Install gettext on macOS
      - name: Install gettext on macOS
        if: runner.os == 'macOS'
        run: |
          brew install gettext
          brew link gettext --force

      # v5 runs on Node 20; v4 relied on the deprecated Node 16 runtime.
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install --upgrade setuptools wheel
          python -m pip install tox tox-gh-actions

      # Fail (rather than silently skip) when tox cannot find an interpreter.
      # NOTE(review): tox-gh-actions presumably picks the tox envs for the
      # active Python from the tox configuration, which is not shown here.
      - name: Test with tox
        run: tox --skip-missing-interpreters false

  # Runs the `check` tox environment on a single fixed interpreter.
  check:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install tox
      - run: tox -e check

  # Runs the `docs` tox environment on a single fixed interpreter.
  docs:
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install tox
      - run: tox -e docs
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# flake8 configuration.
[flake8]
# Allow lines up to 140 characters.
max-line-length = 140
# Paths and patterns that flake8 does not lint.
exclude = .tox,*.egg,build,data
# Enable only the pycodestyle error (E) / warning (W) and pyflakes (F) checks.
select = E,W,F
# W503 is pycodestyle's "line break before binary operator" warning.
ignore = W503
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@
import re
from pathlib import Path

from setuptools import find_packages
from setuptools import setup
from setuptools import (
find_packages,
setup,
)


def read(*names, **kwargs):
Expand Down
14 changes: 11 additions & 3 deletions src/datapilot/clients/altimate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,15 @@ def validate_permissions(
return api_client.validate_upload_to_integration()


def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integration_environment, file_type, file_path, backend_url) -> Dict:
def onboard_file(
api_token,
tenant,
dbt_core_integration_id,
dbt_core_integration_environment,
file_type,
file_path,
backend_url,
) -> Dict:
api_client = APIClient(api_token, base_url=backend_url, tenant=tenant)

params = {
Expand Down Expand Up @@ -84,7 +92,7 @@ def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integratio
api_client.log("Error getting signed URL.")
return {
"ok": False,
"message": "Error in uploading the manifest. ",
"message": "Error in uploading the manifest.",
}


Expand All @@ -101,7 +109,7 @@ def start_dbt_ingestion(api_token, tenant, dbt_core_integration_id, dbt_core_int
api_client.log("Error starting dbt ingestion worker")
return {
"ok": False,
"message": "Error starting dbt ingestion worker. ",
"message": "Error starting dbt ingestion worker.",
}


Expand Down
7 changes: 4 additions & 3 deletions src/datapilot/core/platforms/dbt/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,11 @@ def __init__(
)
if not self.selected_models:
raise AltimateCLIArgumentError(
f"Invalid values provided in the --select argument. Could not find models associated with pattern: --select {' '.join(selected_models)}"
"Invalid values provided in the --select argument. "
f"Could not find models associated with pattern: --select {' '.join(selected_models)}"
)
self.excluded_models = None
self.excluded_models_flag = False
self.excluded_models = None
self.excluded_models_flag = False

def _check_if_skipped(self, insight):
if self.config.get("disabled_insights", False):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ class CheckMacroArgsHaveDesc(ChecksInsight):
NAME = "Check macro arguments has description"
ALIAS = "check_macro_args_have_desc"
DESCRIPTION = "Macro arguments should have a description. "
REASON_TO_FLAG = "Clear descriptions for macro arguments are crucial as they prevent misunderstandings, enhance user comprehension, and simplify maintenance. This leads to more accurate data analysis and efficient workflows."
REASON_TO_FLAG = (
"Clear descriptions for macro arguments are crucial as they prevent misunderstandings, "
"enhance user comprehension, and simplify maintenance. "
"This leads to more accurate data analysis and efficient workflows."
)

def _build_failure_result(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ class CheckMacroHasDesc(ChecksInsight):
NAME = "Macro has documentation"
ALIAS = "check_macro_has_desc"
DESCRIPTION = "Macros should be documented."
REASON_TO_FLAG = "Undocumented macros can cause misunderstandings and inefficiencies in data modeling and analysis, as they make it difficult to understand their purpose and usage. Clear descriptions are vital for accuracy and streamlined workflow."
REASON_TO_FLAG = (
"Undocumented macros can cause misunderstandings and inefficiencies in data modeling and analysis, "
"as they make it difficult to understand their purpose and usage. "
"Clear descriptions are vital for accuracy and streamlined workflow."
)

def _build_failure_result(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,16 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
def _build_failure_result(self, model_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
missing_test_group_str = ""
for test in missing_test_groups:
missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
missing_test_group_str += (
f"Test Group: {test.get(self.TEST_GROUP_STR)}, "
f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
f"Actual Count: {test.get('actual_count')}\n"
)

failure_message = f"The model `{model_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. "
failure_message = (
f"The model `{model_unique_id}` does not have enough tests for the following groups:\n"
f"{missing_test_group_str}. "
)
recommendation = (
"Add tests with the specified groups for each model listed above. "
"Having tests with specific groups ensures proper validation and data integrity."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,16 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
tests_str = ""
for test in missing_tests:
tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
tests_str += (
f"Test Name: {test.get(self.TEST_NAME_STR)}, "
f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
f"Actual Count: {test.get('actual_count')}\n"
)

failure_message = f"The model `{model_unique_id}` does not have enough tests:\n{tests_str}. "
failure_message = (
f"The model `{model_unique_id}` does not have enough tests:\n"
f"{tests_str}. "
)
recommendation = (
"Add tests with the specified names for each model listed above. "
"Having tests with specific names ensures proper validation and data integrity."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
missing_test_type_str = ""
for test in missing_tests:
missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
missing_test_type_str += (
f"Test type: {test.get(self.TEST_TYPE_STR)}, "
f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
f"Actual Count: {test.get('actual_count')}\n"
)

failure_message = f"The model `{model_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. "
recommendation = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ class CheckModelMaterializationByChilds(ChecksInsight):
NAME = "Model materialization by children"
ALIAS = "check_model_materialization_by_childs"
DESCRIPTION = "Fewer children than threshold ideally should be view or ephemeral, more or equal should be table or incremental."
REASON_TO_FLAG = "The model is flagged due to inappropriate materialization: models with child counts above the threshold require robust and efficient data processing, hence they should be materialized as tables or incrementals for optimized query performance and data management."
REASON_TO_FLAG = (
"The model is flagged due to inappropriate materialization: models with child counts above the threshold "
"require robust and efficient data processing, hence they should be materialized as tables or incrementals "
"for optimized query performance and data management."
)
THRESHOLD_CHILDS_STR = "threshold_childs"

def _build_failure_result_view_materialization(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,10 @@ def get_config_schema(cls):
},
"required": [cls.PATTERN_STR, cls.FOLDER_STR],
},
"description": "A list of regex patterns to check the model name against. Each pattern is applied to the folder specified. If no pattern is found for the folder, the default pattern is used.",
"description": (
"A list of regex patterns to check the model name against. Each pattern is applied to the folder specified. "
"If no pattern is found for the folder, the default pattern is used."
),
"default": [],
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:

if not self.max_childs and not self.max_parents:
self.logger.info(
"max_children and max_parents are required values in the configuration. Please provide the required values. Skipping the insight."
"max_children and max_parents are required values in the configuration. "
"Please provide the required values. Skipping the insight."
)
return insights

Expand Down Expand Up @@ -98,10 +99,18 @@ def _check_model_parents_and_childs(self, model_unique_id: str) -> Optional[str]
parents = node.depends_on.nodes
message = ""
if len(parents) < self.min_parents or len(parents) > self.max_parents:
message += f"The model:{model_unique_id} doesn't have the required number of parents.\n Min parents: {self.min_parents}, Max parents: {self.max_parents}. It has f{len(parents)} parents\n"
message += (
f"The model:{model_unique_id} doesn't have the required number of parents.\n"
f"Min parents: {self.min_parents}, Max parents: {self.max_parents}. "
f"It has {len(parents)} parents\n"
)

if len(children) < self.min_childs or len(children) > self.max_childs:
message += f"The model:{model_unique_id} doesn't have the required number of childs.\n Min childs: {self.min_childs}, Max childs: {self.max_childs}. It has f{len(children)} childs\n"
message += (
f"The model:{model_unique_id} doesn't have the required number of childs.\n"
f"Min childs: {self.min_childs}, Max childs: {self.max_childs}. "
f"It has {len(children)} childs\n"
)

return message

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,10 @@ def get_config_schema(cls):
"properties": {
cls.FRESHNESS_STR: {
"type": "array",
"description": "The freshness options that should be defined for the source. If not provided, all freshness options are allowed.",
"description": (
"The freshness options that should be defined for the source. "
"If not provided, all freshness options are allowed."
),
"items": {
"type": "string",
"enum": ["error_after", "warn_after"],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ class CheckSourceHasMetaKeys(ChecksInsight):
NAME = "Source has required metadata keys"
ALIAS = "check_source_has_meta_keys"
DESCRIPTION = "Check if the source has required metadata keys"
REASON_TO_FLAG = "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. It's important to ensure that the source includes all the required meta keys as per the configuration."
REASON_TO_FLAG = (
"Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. "
"It's important to ensure that the source includes all the required meta keys as per the configuration."
)
META_KEYS_STR = "meta_keys"
ALLOW_EXTRA_KEYS_STR = "allow_extra_keys"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,15 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
def _build_failure_result(self, source_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
missing_test_group_str = ""
for test in missing_test_groups:
missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
missing_test_group_str += (
f"Test Group: {test.get(self.TEST_GROUP_STR)}, "
f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
f"Actual Count: {test.get('actual_count')}\n"
)

failure_message = (
f"The source `{source_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. "
f"The source `{source_unique_id}` does not have enough tests for the following groups:\n"
f"{missing_test_group_str}. "
)
recommendation = (
"Add tests with the specified groups for each source listed above. "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
def _build_failure_result(self, source_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
tests_str = ""
for test in missing_tests:
tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
tests_str += (
f"Test Name: {test.get(self.TEST_NAME_STR)}, "
f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
f"Actual Count: {test.get('actual_count')}\n"
)

failure_message = f"The source `{source_unique_id}` does not have enough tests:\n{tests_str}. "
recommendation = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
def _build_failure_result(self, source_unique_id: str, missing_tests) -> DBTInsightResult:
missing_test_type_str = ""
for test in missing_tests:
missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
missing_test_type_str += (
f"Test type: {test.get(self.TEST_TYPE_STR)}, "
f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
f"Actual Count: {test.get('actual_count')}\n"
)

failure_message = f"The source `{source_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. "
recommendation = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
)
)
self.logger.debug(
f"Finished generating insights for DBTDownstreamModelsDependentOnSource. Found {len(insights)} models with direct source dependencies"
"Finished generating insights for DBTDownstreamModelsDependentOnSource. "
f"Found {len(insights)} models with direct source dependencies"
)
return insights
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ class DBTRootModel(DBTModellingInsight):

NAME = "Root model traceability"
ALIAS = "root_model"
DESCRIPTION = "Identifies models in a dbt project with 0 direct parents, meaning these models cannot be traced back to a declared source or model."
DESCRIPTION = (
"Identifies models in a dbt project with 0 direct parents, "
"meaning these models cannot be traced back to a declared source or model."
)
REASON_TO_FLAG = (
"Best Practice is to ensure all models can be traced back to a source or another model in the project. "
"Root models with no direct parents can lead to challenges in tracking data lineage and understanding"
Expand Down
Loading
Loading