AltimateAI · anandgupta42 · Jan 9, 2025 · Jan 9, 2025 · Jan 9, 2025 · Jan 9, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,76 @@
+name: CI
+
+on:
+  push:
+    branches: [ main, master ]
+  pull_request:
+    branches: [ main, master ]
+
+jobs:
+  test:  # runs tox once per (os, python-version) combination in the matrix below
+    name: ${{ matrix.python-version }} / ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # keep the remaining matrix cells running when one of them fails
+      matrix:
+        os: [ubuntu-22.04, windows-latest, macos-latest]
+        python-version: [
+          "3.8",
+          "3.9",
+          "3.10",
+          "3.11",
+          "3.12",
+          "pypy-3.9",
+          "pypy-3.10"
+        ]
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # macOS runners only: install and force-link Homebrew gettext (reason not visible here; TODO confirm it is still required)
+      - name: Install gettext on macOS
+        if: runner.os == 'macOS'
+        run: |
+          brew install gettext
+          brew link gettext --force
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+          architecture: x64
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install --upgrade setuptools wheel
+          python -m pip install tox tox-gh-actions
+
+      - name: Test with tox  # a missing interpreter is treated as an error, not silently skipped
+        run: tox --skip-missing-interpreters false
+
+  check:  # runs the tox "check" environment on a single pinned interpreter
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install tox
+      - run: tox -e check
+
+  docs:  # runs the tox "docs" environment on a single pinned interpreter
+    runs-on: ubuntu-22.04
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install tox
+      - run: tox -e docs
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,5 @@
+[flake8]
+max-line-length = 140
+exclude = .tox,*.egg,build,data
+select = E,W,F
+ignore = W503
diff --git a/setup.py b/setup.py
@@ -2,8 +2,10 @@
 import re
 from pathlib import Path
 
-from setuptools import find_packages
-from setuptools import setup
+from setuptools import (
+    find_packages,
+    setup,
+)
 
 
 def read(*names, **kwargs):

diff --git a/src/datapilot/clients/altimate/utils.py b/src/datapilot/clients/altimate/utils.py
@@ -55,7 +55,15 @@ def validate_permissions(
     return api_client.validate_upload_to_integration()
 
 
-def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integration_environment, file_type, file_path, backend_url) -> Dict:
+def onboard_file(
+    api_token,
+    tenant,
+    dbt_core_integration_id,
+    dbt_core_integration_environment,
+    file_type,
+    file_path,
+    backend_url,
+) -> Dict:
     api_client = APIClient(api_token, base_url=backend_url, tenant=tenant)
 
     params = {
@@ -84,7 +92,7 @@ def onboard_file(api_token, tenant, dbt_core_integration_id, dbt_core_integratio
         api_client.log("Error getting signed URL.")
         return {
             "ok": False,
-            "message": "Error in uploading the manifest.                                                                                                                              ",
+            "message": "Error in uploading the manifest.",
         }
 
 
@@ -101,7 +109,7 @@ def start_dbt_ingestion(api_token, tenant, dbt_core_integration_id, dbt_core_int
         api_client.log("Error starting dbt ingestion worker")
         return {
             "ok": False,
-            "message": "Error starting dbt ingestion worker.                                                                                                                              ",
+            "message": "Error starting dbt ingestion worker.",
         }
 
 

diff --git a/src/datapilot/core/platforms/dbt/executor.py b/src/datapilot/core/platforms/dbt/executor.py
@@ -88,10 +88,12 @@ def __init__(
             )
             if not self.selected_models:
                 raise AltimateCLIArgumentError(
-                    f"Invalid values provided in the --select argument. Could not find models associated with pattern: --select {' '.join(selected_models)}"
+                    "Invalid values provided in the --select argument. "
+                    f"Could not find models associated with pattern: --select {' '.join(selected_models)}"
                 )
+        # Exclusion state must be set on every path through __init__, not only inside the --select branch above.
         self.excluded_models = None
         self.excluded_models_flag = False
 
     def _check_if_skipped(self, insight):
         if self.config.get("disabled_insights", False):

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_args_have_desc.py
@@ -11,7 +11,11 @@ class CheckMacroArgsHaveDesc(ChecksInsight):
     NAME = "Check macro arguments has description"
     ALIAS = "check_macro_args_have_desc"
     DESCRIPTION = "Macro arguments should have a description. "
-    REASON_TO_FLAG = "Clear descriptions for macro arguments are crucial as they prevent misunderstandings, enhance user comprehension, and simplify maintenance. This leads to more accurate data analysis and efficient workflows."
+    REASON_TO_FLAG = (
+        "Clear descriptions for macro arguments are crucial as they prevent misunderstandings, "
+        "enhance user comprehension, and simplify maintenance. "
+        "This leads to more accurate data analysis and efficient workflows."
+    )
 
     def _build_failure_result(
         self,

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py b/src/datapilot/core/platforms/dbt/insights/checks/check_macro_has_desc.py
@@ -11,7 +11,11 @@ class CheckMacroHasDesc(ChecksInsight):
     NAME = "Macro has documentation"
     ALIAS = "check_macro_has_desc"
     DESCRIPTION = "Macros should be documented."
-    REASON_TO_FLAG = "Undocumented macros can cause misunderstandings and inefficiencies in data modeling and analysis, as they make it difficult to understand their purpose and usage. Clear descriptions are vital for accuracy and streamlined workflow."
+    REASON_TO_FLAG = (
+        "Undocumented macros can cause misunderstandings and inefficiencies in data modeling and analysis, "
+        "as they make it difficult to understand their purpose and usage. "
+        "Clear descriptions are vital for accuracy and streamlined workflow."
+    )
 
     def _build_failure_result(
         self,

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_group.py
@@ -50,9 +50,16 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, model_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
         missing_test_group_str = ""
         for test in missing_test_groups:
-            missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            missing_test_group_str += (
+                f"Test Group: {test.get(self.TEST_GROUP_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
-        failure_message = f"The model `{model_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. "
+        failure_message = (
+            f"The model `{model_unique_id}` does not have enough tests for the following groups:\n"
+            f"{missing_test_group_str}. "
+        )
         recommendation = (
             "Add tests with the specified groups for each model listed above. "
             "Having tests with specific groups ensures proper validation and data integrity."

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_name.py
@@ -48,9 +48,16 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
         tests_str = ""
         for test in missing_tests:
-            tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            tests_str += (
+                f"Test Name: {test.get(self.TEST_NAME_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
-        failure_message = f"The model `{model_unique_id}` does not have enough tests:\n{tests_str}. "
+        failure_message = (
+            f"The model `{model_unique_id}` does not have enough tests:\n"
+            f"{tests_str}. "
+        )
         recommendation = (
             "Add tests with the specified names for each model listed above. "
             "Having tests with specific names ensures proper validation and data integrity."

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_has_tests_by_type.py
@@ -49,7 +49,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, model_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
         missing_test_type_str = ""
         for test in missing_tests:
-            missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            missing_test_type_str += (
+                f"Test type: {test.get(self.TEST_TYPE_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
         failure_message = f"The model `{model_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. "
         recommendation = (

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_materialization_by_childs.py
@@ -11,7 +11,11 @@ class CheckModelMaterializationByChilds(ChecksInsight):
     NAME = "Model materialization by children"
     ALIAS = "check_model_materialization_by_childs"
     DESCRIPTION = "Fewer children than threshold ideally should be view or ephemeral, more or equal should be table or incremental."
-    REASON_TO_FLAG = "The model is flagged due to inappropriate materialization: models with child counts above the threshold require robust and efficient data processing, hence they should be materialized as tables or incrementals for optimized query performance and data management."
+    REASON_TO_FLAG = (
+        "The model is flagged due to inappropriate materialization: models with child counts above the threshold "
+        "require robust and efficient data processing, hence they should be materialized as tables or incrementals "
+        "for optimized query performance and data management."
+    )
     THRESHOLD_CHILDS_STR = "threshold_childs"
 
     def _build_failure_result_view_materialization(

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_name_contract.py
@@ -122,7 +122,10 @@ def get_config_schema(cls):
                         },
                         "required": [cls.PATTERN_STR, cls.FOLDER_STR],
                     },
-                    "description": "A list of regex patterns to check the model name against. Each pattern is applied to the folder specified. If no pattern is found for the folder, the default pattern is used.",
+                    "description": (
+                        "A list of regex patterns to check the model name against. Each pattern is applied to the folder specified. "
+                        "If no pattern is found for the folder, the default pattern is used."
+                    ),
                     "default": [],
                 },
             },

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py b/src/datapilot/core/platforms/dbt/insights/checks/check_model_parents_and_childs.py
@@ -66,7 +66,8 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
 
         if not self.max_childs and not self.max_parents:
             self.logger.info(
-                "max_children and max_parents are required values in the configuration. Please provide the required values. Skipping the insight."
+                "max_children and max_parents are required values in the configuration. "
+                "Please provide the required values. Skipping the insight."
             )
             return insights
 
@@ -98,10 +99,18 @@ def _check_model_parents_and_childs(self, model_unique_id: str) -> Optional[str]
         parents = node.depends_on.nodes
         message = ""
         if len(parents) < self.min_parents or len(parents) > self.max_parents:
-            message += f"The model:{model_unique_id} doesn't have the required number of parents.\n Min parents: {self.min_parents}, Max parents: {self.max_parents}. It has f{len(parents)} parents\n"
+            message += (
+                f"The model:{model_unique_id} doesn't have the required number of parents.\n"
+                f"Min parents: {self.min_parents}, Max parents: {self.max_parents}. "
+                f"It has {len(parents)} parents\n"
+            )
 
         if len(children) < self.min_childs or len(children) > self.max_childs:
-            message += f"The model:{model_unique_id} doesn't have the required number of childs.\n Min childs: {self.min_childs}, Max childs: {self.max_childs}. It has f{len(children)} childs\n"
+            message += (
+                f"The model:{model_unique_id} doesn't have the required number of childs.\n"
+                f"Min childs: {self.min_childs}, Max childs: {self.max_childs}. "
+                f"It has {len(children)} childs\n"
+            )
 
         return message
 

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_freshness.py
@@ -82,7 +82,10 @@ def get_config_schema(cls):
             "properties": {
                 cls.FRESHNESS_STR: {
                     "type": "array",
-                    "description": "The freshness options that should be defined for the source. If not provided, all freshness options are allowed.",
+                    "description": (
+                        "The freshness options that should be defined for the source. "
+                        "If not provided, all freshness options are allowed."
+                    ),
                     "items": {
                         "type": "string",
                         "enum": ["error_after", "warn_after"],

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_meta_keys.py
@@ -14,7 +14,10 @@ class CheckSourceHasMetaKeys(ChecksInsight):
     NAME = "Source has required metadata keys"
     ALIAS = "check_source_has_meta_keys"
     DESCRIPTION = "Check if the source has required metadata keys"
-    REASON_TO_FLAG = "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. It's important to ensure that the source includes all the required meta keys as per the configuration."
+    REASON_TO_FLAG = (
+        "Missing meta keys in the source can lead to inconsistency in metadata management and understanding of the source. "
+        "It's important to ensure that the source includes all the required meta keys as per the configuration."
+    )
     META_KEYS_STR = "meta_keys"
     ALLOW_EXTRA_KEYS_STR = "allow_extra_keys"
 

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_group.py
@@ -48,10 +48,15 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, source_unique_id: str, missing_test_groups: List[Dict]) -> DBTInsightResult:
         missing_test_group_str = ""
         for test in missing_test_groups:
-            missing_test_group_str += f"Test Group: {test.get(self.TEST_GROUP_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            missing_test_group_str += (
+                f"Test Group: {test.get(self.TEST_GROUP_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
         failure_message = (
-            f"The source `{source_unique_id}` does not have enough tests for the following groups:\n{missing_test_group_str}. "
+            f"The source `{source_unique_id}` does not have enough tests for the following groups:\n"
+            f"{missing_test_group_str}. "
         )
         recommendation = (
             "Add tests with the specified groups for each source listed above. "

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_name.py
@@ -48,7 +48,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, source_unique_id: str, missing_tests: List[Dict]) -> DBTInsightResult:
         tests_str = ""
         for test in missing_tests:
-            tests_str += f"Test Name: {test.get(self.TEST_NAME_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            tests_str += (
+                f"Test Name: {test.get(self.TEST_NAME_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
         failure_message = f"The source `{source_unique_id}` does not have enough tests:\n{tests_str}. "
         recommendation = (

diff --git a/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py b/src/datapilot/core/platforms/dbt/insights/checks/check_source_has_tests_by_type.py
@@ -47,7 +47,11 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
     def _build_failure_result(self, source_unique_id: str, missing_tests) -> DBTInsightResult:
         missing_test_type_str = ""
         for test in missing_tests:
-            missing_test_type_str += f"Test type: {test.get(self.TEST_TYPE_STR)}, Min Count: {test.get(self.TEST_COUNT_STR)}, Actual Count: {test.get('actual_count')}\n"
+            missing_test_type_str += (
+                f"Test type: {test.get(self.TEST_TYPE_STR)}, "
+                f"Min Count: {test.get(self.TEST_COUNT_STR)}, "
+                f"Actual Count: {test.get('actual_count')}\n"
+            )
 
         failure_message = f"The source `{source_unique_id}` does not have enough tests for the following types:\n{missing_test_type_str}. "
         recommendation = (

diff --git a/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py b/src/datapilot/core/platforms/dbt/insights/modelling/downstream_models_dependent_on_source.py
@@ -108,6 +108,7 @@ def generate(self, *args, **kwargs) -> List[DBTModelInsightResponse]:
                         )
                     )
         self.logger.debug(
-            f"Finished generating insights for DBTDownstreamModelsDependentOnSource. Found  {len(insights)} models with direct source dependencies"
+            "Finished generating insights for DBTDownstreamModelsDependentOnSource. "
+            f"Found {len(insights)} models with direct source dependencies"
         )
         return insights
diff --git a/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py b/src/datapilot/core/platforms/dbt/insights/modelling/root_model.py
@@ -15,7 +15,10 @@ class DBTRootModel(DBTModellingInsight):
 
     NAME = "Root model traceability"
     ALIAS = "root_model"
-    DESCRIPTION = "Identifies models in a dbt project with 0 direct parents, meaning these models cannot be traced back to a declared source or model."
+    DESCRIPTION = (
+        "Identifies models in a dbt project with 0 direct parents, "
+        "meaning these models cannot be traced back to a declared source or model."
+    )
     REASON_TO_FLAG = (
         "Best Practice is to ensure all models can be traced back to a source or another model in the project. "
         "Root models with no direct parents can lead to challenges in tracking data lineage and understanding"