Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: sql insights #17

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/datapilot/clients/altimate/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,3 +83,7 @@ def get_signed_url(self, params=None):
def validate_credentials(self):
    """Issue a GET request to the credential-validation endpoint.

    Returns whatever ``self.get`` returns for ``/dbt/v3/validate-credentials``.
    """
    return self.get("/dbt/v3/validate-credentials")

def sql_insights(self, data):
    """POST ``data`` to the SQL insights endpoint.

    Returns whatever ``self.post`` returns for ``/dbt/v1/sqlinsights``.
    """
    return self.post("/dbt/v1/sqlinsights", data=data)
1 change: 1 addition & 0 deletions src/datapilot/core/insights/sql/base/insight.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

class SqlInsight(Insight):
NAME = "SqlInsight"
TYPE = "sql"

def __init__(self, sql: str, dialect: Optional[Dialect], *args, **kwargs):
self.sql = sql
Expand Down
58 changes: 54 additions & 4 deletions src/datapilot/core/platforms/dbt/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import click

from datapilot.clients.altimate.client import APIClient
from datapilot.clients.altimate.utils import check_token_and_instance
from datapilot.clients.altimate.utils import onboard_manifest
from datapilot.clients.altimate.utils import validate_credentials
Expand All @@ -12,6 +13,7 @@
from datapilot.core.platforms.dbt.formatting import generate_model_insights_table
from datapilot.core.platforms.dbt.formatting import generate_project_insights_table
from datapilot.core.platforms.dbt.utils import load_manifest
from datapilot.core.platforms.sql.executor import DBTSqlInsightGenerator
from datapilot.utils.formatting.utils import tabulate_data

logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -72,11 +74,11 @@ def project_health(manifest_path, catalog_path, config_path=None):


@dbt.command("onboard")
@click.option("--token", prompt="API Token", help="Your API token for authentication.")
@click.option("--instance-name", prompt="Instance Name", help="Your tenant ID.")
@click.option("--dbt_core_integration_id", prompt="DBT Core Integration ID", help="DBT Core Integration ID")
@click.option("--token", required=False, help="Your API token for authentication.")
@click.option("--instance-name", required=False, help="Instance Name")
@click.option("--dbt_core_integration_id", required=True, help="DBT Core Integration ID")
@click.option("--manifest-path", required=True, prompt="Manifest Path", help="Path to the manifest file.")
@click.option("--backend-url", required=False, prompt="Altimate's Backend URL", help="Altimate's Backend URL")
@click.option("--backend-url", required=False, default="https://api.myaltimate.com", help="Altimate's Backend URL")
def onboard(token, instance_name, dbt_core_integration_id, manifest_path, backend_url="https://api.myaltimate.com", env=None):
"""Onboard a manifest file to DBT."""
check_token_and_instance(token, instance_name)
Expand All @@ -94,3 +96,51 @@ def onboard(token, instance_name, dbt_core_integration_id, manifest_path, backen
click.echo("Manifest onboarded successfully!")
else:
click.echo(f"{response['message']}")


# Click command group that the `sql-insights` command below is registered on.
# NOTE(review): `dbt` is already used as a decorator earlier in this file
# (`@dbt.command("onboard")`), so this looks like a second definition that
# rebinds the name. Commands registered before this point would then stay on
# the previous group object while whoever imports `dbt` gets this new one.
# Confirm whether this block should be removed.
@click.group()
def dbt():
"""DBT specific commands."""


@dbt.command("sql-insights")
@click.option("--adapter", required=True, help="The adapter to use for the DBT project.")
@click.option("--manifest-path", required=True, help="Path to the DBT manifest file")
@click.option("--catalog-path", required=False, help="Path to the DBT catalog file")
@click.option("--config-path", required=False, help="Path to the DBT config file")
@click.option("--token", help="Your API token for authentication.")
@click.option("--instance-name", help="Your tenant ID.")
@click.option("--backend-url", required=False, help="Altimate's Backend URL", default="https://api.myaltimate.com")
def sql_insights(
    adapter, manifest_path, catalog_path, config_path=None, token=None, instance_name=None, backend_url="https://api.myaltimate.com"
):
    """
    Generate SQL insights for the models of a DBT project and print them per model.
    """
    # The docstring above is what click shows as the command's --help text, so
    # it is kept short; parameter details live in the option help strings.

    # The config file is optional; without it the generator gets config=None.
    config = None
    if config_path:
        config = load_config(config_path)

    check_token_and_instance(token, instance_name)

    # Stop before building the generator when the backend rejects the credentials.
    if not validate_credentials(token, backend_url, instance_name):
        click.echo("Error: Invalid credentials.")
        return

    api_client = APIClient(api_token=token, base_url=backend_url, tenant=instance_name)

    insight_generator = DBTSqlInsightGenerator(
        manifest_path=manifest_path, catalog_path=catalog_path, adapter=adapter, config=config, api_client=api_client
    )
    reports = insight_generator.run()
    model_report = generate_model_insights_table(reports)

    # Nothing to show: skip the banner entirely.
    if not model_report:
        return

    click.echo("--" * 50)
    click.echo("Model Insights")
    click.echo("--" * 50)
    for model_id, report in model_report.items():
        click.echo(f"Model: {model_id}")
        click.echo(f"File path: {report['path']}")
        click.echo(tabulate_data(report["table"], headers="keys"))
        click.echo("\n")
13 changes: 13 additions & 0 deletions src/datapilot/core/platforms/dbt/wrappers/catalog/v1/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,16 @@ def get_schema(self):
column_name: column_node.type for column_name, column_node in catalog_source_node.columns.items()
}
return nodes_with_schemas

def get_columns(self):
    """Return the column descriptors of every catalog node and source.

    The result maps each catalog item id to a list of dicts with the keys
    ``name``, ``data_type`` (taken from the column's ``type``) and
    ``description`` (always an empty string here).
    """
    # Merge nodes and sources; on a duplicate id the source entry wins,
    # exactly as in a ``{**nodes, **sources}`` merge.
    catalog_items = dict(self.catalog.nodes)
    catalog_items.update(self.catalog.sources)

    return {
        item_id: [
            {"name": column_name, "data_type": column.type, "description": ""}
            for column_name, column in item.columns.items()
        ]
        for item_id, item in catalog_items.items()
    }
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def _get_node(self, node: ManifestNode) -> AltimateManifestNode:
compiled = node.compiled
raw_code = node.raw_code
language = node.language
compiled_code = node.compiled_code if node.compiled_code else None
contract = AltimateDBTContract(**node.contract.__dict__) if node.contract else None

return AltimateManifestNode(
Expand Down
Empty file.
104 changes: 104 additions & 0 deletions src/datapilot/core/platforms/sql/executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import logging

# from src.utils.formatting.utils import generate_model_insights_table
from typing import Dict
from typing import Optional

from configtree.tree import Tree
from requests import Response

from datapilot.clients.altimate.client import APIClient
from datapilot.core.platforms.dbt.factory import DBTFactory
from datapilot.core.platforms.dbt.insights.schema import DBTModelInsightResponse
from datapilot.core.platforms.dbt.utils import load_catalog
from datapilot.core.platforms.dbt.utils import load_manifest


class DBTSqlInsightGenerator:
    """Build a SQL-insights request from DBT artifacts and fetch the insights via the API client.

    The manifest is loaded eagerly in ``__init__``; the catalog is loaded only
    when ``catalog_path`` is given. ``run()`` posts the request through
    ``api_client.sql_insights`` and parses a successful response.
    """

    def __init__(
        self,
        manifest_path: str,
        adapter: str,
        catalog_path: Optional[str] = None,
        run_results_path: Optional[str] = None,
        env: Optional[str] = None,
        config: Optional[Tree] = None,
        target: str = "dev",
        api_client: Optional[APIClient] = None,
    ):
        """Load the DBT artifacts and cache the manifest-derived collections.

        :param manifest_path: Path of the manifest file passed to ``load_manifest``.
        :param adapter: Adapter name forwarded verbatim in the request payload.
        :param catalog_path: Optional path of the catalog file; enables column metadata.
        :param run_results_path: Stored on the instance; not read elsewhere in this class.
        :param env: Stored on the instance; not read elsewhere in this class.
        :param config: Optional configuration tree; an empty ``Tree`` is used when omitted.
        :param target: Stored on the instance; not read elsewhere in this class.
        :param api_client: Client whose ``sql_insights`` method is called by ``run()``.
        """
        self.manifest_path = manifest_path
        self.catalog_path = catalog_path
        self.run_results_path = run_results_path
        self.target = target
        self.env = env
        self.adapter = adapter
        self.config = config or Tree()
        manifest = load_manifest(self.manifest_path)

        self.manifest_wrapper = DBTFactory.get_manifest_wrapper(manifest)
        self.manifest_present = True
        self.catalog_present = False
        # Always define the attribute so it exists even when no catalog was supplied.
        self.catalog_wrapper = None

        if catalog_path:
            catalog = load_catalog(self.catalog_path)
            self.catalog_wrapper = DBTFactory.get_catalog_wrapper(catalog)
            self.catalog_present = True

        self.run_results_present = False
        self.logger = logging.getLogger("dbt-insight-generator")

        self.nodes = self.manifest_wrapper.get_nodes()
        self.sources = self.manifest_wrapper.get_sources()
        self.exposures = self.manifest_wrapper.get_exposures()
        self.children_map = self.manifest_wrapper.parent_to_child_map(self.nodes)
        self.tests = self.manifest_wrapper.get_tests()
        self.project_name = self.manifest_wrapper.get_package()
        self.api_client = api_client

    def _build_request(self) -> dict:
        """Assemble the request payload: one entry per manifest node plus config and adapter."""
        # get_columns() takes no arguments and returns a mapping of catalog item
        # id -> column list, so fetch it once and look each node up below
        # instead of calling it with a node id for every node.
        columns_by_node = self.catalog_wrapper.get_columns() if self.catalog_present else {}

        request = {
            "models": [],
            "config": dict(self.config),
            "adapter": self.adapter,
        }
        for node_id, node in self.nodes.items():
            model = {
                "uniqueId": node_id,
                "name": node.name,
                "alias": node.alias,
                "database": node.database,
                "schema_name": node.schema_name,
                "package_name": node.package_name,
                "path": node.path,
                "original_file_path": node.original_file_path,
                # Nodes absent from the catalog (or no catalog at all) get an empty list.
                "columns": columns_by_node.get(node_id, []),
            }
            request["models"].append(
                {
                    "compiled_sql": node.compiled_code,
                    "model_node": model,
                }
            )
        return request

    def _parse_response(self, response: Dict[str, list]) -> Dict[str, list]:
        """Convert each model's raw insight dicts into ``DBTModelInsightResponse`` objects.

        :param response: Mapping of model id to an iterable of insight dicts.
        :return: Mapping of model id to a list of ``DBTModelInsightResponse``.
        """
        return {
            model_id: [DBTModelInsightResponse(**insight) for insight in insights]
            for model_id, insights in response.items()
        }

    def run(self) -> Dict[str, list]:
        """Request SQL insights from the backend and return them keyed by model id.

        :return: Parsed insights on HTTP 200; an empty dict otherwise (the
            failure is logged and SQL insights are skipped).
        """
        request = self._build_request()

        response: Response = self.api_client.sql_insights(data=request)

        if response.status_code == 200:
            return self._parse_response(response.json())

        self.logger.error(
            "Error running SQL insights: %s, %s. Skipping SQL insights.",
            response.status_code,
            response.text,
        )
        return {}
1 change: 1 addition & 0 deletions src/datapilot/schemas/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ class Dialect(Enum):
MYSQL = "mysql"
BIGQUERY = "bigquery"
OTHERS = "others"
DATABRICKS = "databricks"
Loading
Loading