Skip to content

Commit

Permalink
Merge pull request #192 from nicholasyager/feature/jinja_block_support
Browse files Browse the repository at this point in the history
Feature: Block-level copying of Jinja content between split projects
  • Loading branch information
nicholasyager authored Jan 2, 2024
2 parents 207c56a + 0163c70 commit 207be4a
Show file tree
Hide file tree
Showing 14 changed files with 275 additions and 16 deletions.
2 changes: 2 additions & 0 deletions dbt_meshify/change.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ class Operation(str, Enum):
"""An operation describes the type of work being performed."""

Add = "add"
Append = "append"
Update = "update"
Remove = "remove"
Copy = "copy"
Expand All @@ -17,6 +18,7 @@ class Operation(str, Enum):

prepositions = {
Operation.Add: "to",
Operation.Append: "to",
Operation.Move: "to",
Operation.Copy: "to",
Operation.Update: "in",
Expand Down
60 changes: 60 additions & 0 deletions dbt_meshify/dbt_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from dbt_meshify.dbt import Dbt
from dbt_meshify.exceptions import FatalMeshifyException
from dbt_meshify.storage.jinja_blocks import JinjaBlock, find_doc_reference


class BaseDbtProject:
Expand Down Expand Up @@ -310,6 +311,30 @@ def __init__(
resources = self.select_resources(output_key="unique_id")

super().__init__(manifest, project, catalog, name, resources)
self.jinja_blocks: Dict[str, JinjaBlock] = self.find_jinja_blocks()

def find_jinja_blocks(self) -> Dict[str, JinjaBlock]:
    """Collect the Jinja blocks backing this project's own docs and macros.

    Entries of ``self.manifest.docs`` and ``self.manifest.macros`` whose
    ``package_name`` differs from ``self.name`` are ignored. For every remaining
    entry, the block is read from ``self.path / original_file_path``.

    Returns:
        A mapping of manifest unique_id to the parsed ``JinjaBlock``.
    """

    # (manifest collection, Jinja block keyword) pairs; docs are handled first.
    sources = (
        (self.manifest.docs, "docs"),
        (self.manifest.macros, "macro"),
    )

    found = {}
    for collection, keyword in sources:
        for unique_id, entry in collection.items():
            if entry.package_name == self.name:
                found[unique_id] = JinjaBlock.from_file(
                    path=self.path / entry.original_file_path,
                    block_type=keyword,
                    name=entry.name,
                )

    return found

def select_resources(
self,
Expand Down Expand Up @@ -396,6 +421,8 @@ def __init__(
self.groups = self._get_indirect_groups()
self._rename_project()

self._referenced_docs: Optional[Set[str]] = None

def _rename_project(self) -> None:
"""
edits the project yml to take any instance of the parent project name and update it to the subproject name
Expand Down Expand Up @@ -432,6 +459,39 @@ def _get_custom_macros(self) -> Set[str]:
macros_set.update(self._get_macro_dependencies(macro))
return macros_set

@property
def referenced_docs(self) -> Set[str]:
    """Return the unique_ids of all docs referenced within this SubProject.

    Doc references are searched for in each selected resource's ``raw_code``
    (when the node has that attribute) and in the text of its YAML patch file
    (when the node has a ``patch_path`` attribute and the resolved file exists).
    Resources whose unique_id starts with ``test.`` are skipped. The referenced
    doc names are then matched against ``self.manifest.docs`` by ``doc.name``.

    The result is computed once and cached on ``self._referenced_docs``.

    Raises:
        Exception: if a selected resource cannot be found in the manifest.
    """

    # Compare against None rather than truthiness: an empty set is a valid
    # cached result and must not trigger a full re-scan on every access.
    if self._referenced_docs is not None:
        return self._referenced_docs

    doc_names: Set[str] = set()
    for unique_id in self.resources:
        if unique_id.startswith("test."):
            continue

        node = self.get_manifest_node(unique_id)

        if node is None:
            # Report the id that failed to resolve; `node` itself is None here.
            raise Exception(f"Unable to find referenced node {unique_id}")

        if hasattr(node, "raw_code"):
            doc_names.update(find_doc_reference(node.raw_code))

        if hasattr(node, "patch_path"):
            path = self.parent_project.resolve_patch_path(node)
            if path.exists():
                with open(path) as file:
                    doc_names.update(find_doc_reference(file.read()))

    # Use the search name for the doc to resolve a unique_id for the doc resource.
    self._referenced_docs = {
        unique_id for unique_id, doc in self.manifest.docs.items() if doc.name in doc_names
    }

    return self._referenced_docs

def _get_indirect_groups(self) -> Set[str]:
"""
get a set of group unique_ids for all the selected resources
Expand Down
3 changes: 2 additions & 1 deletion dbt_meshify/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,8 @@ def split(
change_set = subproject_creator.initialize()

return [change_set]
except Exception:
except Exception as e:
logger.exception(e) # TODO: Remove this line!
raise FatalMeshifyException(f"Error creating subproject {subproject.name}")


Expand Down
33 changes: 30 additions & 3 deletions dbt_meshify/storage/dbt_project_editors.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from dbt_meshify.dbt_projects import DbtSubProject
from dbt_meshify.storage.file_content_editors import NamedList, filter_empty_dict_items
from dbt_meshify.storage.file_manager import YAMLFileManager, yaml
from dbt_meshify.storage.jinja_blocks import JinjaBlock
from dbt_meshify.utilities.contractor import Contractor
from dbt_meshify.utilities.dependencies import DependenciesUpdater
from dbt_meshify.utilities.grouper import ResourceGrouper
Expand Down Expand Up @@ -141,7 +142,12 @@ def initialize(self) -> ChangeSet:
f"Identifying operations required to split {subproject.name} from {subproject.parent_project.name}."
)

for unique_id in subproject.resources | subproject.custom_macros | subproject.groups:
for unique_id in (
subproject.resources
| subproject.custom_macros
| subproject.groups
| subproject.referenced_docs
):
resource = subproject.get_manifest_node(unique_id)
if not resource:
raise KeyError(f"Resource {unique_id} not found in manifest")
Expand Down Expand Up @@ -182,10 +188,20 @@ def initialize(self) -> ChangeSet:
):
change_set.extend(reference_updater.update_parent_refs(resource))

elif resource.resource_type in ["macro", "group"]:
elif resource.resource_type in ["macro", "group", "doc"]:
if hasattr(resource, "patch_path") and resource.patch_path:
change_set.add(self.copy_resource_yml(resource))
change_set.add(self.copy_resource(resource))

if resource.unique_id in self.subproject.parent_project.jinja_blocks:
change_set.add(
self.copy_jinja_block(
resource,
self.subproject.parent_project.jinja_blocks[resource.unique_id],
)
)

else:
change_set.add(self.copy_resource(resource))

else:
logger.debug(
Expand Down Expand Up @@ -237,6 +253,17 @@ def move_resource(self, resource: Resource) -> FileChange:
source=self.subproject.parent_project.resolve_file_path(resource),
)

def copy_jinja_block(self, resource: Resource, jinja_block: JinjaBlock) -> FileChange:
    """Describe appending an existing Jinja block's text to a file in the subproject.

    The returned change uses ``Operation.Append`` with the block's ``content`` as
    its data, targeting the path the subproject resolves for ``resource``. Nothing
    is written here; only the ``FileChange`` is built.
    """

    destination = self.subproject.resolve_file_path(resource)

    change = FileChange(
        operation=Operation.Append,
        entity_type=EntityType.Code,
        identifier=resource.name,
        path=destination,
        data=jinja_block.content,
    )
    return change

def copy_resource(self, resource: Resource) -> FileChange:
"""
Copy a resource file from one project to another
Expand Down
12 changes: 12 additions & 0 deletions dbt_meshify/storage/file_content_editors.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,18 @@ def add(change: FileChange):

RawFileManager.write_file(path=change.path, content=change.data)

@staticmethod
def append(change: FileChange):
    """Append a change's data to the file at ``change.path``.

    The parent directory is created first when it does not exist. When the
    change carries no data (``None``), the file is only touched and the result
    of ``RawFileManager.touch_file`` is returned.
    """

    parent_dir = change.path.parent
    if not parent_dir.exists():
        parent_dir.mkdir(parents=True, exist_ok=True)

    if change.data is not None:
        RawFileManager.append_file(path=change.path, content=change.data)
        return None

    return RawFileManager.touch_file(change.path)

@staticmethod
def update(change: FileChange):
"""Update data to a new file."""
Expand Down
6 changes: 6 additions & 0 deletions dbt_meshify/storage/file_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ def write_file(path: Path, content: str) -> None:
"""Write a string value to a file in the filesystem"""
path.write_text(content)

@staticmethod
def append_file(path: Path, content: str) -> None:
    """Write ``content`` at the end of the file at ``path``.

    The file is opened in append mode ("a").
    """
    with open(path, mode="a") as handle:
        handle.write(content)

@staticmethod
def copy_file(source_path: Path, target_path: Path) -> None:
if not target_path.parent.exists():
Expand Down
75 changes: 75 additions & 0 deletions dbt_meshify/storage/jinja_blocks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Set, Tuple


@dataclass
class JinjaBlock:
    """
    A data structure for tracking a named Jinja block of text (for example a
    ``docs`` or ``macro`` block) found inside a file.

    ``start`` and ``end`` are character offsets into the file's text (not line
    numbers). ``content`` is the slice ``file_text[start:end]``: everything from the
    first character of the opening tag through the last character of the closing tag.
    """

    path: Path
    block_type: str
    name: str
    start: int
    end: int
    content: str

    @staticmethod
    def find_block_range(file_content: str, block_type: str, name: str) -> Tuple[int, int]:
        """Find the character range spanned by a named Jinja block.

        Args:
            file_content: Full text to search.
            block_type: Jinja block keyword, e.g. ``"docs"`` or ``"macro"``.
            name: Name of the block that follows the keyword in the opening tag.

        Returns:
            ``(start, end)`` where ``start`` is the offset of the opening tag's first
            character and ``end`` is the offset just past the closing ``end<block_type>``
            tag, so ``file_content[start:end]`` is the whole block.

        Raises:
            Exception: if no opening tag for ``name`` exists, or no closing tag
                follows the opening tag.
        """
        # The block type and name are escaped so they are matched literally. The
        # negative lookahead stops a name from matching as a prefix of a longer one
        # (searching for `cents` must not hit `cents_to_dollars`). Everything up to
        # the first `%}` is accepted as the argument list, so quoted defaults and
        # multi-line signatures are tolerated.
        opening_tag = re.compile(
            r"{%-?\s*"
            + re.escape(block_type)
            + r"\s+"
            + re.escape(name)
            + r"(?![A-Za-z0-9_])"
            + r".*?-?%}",
            re.DOTALL,
        )
        opening = opening_tag.search(file_content)

        if opening is None:
            raise Exception(f"Unable to find a {block_type} block with the name {name}.")

        # Only look for the closing tag after the opening tag. Comparing offsets with
        # `>=` would accept a previous block's end tag that sits directly against this
        # block's opening tag and yield an empty range.
        closing_tag = re.compile(r"{%-?\s*end" + re.escape(block_type) + r"\s*-?%}")
        closing = closing_tag.search(file_content, opening.end())

        if closing is None:
            raise Exception(f"Unable to find a the closing end{block_type} block for {name}.")

        return opening.start(), closing.end()

    @staticmethod
    def isolate_content(file_content: str, start: int, end: int) -> str:
        """Given content, a start position, and an end position, return the content of a Jinja block."""
        return file_content[start:end]

    @classmethod
    def from_file(cls, path: Path, block_type: str, name: str) -> "JinjaBlock":
        """Find a specific Jinja block within a file, based on the block type and the name.

        Reads the file at ``path``, locates the block, and returns a ``JinjaBlock``
        holding its offsets and text. Raises ``Exception`` (from
        ``find_block_range``) when the block cannot be located.
        """

        file_content = path.read_text()
        start, end = cls.find_block_range(file_content, block_type, name)
        content = cls.isolate_content(file_content=file_content, start=start, end=end)

        return cls(
            path=path, block_type=block_type, name=name, start=start, end=end, content=content
        )


def find_doc_reference(content: str) -> Set[str]:
    """Find all doc block references within a string.

    Recognizes single-argument calls such as ``{{ doc('customer_id') }}``, with
    either quote style. Whitespace inside the braces and parentheses is optional
    and may be any length, so ``{{doc("x")}}`` and ``{{  doc( 'x' )  }}`` are
    both found.

    Args:
        content: Text to scan (for example model SQL or a YAML patch file).

    Returns:
        The set of referenced doc names; empty when there are none.
    """
    matches = re.findall(
        r"{{-?\s*doc\(\s*\'?\"?([a-zA-Z0-9_\-\.]+)\'?\"?\s*\)\s*-?}}",
        content,
    )

    return set(matches)
4 changes: 3 additions & 1 deletion test-projects/split/split_proj/macros/_macros.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
macros:
- name: cents_to_dollars
description: Converts cents to dollars
description: Converts cents to dollars
- name: dollars_to_cents
description: Converts dollars to cents
5 changes: 5 additions & 0 deletions test-projects/split/split_proj/macros/cents_to_dollars.sql
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,8 @@
{% macro cents_to_dollars(column_name, precision=2) -%}
({{ column_name }} / 100)::{{ type_numeric() }}(16, {{ precision }})
{%- endmacro %}


{% macro dollars_to_cents(column_name) -%}
({{ column_name }} * 100)::{{ type_numeric() }}(16, 0)
{%- endmacro %}
3 changes: 3 additions & 0 deletions test-projects/split/split_proj/models/docs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{% docs customer_id %}
The unique key for each customer.
{% enddocs %}
26 changes: 16 additions & 10 deletions test-projects/split/split_proj/models/marts/__models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ version: 2

models:
- name: customers
description: Customer overview data mart, offering key details for each unique
description:
Customer overview data mart, offering key details for each unique
customer. One row per customer.
columns:
- name: customer_id
description: The unique key of the orders mart.
description: "{{ doc('customer_id') }}"
tests:
- not_null
- unique
Expand All @@ -19,20 +20,24 @@ models:
- name: last_ordered_at
description: The timestamp of a customer's most recent order.
- name: lifetime_spend_pretax
description: The sum of all the pre-tax subtotals of every order a customer
description:
The sum of all the pre-tax subtotals of every order a customer
has placed.
- name: lifetime_spend
description: The sum of all the order totals (including tax) that a customer
description:
The sum of all the order totals (including tax) that a customer
has ever placed.
- name: customer_type
description: Options are 'new' or 'returning', indicating if a customer has
description:
Options are 'new' or 'returning', indicating if a customer has
ordered more than once or has only placed their first order to date.
tests:
- accepted_values:
values: [new, returning]

- name: orders
description: Order overview data mart, offering key details for each order inlcluding
description:
Order overview data mart, offering key details for each order including
if it's a customer's first order and a food vs. drink item breakdown. One row
per order.
tests:
Expand All @@ -53,7 +58,8 @@ models:
to: ref('stg_customers')
field: customer_id
- name: location_id
description: The foreign key relating to the location the order was placed
description:
The foreign key relating to the location the order was placed
at.
- name: order_total
description: The total amount of the order in USD including tax.
Expand All @@ -74,19 +80,19 @@ models:
- name: order_cost
description: The sum of supply expenses to fulfill the order.
- name: location_name
description: The full location name of where this order was placed. Denormalized
description:
The full location name of where this order was placed. Denormalized
from `stg_locations`.
- name: is_food_order
description: A boolean indicating if this order included any food items.
- name: is_drink_order
description: A boolean indicating if this order included any drink items.


- name: leaf_node
description: A leaf node model that is not referenced by any other model.
columns:
- name: order_id
description: The unique key of the leaf node.
tests:
- not_null
- unique
- unique
2 changes: 1 addition & 1 deletion test-projects/split/split_proj/models/staging/__models.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ models:
description: Customer data with basic cleaning and transformation applied, one row per customer.
columns:
- name: customer_id
description: The unique key for each customer.
description: "{{ doc('customer_id') }}"
tests:
- not_null
- unique
Expand Down
Loading

0 comments on commit 207be4a

Please sign in to comment.