Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #7551: Create add_from_artifact to populate state_relation field of nodes #7796

Merged
merged 6 commits into from
Jun 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20230605-234706.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Under the Hood
body: Create `add_from_artifact` to populate `state_relation` field of nodes
time: 2023-06-05T23:47:06.581326-07:00
custom:
Author: stu-k aranke
Issue: "7551"
18 changes: 18 additions & 0 deletions core/dbt/contracts/graph/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
BaseNode,
ManifestOrPublicNode,
ModelNode,
RelationalNode,
)
from dbt.contracts.graph.unparsed import SourcePatch, NodeVersion, UnparsedVersion
from dbt.contracts.graph.manifest_upgrade import upgrade_manifest_json
Expand Down Expand Up @@ -1143,6 +1144,23 @@ def merge_from_artifact(
sample = list(islice(merged, 5))
fire_event(MergedFromState(num_merged=len(merged), sample=sample))

# Called by CloneTask.defer_to_manifest
def add_from_artifact(
    self,
    other: "WritableManifest",
) -> None:
    """Record, on this manifest's nodes, where each node lives in ``other``.

    Every node of ``other`` whose unique id is also present in this manifest,
    and which is refable and not ephemeral, has its counterpart here replaced
    by ``counterpart.replace(state_relation=...)``, where the state relation
    is a ``RelationalNode`` built from the other node's database, schema and
    alias. Nodes that exist only in ``other`` are not added to this manifest.
    """
    refable_types = set(NodeType.refable())
    for node_id, other_node in other.nodes.items():
        own_node = self.nodes.get(node_id)
        if not own_node:
            # No counterpart in this manifest, so there is nothing to annotate.
            continue
        if other_node.resource_type not in refable_types or other_node.is_ephemeral:
            # Only non-ephemeral refable nodes are examined.
            continue
        self.nodes[node_id] = own_node.replace(
            state_relation=RelationalNode(
                other_node.database, other_node.schema, other_node.alias
            )
        )

# Methods that were formerly in ParseResult

def add_macro(self, source_file: SourceFile, macro: Macro):
Expand Down
9 changes: 9 additions & 0 deletions core/dbt/contracts/graph/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,15 @@ def add_macro(self, value: str):
self.macros.append(value)


# Relation descriptor: everything declared by HasRelationMetadata plus an alias.
# NOTE(review): HasRelationMetadata is defined outside this view; callers build
# this as RelationalNode(database, schema, alias), so it presumably contributes
# the database and schema fields -- confirm against its definition.
@dataclass
class RelationalNode(HasRelationMetadata):
    # Alias of the relation; also exposed through `identifier` below.
    alias: str

    @property
    def identifier(self) -> str:
        """Return the alias, which serves as this relation's identifier."""
        return self.alias


@dataclass
class DependsOn(MacroDependsOn):
nodes: List[str] = field(default_factory=list)
Expand Down
82 changes: 55 additions & 27 deletions test/unit/test_manifest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import os
import unittest
from unittest import mock

from argparse import Namespace
import copy
from collections import namedtuple
from itertools import product
from copy import deepcopy
from datetime import datetime
from itertools import product
from unittest import mock

import freezegun
import pytest

import dbt.flags
Expand All @@ -27,22 +27,17 @@
Group,
RefArgs,
)

from dbt.contracts.graph.unparsed import (
ExposureType,
Owner,
MaturityType,
MetricFilter,
MetricTime,
)

from dbt.events.functions import reset_metadata_vars
from dbt.exceptions import AmbiguousResourceNameRefError
from dbt.flags import set_from_args

from dbt.node_types import NodeType
import freezegun

from .utils import (
MockMacro,
MockDocumentation,
Expand All @@ -53,7 +48,6 @@
inject_plugin,
)


REQUIRED_PARSED_NODE_KEYS = frozenset(
{
"alias",
Expand Down Expand Up @@ -103,7 +97,6 @@
| {"compiled", "extra_ctes_injected", "extra_ctes", "compiled_code", "relation_name"}
)


ENV_KEY_NAME = "KEY" if os.name == "nt" else "key"


Expand Down Expand Up @@ -365,7 +358,7 @@ def tearDown(self):
reset_metadata_vars()

@freezegun.freeze_time("2018-02-14T09:15:13Z")
def test__no_nodes(self):
def test_no_nodes(self):
manifest = Manifest(
nodes={},
sources={},
Expand Down Expand Up @@ -407,8 +400,8 @@ def test__no_nodes(self):
)

@freezegun.freeze_time("2018-02-14T09:15:13Z")
def test__nested_nodes(self):
nodes = copy.copy(self.nested_nodes)
def test_nested_nodes(self):
nodes = deepcopy(self.nested_nodes)
aranke marked this conversation as resolved.
Show resolved Hide resolved
manifest = Manifest(
nodes=nodes,
sources={},
Expand Down Expand Up @@ -462,12 +455,12 @@ def test__nested_nodes(self):
)
self.assertEqual(child_map["model.snowplow.events"], [])

def test__build_flat_graph(self):
exposures = copy.copy(self.exposures)
metrics = copy.copy(self.metrics)
groups = copy.copy(self.groups)
nodes = copy.copy(self.nested_nodes)
sources = copy.copy(self.sources)
def test_build_flat_graph(self):
exposures = deepcopy(self.exposures)
metrics = deepcopy(self.metrics)
groups = deepcopy(self.groups)
nodes = deepcopy(self.nested_nodes)
sources = deepcopy(self.sources)
manifest = Manifest(
nodes=nodes,
sources=sources,
Expand Down Expand Up @@ -588,7 +581,7 @@ def test_get_resource_fqns_empty(self):
self.assertEqual(manifest.get_resource_fqns(), {})

def test_get_resource_fqns(self):
nodes = copy.copy(self.nested_nodes)
nodes = deepcopy(self.nested_nodes)
nodes["seed.root.seed"] = SeedNode(
name="seed",
database="dbt",
Expand Down Expand Up @@ -634,7 +627,7 @@ def test_get_resource_fqns(self):
resource_fqns = manifest.get_resource_fqns()
self.assertEqual(resource_fqns, expect)

def test__deepcopy_copies_flat_graph(self):
def test_deepcopy_copies_flat_graph(self):
test_node = ModelNode(
name="events",
database="dbt",
Expand Down Expand Up @@ -663,6 +656,41 @@ def test__deepcopy_copies_flat_graph(self):
copy = original.deepcopy()
self.assertEqual(original.flat_graph, copy.flat_graph)

def test_add_from_artifact(self):
    """add_from_artifact sets state_relation only on nodes present in both manifests."""
    original_nodes = deepcopy(self.nested_nodes)
    other_nodes = deepcopy(self.nested_nodes)

    # In the other manifest, swap "model.root.nested" for a new node
    # "model.root.nested2" so the two manifests differ by one node each way.
    nested2 = other_nodes.pop("model.root.nested")
    nested2.name = "nested2"
    nested2.alias = "nested2"
    nested2.fqn = ["root", "nested2"]
    other_nodes["model.root.nested2"] = nested2

    # Give every node in the other manifest a distinguishable location.
    # The nodes are mutated in place, so no write-back into the dict is needed.
    for other_node in other_nodes.values():
        other_node.database = "other_" + other_node.database
        other_node.schema = "other_" + other_node.schema
        other_node.alias = "other_" + other_node.alias

    original_manifest = Manifest(nodes=original_nodes)
    other_manifest = Manifest(nodes=other_nodes)
    original_manifest.add_from_artifact(other_manifest.writable_manifest())

    # unittest assertions are used throughout: bare `assert` statements are
    # stripped when Python runs with -O, which would silently skip the checks.

    # new node added should not be in original manifest
    self.assertNotIn("model.root.nested2", original_manifest.nodes)

    # old node removed should not have state relation in original manifest
    self.assertIsNone(original_manifest.nodes["model.root.nested"].state_relation)

    # for all other nodes, check that state relation is updated
    for unique_id, node in original_manifest.nodes.items():
        if unique_id == "model.root.nested":
            continue
        # Fail with a clear message instead of an AttributeError on None.
        self.assertIsNotNone(node.state_relation, msg=unique_id)
        self.assertEqual("other_" + node.database, node.state_relation.database)
        self.assertEqual("other_" + node.schema, node.state_relation.schema)
        self.assertEqual("other_" + node.alias, node.state_relation.alias)


class MixedManifestTest(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -869,7 +897,7 @@ def tearDown(self):
del os.environ["DBT_ENV_CUSTOM_ENV_key"]

@freezegun.freeze_time("2018-02-14T09:15:13Z")
def test__no_nodes(self):
def test_no_nodes(self):
metadata = ManifestMetadata(
generated_at=datetime.utcnow(), invocation_id="01234567-0123-0123-0123-0123456789ab"
)
Expand Down Expand Up @@ -911,8 +939,8 @@ def test__no_nodes(self):
)

@freezegun.freeze_time("2018-02-14T09:15:13Z")
def test__nested_nodes(self):
nodes = copy.copy(self.nested_nodes)
def test_nested_nodes(self):
nodes = deepcopy(self.nested_nodes)
manifest = Manifest(
nodes=nodes,
sources={},
Expand Down Expand Up @@ -963,8 +991,8 @@ def test__nested_nodes(self):
)
self.assertEqual(child_map["model.snowplow.events"], [])

def test__build_flat_graph(self):
nodes = copy.copy(self.nested_nodes)
def test_build_flat_graph(self):
nodes = deepcopy(self.nested_nodes)
manifest = Manifest(
nodes=nodes,
sources={},
Expand Down