From 8aaf6eebb523c206beae4659928659afba52edc5 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 31 Oct 2023 12:16:40 -0500 Subject: [PATCH 1/3] WIP --- core/dbt/config/project.py | 2 +- core/dbt/contracts/files.py | 1 + core/dbt/contracts/project.py | 1 + core/dbt/parser/partial.py | 10 +++++++--- core/dbt/parser/schemas.py | 8 +++++--- core/dbt/parser/unit_tests.py | 7 +++---- tests/functional/unit_testing/test_unit_testing.py | 4 ++-- tests/unit/test_unit_test_parser.py | 14 +++++++------- 8 files changed, 27 insertions(+), 20 deletions(-) diff --git a/core/dbt/config/project.py b/core/dbt/config/project.py index adb8a29b325..71b71f720ff 100644 --- a/core/dbt/config/project.py +++ b/core/dbt/config/project.py @@ -697,7 +697,7 @@ def to_project_config(self, with_packages=False): "snapshots": self.snapshots, "sources": self.sources, "tests": self.tests, - "unit_tests": self.unit_tests, + "unit-tests": self.unit_tests, "metrics": self.metrics, "semantic-models": self.semantic_models, "saved-queries": self.saved_queries, diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index e85a8d91b5e..367517f8345 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -287,6 +287,7 @@ def remove_tests(self, yaml_key, name): # this is only used in tests (unit + functional) def get_tests(self, yaml_key, name): + breakpoint() if yaml_key in self.tests: if name in self.tests[yaml_key]: return self.tests[yaml_key][name] diff --git a/core/dbt/contracts/project.py b/core/dbt/contracts/project.py index 5e0d83e9011..de3e3262aaa 100644 --- a/core/dbt/contracts/project.py +++ b/core/dbt/contracts/project.py @@ -256,6 +256,7 @@ class Config(dbtMashConfig): "semantic_models": "semantic-models", "saved_queries": "saved-queries", "dbt_cloud": "dbt-cloud", + "unit_tests": "unit-tests", } @classmethod diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index 530a6d86489..9e335e0a618 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -98,6 +98,7 @@ def skip_parsing(self): # Compare the previously saved manifest files and the just-loaded manifest # files to see if anything changed def build_file_diff(self): + breakpoint() saved_file_ids = set(self.saved_files.keys()) new_file_ids = set(self.new_files.keys()) deleted_all_files = saved_file_ids.difference(new_file_ids) @@ -582,6 +583,7 @@ def delete_doc_node(self, source_file): # Schema files ----------------------- # Changed schema files def change_schema_file(self, file_id): + breakpoint() saved_schema_file = self.saved_files[file_id] new_schema_file = deepcopy(self.new_files[file_id]) saved_yaml_dict = saved_schema_file.dict_from_yaml @@ -681,7 +683,8 @@ def handle_change(key: str, delete: Callable): handle_change("metrics", self.delete_schema_metric) handle_change("groups", self.delete_schema_group) handle_change("semantic_models", self.delete_schema_semantic_model) - handle_change("unit", self.delete_schema_unit_test) + breakpoint() + handle_change("unit_tests", self.delete_schema_unit_test) handle_change("saved_queries", self.delete_schema_saved_query) def _handle_element_change( @@ -711,7 +714,8 @@ def _handle_element_change( # Take a "section" of the schema file yaml dictionary from saved and new schema files # and determine which parts have changed def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict): - dict_name = "model" if key == "unit" else "name" + # breakpoint() + dict_name = "model" if key == "unit_tests" else "name" if key in saved_yaml_dict or key in new_yaml_dict: saved_elements = saved_yaml_dict[key] if key in saved_yaml_dict else [] new_elements = new_yaml_dict[key] if key in new_yaml_dict else [] @@ -754,7 +758,7 @@ def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict): # flag indicates that we're processing a schema file, so if a matching # patch has already been scheduled, replace it. def merge_patch(self, schema_file, key, patch, new_patch=False): - elem_name = "model" if key == "unit" else "name" + elem_name = "model" if key == "unit_tests" else "name" if schema_file.pp_dict is None: schema_file.pp_dict = {} pp_dict = schema_file.pp_dict diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index ae71dc69f49..3916e197a20 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -231,7 +231,7 @@ def parse_file(self, block: FileBlock, dct: Optional[Dict] = None) -> None: semantic_model_parser = SemanticModelParser(self, yaml_block) semantic_model_parser.parse() - if "unit" in dct: + if "unit_tests" in dct: from dbt.parser.unit_tests import UnitTestParser unit_test_parser = UnitTestParser(self, yaml_block) @@ -262,12 +262,13 @@ class ParseResult: # abstract base class (ABCMeta) -# Four subclasses: MetricParser, ExposureParser, GroupParser, SourceParser, PatchParser +# Many subclasses: MetricParser, ExposureParser, GroupParser, SourceParser, +# PatchParser, SemanticModelParser, SavedQueryParser, UnitTestParser class YamlReader(metaclass=ABCMeta): def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock, key: str) -> None: self.schema_parser = schema_parser # key: models, seeds, snapshots, sources, macros, - # analyses, exposures + # analyses, exposures, unit_tests self.key = key self.yaml = yaml self.schema_yaml_vars = SchemaYamlVars() @@ -298,6 +299,7 @@ def root_project(self): # get the list of dicts pointed to by the key in the yaml config, # ensure that the dicts have string keys def get_key_dicts(self) -> Iterable[Dict[str, Any]]: + breakpoint() data = self.yaml.data.get(self.key, []) if not isinstance(data, list): raise ParsingError( diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index 98cd08bb4b6..4865f985485 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -193,20 +193,19 @@ def _get_original_input_node(self, input: str, tested_node: ModelNode): class UnitTestParser(YamlReader): def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock) -> None: - super().__init__(schema_parser, yaml, "unit") + super().__init__(schema_parser, yaml, "unit_tests") self.schema_parser = schema_parser self.yaml = yaml def parse(self) -> ParseResult: + breakpoint() for data in self.get_key_dicts(): unit_test_suite = self._get_unit_test_suite(data) model_name_split = unit_test_suite.model.split() tested_model_node = self._find_tested_model_node(unit_test_suite) for test in unit_test_suite.tests: - unit_test_case_unique_id = ( - f"unit.{self.project.project_name}.{unit_test_suite.model}.{test.name}" - ) + unit_test_case_unique_id = f"{NodeType.Unit}.{self.project.project_name}.{unit_test_suite.model}.{test.name}" unit_test_fqn = [self.project.project_name] + model_name_split + [test.name] unit_test_config = self._build_unit_test_config(unit_test_fqn, test.config) diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index d520ebc1543..fd79ae78533 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -34,7 +34,7 @@ """ test_my_model_yml = """ -unit: +unit-tests: - model: my_model tests: - name: test_my_model @@ -180,7 +180,7 @@ def test_basic(self, project): test_my_model_csv_yml = """ -unit: +unit-tests: - model: my_model tests: - name: test_my_model diff --git a/tests/unit/test_unit_test_parser.py b/tests/unit/test_unit_test_parser.py index e58bf869814..5faa5598de8 100644 --- a/tests/unit/test_unit_test_parser.py +++ b/tests/unit/test_unit_test_parser.py @@ -11,7 +11,7 @@ UNIT_TEST_MODEL_NOT_FOUND_SOURCE = """ -unit: +unit-tests: - model: my_model_doesnt_exist tests: - name: test_my_model_doesnt_exist @@ -24,7 +24,7 @@ UNIT_TEST_SOURCE = """ -unit: +unit-tests: - model: my_model tests: - name: test_my_model @@ -37,7 +37,7 @@ UNIT_TEST_VERSIONED_MODEL_SOURCE = """ -unit: +unit-tests: - model: my_model_versioned.v1 tests: - name: test_my_model_versioned @@ -50,7 +50,7 @@ UNIT_TEST_CONFIG_SOURCE = """ -unit: +unit-tests: - model: my_model tests: - name: test_my_model @@ -68,7 +68,7 @@ UNIT_TEST_MULTIPLE_SOURCE = """ -unit: +unit-tests: - model: my_model tests: - name: test_my_model @@ -105,7 +105,7 @@ def setUp(self): ) def file_block_for(self, data, filename): - return super().file_block_for(data, filename, "unit") + return super().file_block_for(data, filename, "unit-tests") def test_basic_model_not_found(self): block = self.yaml_block_for(UNIT_TEST_MODEL_NOT_FOUND_SOURCE, "test_my_model.yml") @@ -127,7 +127,7 @@ def test_basic(self): package_name="snowplow", path=block.path.relative_path, original_file_path=block.path.original_file_path, - unique_id="unit.snowplow.my_model.test_my_model", + unique_id="unit_tests.snowplow.my_model.test_my_model", given=[], expect=UnitTestOutputFixture(rows=[{"a": 1}]), description="unit test description", From 374c61fa5d2ba120bac3beb41dcfb46f3be337d2 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Thu, 2 Nov 2023 11:47:53 -0500 Subject: [PATCH 2/3] remove breakpoint --- core/dbt/contracts/files.py | 1 - core/dbt/parser/partial.py | 4 ---- core/dbt/parser/schemas.py | 1 - core/dbt/parser/unit_tests.py | 1 - 4 files changed, 7 deletions(-) diff --git a/core/dbt/contracts/files.py b/core/dbt/contracts/files.py index 367517f8345..e85a8d91b5e 100644 --- a/core/dbt/contracts/files.py +++ b/core/dbt/contracts/files.py @@ -287,7 +287,6 @@ def remove_tests(self, yaml_key, name): # this is only used in tests (unit + functional) def get_tests(self, yaml_key, name): - breakpoint() if yaml_key in self.tests: if name in self.tests[yaml_key]: return self.tests[yaml_key][name] diff --git a/core/dbt/parser/partial.py b/core/dbt/parser/partial.py index 9e335e0a618..a91ad4189a4 100644 --- a/core/dbt/parser/partial.py +++ b/core/dbt/parser/partial.py @@ -98,7 +98,6 @@ def skip_parsing(self): # Compare the previously saved manifest files and the just-loaded manifest # files to see if anything changed def build_file_diff(self): - breakpoint() saved_file_ids = set(self.saved_files.keys()) new_file_ids = set(self.new_files.keys()) deleted_all_files = saved_file_ids.difference(new_file_ids) @@ -583,7 +582,6 @@ def delete_doc_node(self, source_file): # Schema files ----------------------- # Changed schema files def change_schema_file(self, file_id): - breakpoint() saved_schema_file = self.saved_files[file_id] new_schema_file = deepcopy(self.new_files[file_id]) saved_yaml_dict = saved_schema_file.dict_from_yaml @@ -683,7 +681,6 @@ def handle_change(key: str, delete: Callable): handle_change("metrics", self.delete_schema_metric) handle_change("groups", self.delete_schema_group) handle_change("semantic_models", self.delete_schema_semantic_model) - breakpoint() handle_change("unit_tests", self.delete_schema_unit_test) handle_change("saved_queries", self.delete_schema_saved_query) @@ -714,7 +711,6 @@ def _handle_element_change( # Take a "section" of the schema file yaml dictionary from saved and new schema files # and determine which parts have changed def get_diff_for(self, key, saved_yaml_dict, new_yaml_dict): - # breakpoint() dict_name = "model" if key == "unit_tests" else "name" if key in saved_yaml_dict or key in new_yaml_dict: saved_elements = saved_yaml_dict[key] if key in saved_yaml_dict else [] diff --git a/core/dbt/parser/schemas.py b/core/dbt/parser/schemas.py index 3916e197a20..f5041c41a08 100644 --- a/core/dbt/parser/schemas.py +++ b/core/dbt/parser/schemas.py @@ -299,7 +299,6 @@ def root_project(self): # get the list of dicts pointed to by the key in the yaml config, # ensure that the dicts have string keys def get_key_dicts(self) -> Iterable[Dict[str, Any]]: - breakpoint() data = self.yaml.data.get(self.key, []) if not isinstance(data, list): raise ParsingError( diff --git a/core/dbt/parser/unit_tests.py b/core/dbt/parser/unit_tests.py index 4865f985485..c27c7e427e1 100644 --- a/core/dbt/parser/unit_tests.py +++ b/core/dbt/parser/unit_tests.py @@ -198,7 +198,6 @@ def __init__(self, schema_parser: SchemaParser, yaml: YamlBlock) -> None: self.yaml = yaml def parse(self) -> ParseResult: - breakpoint() for data in self.get_key_dicts(): unit_test_suite = self._get_unit_test_suite(data) model_name_split = unit_test_suite.model.split() From de98e6151a2e2259341e2c23a8931211e029c330 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Thu, 2 Nov 2023 14:37:14 -0500 Subject: [PATCH 3/3] fix tests, fix schema --- core/dbt/contracts/graph/model_config.py | 1 - .../unit_testing/test_unit_testing.py | 6 +++--- tests/unit/test_unit_test_parser.py | 18 +++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/core/dbt/contracts/graph/model_config.py b/core/dbt/contracts/graph/model_config.py index 659394fbac6..fc5505cc41d 100644 --- a/core/dbt/contracts/graph/model_config.py +++ b/core/dbt/contracts/graph/model_config.py @@ -748,7 +748,6 @@ class UnitTestConfig(BaseConfig): NodeType.Source: SourceConfig, NodeType.Seed: SeedConfig, NodeType.Test: TestConfig, - NodeType.Unit: TestConfig, NodeType.Model: NodeConfig, NodeType.Snapshot: SnapshotConfig, NodeType.Unit: UnitTestConfig, diff --git a/tests/functional/unit_testing/test_unit_testing.py b/tests/functional/unit_testing/test_unit_testing.py index fd79ae78533..747d1fd733b 100644 --- a/tests/functional/unit_testing/test_unit_testing.py +++ b/tests/functional/unit_testing/test_unit_testing.py @@ -34,7 +34,7 @@ """ test_my_model_yml = """ -unit-tests: +unit_tests: - model: my_model tests: - name: test_my_model @@ -180,7 +180,7 @@ def test_basic(self, project): test_my_model_csv_yml = """ -unit-tests: +unit_tests: - model: my_model tests: - name: test_my_model @@ -355,7 +355,7 @@ def test_basic(self, project): """ test_my_model_incremental_yml = """ -unit: +unit_tests: - model: my_incremental_model tests: - name: incremental_false diff --git a/tests/unit/test_unit_test_parser.py b/tests/unit/test_unit_test_parser.py index 5faa5598de8..d80b226e3fe 100644 --- a/tests/unit/test_unit_test_parser.py +++ b/tests/unit/test_unit_test_parser.py @@ -11,7 +11,7 @@ UNIT_TEST_MODEL_NOT_FOUND_SOURCE = """ -unit-tests: +unit_tests: - model: my_model_doesnt_exist tests: - name: test_my_model_doesnt_exist @@ -24,7 +24,7 @@ UNIT_TEST_SOURCE = """ -unit-tests: +unit_tests: - model: my_model tests: - name: test_my_model @@ -37,7 +37,7 @@ UNIT_TEST_VERSIONED_MODEL_SOURCE = """ -unit-tests: +unit_tests: - model: my_model_versioned.v1 tests: - name: test_my_model_versioned @@ -50,7 +50,7 @@ UNIT_TEST_CONFIG_SOURCE = """ -unit-tests: +unit_tests: - model: my_model tests: - name: test_my_model @@ -68,7 +68,7 @@ UNIT_TEST_MULTIPLE_SOURCE = """ -unit-tests: +unit_tests: - model: my_model tests: - name: test_my_model @@ -105,7 +105,7 @@ def setUp(self): ) def file_block_for(self, data, filename): - return super().file_block_for(data, filename, "unit-tests") + return super().file_block_for(data, filename, "unit_tests") def test_basic_model_not_found(self): block = self.yaml_block_for(UNIT_TEST_MODEL_NOT_FOUND_SOURCE, "test_my_model.yml") @@ -127,7 +127,7 @@ def test_basic(self): package_name="snowplow", path=block.path.relative_path, original_file_path=block.path.original_file_path, - unique_id="unit_tests.snowplow.my_model.test_my_model", + unique_id="unit_test.snowplow.my_model.test_my_model", given=[], expect=UnitTestOutputFixture(rows=[{"a": 1}]), description="unit test description", @@ -147,7 +147,7 @@ def test_unit_test_config(self): UnitTestParser(self.parser, block).parse() self.assert_has_manifest_lengths(self.parser.manifest, nodes=1, unit_tests=1) - unit_test = self.parser.manifest.unit_tests["unit.snowplow.my_model.test_my_model"] + unit_test = self.parser.manifest.unit_tests["unit_test.snowplow.my_model.test_my_model"] self.assertEqual(sorted(unit_test.config.tags), sorted(["schema_tag", "project_tag"])) self.assertEqual(unit_test.config.meta, {"meta_key": "meta_value", "meta_jinja_key": "2"}) @@ -168,7 +168,7 @@ def test_unit_test_versioned_model(self): self.assert_has_manifest_lengths(self.parser.manifest, nodes=2, unit_tests=1) unit_test = self.parser.manifest.unit_tests[ - "unit.snowplow.my_model_versioned.v1.test_my_model_versioned" + "unit_test.snowplow.my_model_versioned.v1.test_my_model_versioned" ] self.assertEqual(len(unit_test.depends_on.nodes), 1) self.assertEqual(unit_test.depends_on.nodes[0], "model.snowplow.my_model_versioned.v1")