Release 0.2.11 (#743)
# Description

Please describe the change you have made.

## Checklist

- [ ] Tests added/updated.
- [ ] Run Demo Job Locally.
- [ ] Documentation updated.
- [ ] Changelogs updated in
[CHANGELOG.cdf-tk.md](https://github.com/cognitedata/toolkit/blob/main/CHANGELOG.cdf-tk.md).
- [ ] Template changelogs updated in
[CHANGELOG.templates.md](https://github.com/cognitedata/toolkit/blob/main/CHANGELOG.templates.md).
- [ ] Version bumped in
  [_version.py](https://github.com/cognitedata/toolkit/blob/main/cognite/cognite_toolkit/_version.py)
  and [pyproject.toml](https://github.com/cognitedata/toolkit/blob/main/pyproject.toml)
  per [semantic versioning](https://semver.org/).
doctrino authored Jul 5, 2024
2 parents 8d3ae0d + 47c60e2 commit 538bb97
Showing 21 changed files with 161 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -173,7 +173,7 @@ celerybeat.pid
*.sage.py

# Environments
.env
*.env
.venv
.local/
env/
7 changes: 7 additions & 0 deletions CHANGELOG.cdf-tk.md
@@ -15,6 +15,13 @@ Changes are grouped as follows:
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.2.11] - 2024-07-05

### Fixed

- When running `cdf-tk build`, if you had two non-YAML files with the same name in different modules, or in
  subdirectories of the same module, the Toolkit would overwrite the first file with the second. This is now fixed.

## [0.2.10] - 2024-07-03

### Fixed
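The fixed behavior is easiest to see in isolation: flattening nested modules into the flat build directory could map two different non-YAML sources onto the same destination name. Below is a minimal, standalone sketch of the counter-based prefixing that now prevents the overwrite. Module and file names are made up, and this is not the actual Toolkit code (which, as the `build.py` changes below show, also reuses indexes across files sharing a stem):

```python
from collections import Counter
from pathlib import Path

index_by_resource_dir: Counter[str] = Counter()

def destination_name(source: Path, resource_dir: str) -> str:
    """Prefix each filename with a per-resource-directory index so that
    flattening nested modules into one build directory cannot collide."""
    index_by_resource_dir[resource_dir] += 1
    return f"{index_by_resource_dir[resource_dir]}.{source.name}"

sources = [
    Path("modules/module_a/transformations/transformation.sql"),
    Path("modules/module_b/transformations/transformation.sql"),
]
# Before 0.2.11 both sources mapped to "build/transformations/transformation.sql",
# so the second copy silently overwrote the first. With the index prefix:
for src in sources:
    print(destination_name(src, "transformations"))
# 1.transformation.sql
# 2.transformation.sql
```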
4 changes: 4 additions & 0 deletions CHANGELOG.templates.md
@@ -15,6 +15,10 @@ Changes are grouped as follows:
- `Fixed` for any bug fixes.
- `Security` in case of vulnerabilities.

## [0.2.11] - 2024-07-05

No changes to templates.

## [0.2.10] - 2024-07-03

No changes to templates.
7 changes: 6 additions & 1 deletion cognite_toolkit/_cdf_tk/_migration.yaml
@@ -1,8 +1,13 @@
- version: 0.2.10
- version: 0.2.11
cognite_modules: {}
resources: {}
tool: {}
cognite_modules_hash: ""
- version: 0.2.10
cognite_modules: {}
resources: {}
tool: {}
cognite_modules_hash: "a9440375346d12c63daa0ee0ae7d76b6251503395328f7f04b31e566c64d655a"
- version: 0.2.9
cognite_modules: {}
resources: {}
Expand Down
89 changes: 61 additions & 28 deletions cognite_toolkit/_cdf_tk/commands/build.py
@@ -8,7 +8,7 @@
import shutil
import sys
import traceback
from collections import ChainMap, defaultdict
from collections import ChainMap, Counter, defaultdict
from collections.abc import Hashable, Mapping, Sequence
from dataclasses import dataclass, field
from pathlib import Path
@@ -24,7 +24,7 @@
from cognite_toolkit._cdf_tk.commands._base import ToolkitCommand
from cognite_toolkit._cdf_tk.constants import (
_RUNNING_IN_BROWSER,
EXCL_INDEX_SUFFIX,
INDEX_PATTERN,
ROOT_MODULES,
TEMPLATE_VARS_FILE_SUFFIXES,
)
@@ -219,9 +219,11 @@ def process_config_files(
for resource_directory_name, directory_files in files_by_resource_directory.items():
build_folder: list[Path] = []
for source_path in directory_files.resource_files:
destination = self._replace_variables_validate_to_build_directory(
source_path, resource_directory_name, state, build_dir, verbose
destination = state.create_destination_path(
source_path, resource_directory_name, module_dir, build_dir
)

self._replace_variables_validate_to_build_directory(source_path, destination, state, verbose)
build_folder.append(destination)

if resource_directory_name == FunctionLoader.folder_name:
@@ -236,13 +238,16 @@ def process_config_files(
self.copy_files_to_upload_to_build_directory(
file_to_upload=directory_files.other_files,
resource_files_build_folder=build_folder,
state=state,
module_dir=module_dir,
build_dir=build_dir,
verbose=verbose,
)
else:
for source_path in directory_files.other_files:
destination = build_dir / resource_directory_name / source_path.name
destination = state.create_destination_path(
source_path, resource_directory_name, module_dir, build_dir
)
destination.parent.mkdir(parents=True, exist_ok=True)
if (
resource_directory_name == DatapointsLoader.folder_name
@@ -290,27 +295,26 @@ def _validate_function_directory(
)

def _replace_variables_validate_to_build_directory(
self, source_path: Path, resource_directory: str, state: _BuildState, build_dir: Path, verbose: bool
) -> Path:
self, source_path: Path, destination_path: Path, state: _BuildState, verbose: bool
) -> None:
if verbose:
print(f" [bold green]INFO:[/] Processing {source_path.name}")
destination = build_dir / resource_directory / state.create_file_name(source_path, resource_directory)
destination.parent.mkdir(parents=True, exist_ok=True)

destination_path.parent.mkdir(parents=True, exist_ok=True)

content = source_path.read_text()
state.hash_by_source_path[source_path] = calculate_str_or_file_hash(content)

content = state.replace_variables(content, source_path.suffix)
destination.write_text(content)
state.source_by_build_path[destination] = source_path
destination_path.write_text(content)
state.source_by_build_path[destination_path] = source_path

file_warnings = self.validate(content, source_path, destination, state, verbose)
file_warnings = self.validate(content, source_path, destination_path, state, verbose)
if file_warnings:
self.warning_list.extend(file_warnings)
# Here we do not use the self.warn method as we want to print the warnings as a group.
if self.print_warning:
print(str(file_warnings))
return destination

def _check_missing_dependencies(self, state: _BuildState, project_config_dir: Path) -> None:
existing = {(resource_cls, id_) for resource_cls, ids in state.ids_by_resource_type.items() for id_ in ids}
@@ -372,11 +376,18 @@ def _is_selected_module(relative_module_dir: Path, selected_modules: list[str |

def _to_files_by_resource_directory(self, filepaths: list[Path], module_dir: Path) -> dict[str, ResourceDirectory]:
# Sort to support 1., 2. etc prefixes
def sort_key(p: Path) -> int:
if result := re.findall(r"^(\d+)", p.stem):
return int(result[0])
def sort_key(p: Path) -> tuple[int, int, str]:
first = {
".yaml": 0,
".yml": 0,
}.get(p.suffix.lower(), 1)
# We ensure that YAML files are sorted before other files. This matters when
# we add indexes to files: for example, a .sql file with the same name as a
# .yaml file should get the same index as the .yaml file.
if result := INDEX_PATTERN.search(p.stem):
return first, int(result.group()[:-1]), p.name
else:
return len(filepaths)
return first, len(filepaths) + 1, p.name

# The builder of a module can control the order in which resources are deployed by prefixing a number.
# The custom key 'sort_key' makes the sort numeric rather than lexicographic.
@@ -524,6 +535,7 @@ def _read_function_path_by_external_id(
def copy_files_to_upload_to_build_directory(
file_to_upload: list[Path],
resource_files_build_folder: list[Path],
state: _BuildState,
module_dir: Path,
build_dir: Path,
verbose: bool = False,
@@ -542,8 +554,8 @@ def copy_files_to_upload_to_build_directory(
destination_stem = filepath.stem
if template_name:
destination_stem = template_name.replace(FileMetadataLoader.template_pattern, filepath.stem)
destination = build_dir / FileLoader.folder_name / f"{destination_stem}{filepath.suffix}"
destination.parent.mkdir(parents=True, exist_ok=True)
new_source = filepath.parent / f"{destination_stem}{filepath.suffix}"
destination = state.create_destination_path(new_source, FileLoader.folder_name, module_dir, build_dir)
shutil.copyfile(filepath, destination)

@staticmethod
Expand Down Expand Up @@ -731,7 +743,8 @@ class _BuildState:
variables_by_module_path: dict[str, dict[str, str]] = field(default_factory=dict)
source_by_build_path: dict[Path, Path] = field(default_factory=dict)
hash_by_source_path: dict[Path, str] = field(default_factory=dict)
number_by_resource_type: dict[str, int] = field(default_factory=lambda: defaultdict(int))
index_by_resource_type_counter: Counter[str] = field(default_factory=Counter)
index_by_filepath_stem: dict[Path, int] = field(default_factory=dict)
printed_function_warning: bool = False
ids_by_resource_type: dict[type[ResourceLoader], dict[Hashable, Path]] = field(
default_factory=lambda: defaultdict(dict)
@@ -749,15 +762,35 @@ def local_variables(self) -> Mapping[str, str]:
def update_local_variables(self, module_dir: Path) -> None:
self._local_variables = _Helpers.create_local_config(self.variables_by_module_path, module_dir)

def create_file_name(self, filepath: Path, resource_directory: str) -> str:
filename = filepath.name
if filepath.suffix in EXCL_INDEX_SUFFIX:
return filename
def create_destination_path(
self, source_path: Path, resource_directory: str, module_dir: Path, build_dir: Path
) -> Path:
"""Creates the filepath in the build directory for the given source path.
Note that this is a complex operation as the modules in the source are nested while the build directory is flat.
This means that we lose information and risk having duplicate filenames. To avoid this, we prefix the filename
with a number to ensure uniqueness.
"""
filename = source_path.name
# Get rid of the local index
filename = re.sub("^[0-9]+\\.", "", filename)
self.number_by_resource_type[resource_directory] += 1
filename = f"{self.number_by_resource_type[resource_directory]}.{filename}"
return filename
filename = INDEX_PATTERN.sub("", filename)

relative_stem = module_dir.name / source_path.relative_to(module_dir).parent / source_path.stem
if relative_stem in self.index_by_filepath_stem:
# Ensure extra files (.sql, .pdf) with the same stem get the same index as
# the main YAML file. The TransformationLoader expects this.
index = self.index_by_filepath_stem[relative_stem]
else:
# Increment to ensure we do not get duplicate filenames when we flatten the file
# structure from the module to the build directory.
self.index_by_resource_type_counter[resource_directory] += 1
index = self.index_by_resource_type_counter[resource_directory]
self.index_by_filepath_stem[relative_stem] = index

filename = f"{index}.{filename}"
destination_path = build_dir / resource_directory / filename
destination_path.parent.mkdir(parents=True, exist_ok=True)
return destination_path

def replace_variables(self, content: str, file_suffix: str = ".yaml") -> str:
for name, variable in self.local_variables.items():
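For reference, the new destination logic can be sketched standalone. The key behavior is that a companion file (e.g. `.sql`) sharing a stem with its YAML file reuses the YAML file's index, which is why `sort_key` orders YAML first. This is a simplified re-implementation for illustration, not the real method, which lives on `_BuildState` and also creates the destination directories; the module and file names are invented:

```python
import re
from collections import Counter
from pathlib import Path

INDEX_PATTERN = re.compile("^[0-9]+\\.")

index_counter: Counter[str] = Counter()  # plays the role of index_by_resource_type_counter
index_by_stem: dict[Path, int] = {}      # plays the role of index_by_filepath_stem

def create_destination_path(source: Path, resource_dir: str, module_dir: Path, build_dir: Path) -> Path:
    filename = INDEX_PATTERN.sub("", source.name)  # drop any local "1." prefix
    relative_stem = module_dir.name / source.relative_to(module_dir).parent / source.stem
    if relative_stem in index_by_stem:
        index = index_by_stem[relative_stem]  # companion file reuses the YAML file's index
    else:
        index_counter[resource_dir] += 1
        index = index_counter[resource_dir]
        index_by_stem[relative_stem] = index
    return build_dir / resource_dir / f"{index}.{filename}"

module, build = Path("modules/my_module"), Path("build")
print(create_destination_path(module / "transformations/population.yaml", "transformations", module, build))
print(create_destination_path(module / "transformations/population.sql", "transformations", module, build))
# build/transformations/1.population.yaml
# build/transformations/1.population.sql   <- same index as its YAML file
```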
6 changes: 4 additions & 2 deletions cognite_toolkit/_cdf_tk/constants.py
@@ -1,3 +1,4 @@
import re
from pathlib import Path

try:
@@ -27,8 +28,6 @@

# Add any other files below that should be included in a build
EXCL_FILES = ["README.md", DEFAULT_CONFIG_FILE]
# Which suffixes to exclude when we create indexed files (i.e., they are bundled with their main config file)
EXCL_INDEX_SUFFIX = frozenset([".sql", ".csv", ".parquet"])
# Files to search for variables.
SEARCH_VARIABLES_SUFFIX = frozenset([".yaml", "yml", ".sql", ".csv"])
# Which files to process for template variable replacement
@@ -39,6 +38,9 @@
COGNITE_MODULES_PATH = ROOT_PATH / COGNITE_MODULES

SUPPORT_MODULE_UPGRADE_FROM_VERSION = "0.1.0"
# This is used in the build directory to keep track of order and to flatten
# the module directory structure while accounting for duplicate names.
INDEX_PATTERN = re.compile("^[0-9]+\\.")


class URL:
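A quick check of what the new `INDEX_PATTERN` does and does not match, using hypothetical file names:

```python
import re

INDEX_PATTERN = re.compile("^[0-9]+\\.")

# Matches a numeric prefix followed by a dot, anchored to the start of the name:
print(INDEX_PATTERN.sub("", "3.my_transformation.sql"))  # -> my_transformation.sql
print(INDEX_PATTERN.sub("", "my_file.v2.csv"))           # -> my_file.v2.csv (no prefix, unchanged)
print(bool(INDEX_PATTERN.search("12.population")))       # -> True
```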
9 changes: 5 additions & 4 deletions cognite_toolkit/_cdf_tk/loaders/_data_loaders.py
@@ -11,6 +11,7 @@
from cognite.client.data_classes import FileMetadataWrite, FileMetadataWriteList, capabilities
from cognite.client.data_classes.capabilities import Capability, FilesAcl, RawAcl, TimeSeriesAcl

from cognite_toolkit._cdf_tk.constants import INDEX_PATTERN
from cognite_toolkit._cdf_tk.utils import CDFToolConfig

from ._base_loaders import DataLoader
@@ -104,11 +105,11 @@ def upload(self, datafile: Path, ToolGlobals: CDFToolConfig, dry_run: bool) -> t
elif isinstance(loaded, FileMetadataWriteList):
self.meta_data_list.extend(loaded)
self.has_loaded_metadata = True

meta_data = next((meta for meta in self.meta_data_list if meta.name == datafile.name), None)
source_file_name = INDEX_PATTERN.sub("", datafile.name)
meta_data = next((meta for meta in self.meta_data_list if meta.name == source_file_name), None)
if meta_data is None:
raise ValueError(
f"Missing metadata for file {datafile.name}. Please provide a yaml file with metadata "
f"Missing metadata for file {source_file_name}. Please provide a yaml file with metadata "
"with an entry with the same name."
)
external_id = meta_data.external_id
@@ -137,7 +138,7 @@ def get_required_capability(cls, ToolGlobals: CDFToolConfig) -> Capability:
return RawAcl([RawAcl.Action.Read, RawAcl.Action.Write], RawAcl.Scope.All())

def upload(self, datafile: Path, ToolGlobals: CDFToolConfig, dry_run: bool) -> tuple[str, int]:
pattern = re.compile(rf"^(\d+\.)?{datafile.stem}\.(yml|yaml)$")
pattern = re.compile(rf"{datafile.stem}\.(yml|yaml)$")
metadata_file = next((filepath for filepath in datafile.parent.glob("*") if pattern.match(filepath.name)), None)
if metadata_file is not None:
raw = yaml.safe_load(metadata_file.read_text())
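The loader-side counterpart of the indexing change: since `build` now prefixes every file with an index, `FileLoader.upload` strips it before matching against the metadata entries, which still use the original names (the relaxed `RawDatabaseLoader` pattern above relies on the same idea, since the datafile's stem now carries the index itself). A sketch with made-up metadata names:

```python
import re
from pathlib import Path

INDEX_PATTERN = re.compile("^[0-9]+\\.")

# Metadata entries still reference the original (un-indexed) file names, so the
# loader strips the build index from the file on disk before looking one up:
meta_names = ["pump_manual.pdf", "site_plan.png"]  # hypothetical FileMetadata names

datafile = Path("build/files/7.pump_manual.pdf")
source_file_name = INDEX_PATTERN.sub("", datafile.name)  # -> "pump_manual.pdf"
meta = next((name for name in meta_names if name == source_file_name), None)
print(meta)  # pump_manual.pdf
```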
42 changes: 27 additions & 15 deletions cognite_toolkit/_cdf_tk/loaders/_resource_loaders.py
@@ -15,7 +15,6 @@

import itertools
import json
import re
from abc import ABC
from collections import defaultdict
from collections.abc import Callable, Hashable, Iterable, Sequence, Sized
@@ -159,6 +158,7 @@
from rich import print

from cognite_toolkit._cdf_tk._parameters import ANY_INT, ANY_STR, ANYTHING, ParameterSpec, ParameterSpecSet
from cognite_toolkit._cdf_tk.constants import INDEX_PATTERN
from cognite_toolkit._cdf_tk.exceptions import (
ToolkitFileNotFoundError,
ToolkitInvalidParameterNameError,
@@ -1797,13 +1797,21 @@ def _are_equal(

return self._return_are_equal(local_dumped, cdf_dumped, return_dumped)

def _get_query_file(self, filepath: Path, transformation_external_id: str | None) -> Path | None:
file_name = re.sub(r"\d+\.", "", filepath.stem)
query_file = filepath.parent / f"{file_name}.sql"
@staticmethod
def _get_query_file(filepath: Path, transformation_external_id: str | None) -> Path | None:
query_file = filepath.parent / f"{filepath.stem}.sql"
if not query_file.exists() and transformation_external_id:
query_file = filepath.parent / f"{transformation_external_id}.sql"
if not query_file.exists():
found_query_file = next(
(
f
for f in filepath.parent.iterdir()
if f.is_file() and f.name.endswith(f"{transformation_external_id}.sql")
),
None,
)
if found_query_file is None:
return None
query_file = found_query_file
return query_file

def load_resource(
@@ -2501,6 +2509,13 @@ def load_resource(
self, filepath: Path, ToolGlobals: CDFToolConfig, skip_validation: bool
) -> FileMetadataWrite | FileMetadataWriteList:
loaded = load_yaml_inject_variables(filepath, ToolGlobals.environment_variables())

file_to_upload_by_source_name: dict[str, Path] = {
INDEX_PATTERN.sub("", file.name): file
for file in filepath.parent.glob("*")
if file.suffix not in {".yaml", ".yml"}
}

is_file_template = (
isinstance(loaded, list) and len(loaded) == 1 and "$FILENAME" in loaded[0].get("externalId", "")
)
@@ -2511,16 +2526,13 @@ def load_resource(
if "name" in template and "$FILENAME" in template["name"]:
template_prefix, template_suffix = template["name"].split("$FILENAME", maxsplit=1)
loaded_list: list[dict[str, Any]] = []
for file in filepath.parent.glob("*"):
if file.suffix in [".yaml", ".yml"]:
continue
for source_name, file in file_to_upload_by_source_name.items():
# Deep Copy
new_file = json.loads(json.dumps(template))

# We modify the filename in the build command, we clean the name here to get the original filename
filename_in_module = (
re.sub("^[0-9]+\\.", "", file.name).removeprefix(template_prefix).removesuffix(template_suffix)
)
new_file["name"] = file.name
filename_in_module = source_name.removeprefix(template_prefix).removesuffix(template_suffix)
new_file["name"] = source_name
new_file["externalId"] = new_file["externalId"].replace("$FILENAME", filename_in_module)
loaded_list.append(new_file)

@@ -2546,8 +2558,8 @@

files_metadata: FileMetadataWriteList = FileMetadataWriteList.load(loaded_list)
for meta in files_metadata:
if meta.name and not Path(filepath.parent / meta.name).exists():
raise ToolkitFileNotFoundError(f"Could not find file {meta.name} referenced " f"in filepath {filepath}")
if meta.name and meta.name not in file_to_upload_by_source_name:
raise ToolkitFileNotFoundError(f"Could not find file {meta.name} referenced in filepath {filepath}")
return files_metadata

def _are_equal(
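The `$FILENAME` template expansion in `FileMetadataLoader.load_resource` can be illustrated standalone. This sketch mirrors the new logic, where index-stripped source names are keyed into `file_to_upload_by_source_name`; the template contents and file names are hypothetical:

```python
import json
import re

INDEX_PATTERN = re.compile("^[0-9]+\\.")

# Hypothetical single-entry template using the $FILENAME placeholder:
template = {"externalId": "doc_$FILENAME", "name": "$FILENAME", "dataSetExternalId": "ds_files"}
template_prefix, template_suffix = template["name"].split("$FILENAME", maxsplit=1)

# Non-YAML siblings of the metadata file, keyed by their original
# (index-stripped) names, mirroring file_to_upload_by_source_name:
build_files = ["1.pump_manual.pdf", "2.site_plan.png"]
file_to_upload_by_source_name = {INDEX_PATTERN.sub("", name): name for name in build_files}

loaded_list = []
for source_name in file_to_upload_by_source_name:
    new_file = json.loads(json.dumps(template))  # deep copy, as in the loader
    filename_in_module = source_name.removeprefix(template_prefix).removesuffix(template_suffix)
    new_file["name"] = source_name
    new_file["externalId"] = new_file["externalId"].replace("$FILENAME", filename_in_module)
    loaded_list.append(new_file)

print([f["externalId"] for f in loaded_list])
# ['doc_pump_manual.pdf', 'doc_site_plan.png']
```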
2 changes: 1 addition & 1 deletion cognite_toolkit/_system.yaml
@@ -25,4 +25,4 @@ packages:
- example_pump_data_model

# This part is used by cdf-toolkit to keep track of the version and help you upgrade.
cdf_toolkit_version: 0.2.10
cdf_toolkit_version: 0.2.11
2 changes: 1 addition & 1 deletion cognite_toolkit/_version.py
@@ -1 +1 @@
__version__ = "0.2.10"
__version__ = "0.2.11"
@@ -13,6 +13,7 @@ implements:
version: v1
properties:
pumps:
connectionType: multi_edge_connection
type:
space: '{{model_space}}'
externalId: LiftStation.pumps
