Skip to content

Commit

Permalink
[CDF-24141] 👻 Credentials hash (#1468)
Browse files Browse the repository at this point in the history
* refactor: added flag

* build; enable flag

* feat: implemented workflow trigger hash storing

* tests: updated tests

* feat; hash workflow trigger

* tests: update approval client

* feat: store hash transformations

* fix: introduced bug

* fix: first passing

* tests: extend test to detect changes

* tests: extend test

* tests: added failing test

* feat: hash function schedule

* refactor: cleanup

* docs

* tests: update outdated

* fix: typos
  • Loading branch information
doctrino authored Feb 19, 2025
1 parent 8e45d1f commit e4c7e70
Show file tree
Hide file tree
Showing 12 changed files with 306 additions and 94 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.cdf-tk.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ Changes are grouped as follows:
- [alpha feature] When the flag `strict-validation` is set to `true` in the `cdf.toml` file, the Toolkit will
no longer use its own authentication as a fallback when deploying WorkflowTriggers and FunctionSchedules in projects
where `validation-type` is set to anything other than `dev`. This will be the default behavior from version `0.5.0`.
- [alpha feature] When the flag `credentials-hash` is set to `true` in the `cdf.toml` file, the Toolkit will hash
the credentials of Transformations (if present), FunctionSchedules, and WorkflowTriggers before deploying them. This
will be used to detect if the credentials have been changed when running `cdf deploy`.

### Improved

Expand Down
3 changes: 3 additions & 0 deletions cdf.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ module-repeat = true
dump-extended = true
populate = true
strict-validation = true
# Setting this to true will change all snapshots for WorkflowTriggers/FunctionSchedules.
# For simplicity (to avoid keeping track of two sets of snapshots), we keep this set to false in development.
credentials-hash = false

[plugins]
run = true
Expand Down
5 changes: 5 additions & 0 deletions cognite_toolkit/_cdf_tk/feature_flags.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ class Flags(Enum):
"visible": True,
"description": "For Workflow/Transformations/Function do not fallback to Toolkit credentials when validation-type != 'dev'",
}
CREDENTIALS_HASH: ClassVar[dict[str, Any]] = { # type: ignore[misc]
"visible": True,
"description": "Stores a hash of the credentials of Workflow/Transformation/Function in the resources such that"
" the resource is updated when the credentials change",
}

# Return the result of FeatureFlag.is_enabled for this flag member.
def is_enabled(self) -> bool:
return FeatureFlag.is_enabled(self)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from pathlib import Path
from typing import Any, cast, final

from cognite.client.credentials import OAuthClientCredentials
from cognite.client.data_classes import (
ClientCredentials,
Function,
Expand Down Expand Up @@ -34,7 +33,6 @@
from cognite_toolkit._cdf_tk.exceptions import (
ResourceCreationError,
ToolkitRequiredValueError,
ToolkitTypeError,
)
from cognite_toolkit._cdf_tk.feature_flags import Flags
from cognite_toolkit._cdf_tk.loaders._base_loaders import ResourceLoader
Expand All @@ -44,6 +42,7 @@
calculate_secure_hash,
calculate_str_or_file_hash,
)
from cognite_toolkit._cdf_tk.utils.cdf import read_auth

from .auth_loaders import GroupAllScopedLoader
from .data_organization_loaders import DataSetsLoader
Expand Down Expand Up @@ -351,6 +350,9 @@ class FunctionScheduleLoader(
parent_resource = frozenset({FunctionLoader})
support_update = False

_hash_key = "cdf-auth"
_description_character_limit = 500

def __init__(self, client: ToolkitClient, build_path: Path | None, console: Console | None):
super().__init__(client, build_path, console)
self.authentication_by_id: dict[FunctionScheduleID, ClientCredentials] = {}
Expand Down Expand Up @@ -401,32 +403,34 @@ def get_dependent_items(cls, item: dict) -> Iterable[tuple[type[ResourceLoader],
if "functionExternalId" in item:
yield FunctionLoader, item["functionExternalId"]

def load_resource(self, resource: dict[str, Any], is_dry_run: bool = False) -> FunctionScheduleWrite:
identifier = self.get_id(resource)
auth = resource.pop("authentication", None)
if auth is None:
if (self.client.config.is_strict_validation and Flags.STRICT_VALIDATION.is_enabled()) or not isinstance(
self.client.config.credentials, OAuthClientCredentials
):
raise ToolkitRequiredValueError(f"Authentication is missing for schedule {identifier!r}.")
else:
HighSeverityWarning(
f"Authentication is missing for schedule {identifier!r}. Falling back to the Toolkit credentials"
).print_warning(console=self.console)
credentials = ClientCredentials(
self.client.config.credentials.client_id, self.client.config.credentials.client_secret
)
elif not isinstance(auth, dict):
raise ToolkitTypeError(f"Authentication must be a dictionary for schedule {identifier!r}")
elif "clientId" not in auth or "clientSecret" not in auth:
raise ToolkitRequiredValueError(
f"Authentication must contain clientId and clientSecret for schedule {identifier!r}"
)
else:
credentials = ClientCredentials(auth["clientId"], auth["clientSecret"])
self.authentication_by_id[identifier] = credentials
def load_resource_file(
    self, filepath: Path, environment_variables: dict[str, str | None] | None = None
) -> list[dict[str, Any]]:
    """Load the function schedules in a file and resolve their credentials.

    The credentials of every schedule are resolved with ``read_auth`` and stored in
    ``self.authentication_by_id``. When the ``CREDENTIALS_HASH`` feature flag is
    enabled, a shortened hash of the credentials is appended to the schedule
    description as ``<_hash_key>: <hash>``. The description is truncated when the
    combined text would exceed ``_description_character_limit``.

    Args:
        filepath: The resource file to load.
        environment_variables: Forwarded unchanged to the parent implementation.

    Returns:
        The resources returned by the parent implementation, with the
        ``description`` updated in place when the feature flag is enabled.
    """
    resources = super().load_resource_file(filepath, environment_variables)
    # The auth hash calculation must happen here, because the output of
    # load_resource_file is what is compared with the CDF resource.
    for resource in resources:
        identifier = self.get_id(resource)
        credentials = read_auth(identifier, resource, self.client, "function schedule", self.console)
        self.authentication_by_id[identifier] = credentials
        if not Flags.CREDENTIALS_HASH.is_enabled():
            continue

        auth_hash = calculate_secure_hash(credentials.dump(camel_case=True), shorten=True)
        extra_str = f" {self._hash_key}: {auth_hash}"
        # A missing key and an explicit null are handled the same way; calling
        # len() on None would otherwise raise a TypeError.
        description = resource.get("description")
        if description is None:
            # No description to separate from, so drop the leading space.
            resource["description"] = extra_str[1:]
        elif len(description) + len(extra_str) <= self._description_character_limit:
            # "<=": a description that lands exactly on the limit still fits, which
            # matches the truncation branch below that also produces exactly the limit.
            resource["description"] = f"{description}{extra_str}"
        else:
            LowSeverityWarning(
                f"Description is too long for schedule {identifier!r}. Truncating..."
            ).print_warning(console=self.console)
            # Reserve room for the "..." marker and the hash suffix.
            truncation = self._description_character_limit - len(extra_str) - 3
            resource["description"] = f"{description[:truncation]}...{extra_str}"
    return resources

def load_resource(self, resource: dict[str, Any], is_dry_run: bool = False) -> FunctionScheduleWrite:
if "functionId" in resource:
identifier = self.get_id(resource)
LowSeverityWarning(f"FunctionId will be ignored in the schedule {identifier!r}").print_warning(
console=self.console
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@
ToolkitTypeError,
ToolkitYAMLFormatError,
)
from cognite_toolkit._cdf_tk.feature_flags import Flags
from cognite_toolkit._cdf_tk.loaders._base_loaders import ResourceLoader
from cognite_toolkit._cdf_tk.utils import (
calculate_secure_hash,
in_dict,
load_yaml_inject_variables,
quote_int_value_by_key_in_yaml,
Expand Down Expand Up @@ -114,6 +116,7 @@ class TransformationLoader(
}
)
_doc_url = "Transformations/operation/createTransformations"
_hash_key = "-- cdf-auth"

@property
def display_name(self) -> str:
Expand Down Expand Up @@ -225,6 +228,22 @@ def load_resource_file(
)
elif query_file:
item["query"] = safe_read(query_file)

if Flags.CREDENTIALS_HASH.is_enabled():
auth_dict: dict[str, Any] = {}
for key in [
"authentication",
"sourceOidcCredentials",
"destinationOidcCredentials",
"sourceNonce",
"destinationNonce",
]:
if key in item:
auth_dict[key] = item[key]
if auth_dict:
auth_hash = calculate_secure_hash(auth_dict, shorten=True)
if "query" in item:
item["query"] = f"{self._hash_key}: {auth_hash}\n{item['query']}"
return raw_list

def load_resource(self, resource: dict[str, Any], is_dry_run: bool = False) -> TransformationWrite:
Expand Down Expand Up @@ -274,9 +293,11 @@ def dump_resource(self, resource: Transformation, local: dict[str, Any] | None =
local = local or {}
if data_set_id := dumped.pop("dataSetId", None):
dumped["dataSetExternalId"] = self.client.lookup.data_sets.external_id(data_set_id)
if "isPublic" in dumped and "isPublic" not in local:
# Default set from server side.
dumped.pop("isPublic")
if "authentication" in local:
# Todo: Need a way to detect changes in credentials instead of just assuming
# that the credentials are always the same.
# The hash added to the beginning of the query detects the change in the authentication
dumped["authentication"] = local["authentication"]
return dumped

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from pathlib import Path
from typing import Any, final

from cognite.client.credentials import OAuthClientCredentials
from cognite.client.data_classes import (
ClientCredentials,
Workflow,
Expand Down Expand Up @@ -49,17 +48,16 @@
from cognite_toolkit._cdf_tk.client import ToolkitClient
from cognite_toolkit._cdf_tk.exceptions import (
ToolkitRequiredValueError,
ToolkitTypeError,
)
from cognite_toolkit._cdf_tk.feature_flags import Flags
from cognite_toolkit._cdf_tk.loaders._base_loaders import ResourceLoader
from cognite_toolkit._cdf_tk.tk_warnings import (
HighSeverityWarning,
LowSeverityWarning,
MissingReferencedWarning,
ToolkitWarning,
)
from cognite_toolkit._cdf_tk.utils import humanize_collection, to_directory_compatible
from cognite_toolkit._cdf_tk.utils import calculate_secure_hash, humanize_collection, to_directory_compatible
from cognite_toolkit._cdf_tk.utils.cdf import read_auth
from cognite_toolkit._cdf_tk.utils.diff_list import diff_list_hashable, diff_list_identifiable

from .auth_loaders import GroupAllScopedLoader
Expand Down Expand Up @@ -451,6 +449,9 @@ class WorkflowTriggerLoader(

_doc_url = "Workflow-triggers/operation/CreateOrUpdateTriggers"

class _MetadataKey:
secret_hash = "cognite-toolkit-auth-hash"

def __init__(self, client: ToolkitClient, build_dir: Path | None, console: Console | None = None):
super().__init__(client, build_dir, console)
self._authentication_by_id: dict[str, ClientCredentials] = {}
Expand Down Expand Up @@ -584,43 +585,38 @@ def get_dependent_items(cls, item: dict) -> Iterable[tuple[type[ResourceLoader],
if "workflowVersion" in item:
yield WorkflowVersionLoader, WorkflowVersionId(item["workflowExternalId"], item["workflowVersion"])

def load_resource_file(
    self, filepath: Path, environment_variables: dict[str, str | None] | None = None
) -> list[dict[str, Any]]:
    """Load the workflow triggers in a file and resolve their credentials.

    The credentials of every trigger are resolved with ``read_auth`` and stored in
    ``self._authentication_by_id``. When the ``CREDENTIALS_HASH`` feature flag is
    enabled, a shortened hash of the credentials is written to the trigger
    metadata under ``_MetadataKey.secret_hash``.

    Args:
        filepath: The resource file to load.
        environment_variables: Forwarded unchanged to the parent implementation.

    Returns:
        The resources returned by the parent implementation, with the metadata
        updated in place when the feature flag is enabled.
    """
    loaded = super().load_resource_file(filepath, environment_variables)

    # The auth hash calculation must happen here, because the output of
    # load_resource_file is what is compared with the CDF resource.
    for trigger in loaded:
        trigger_id = self.get_id(trigger)
        trigger_credentials = read_auth(trigger_id, trigger, self.client, "workflow trigger", self.console)
        self._authentication_by_id[trigger_id] = trigger_credentials
        if not Flags.CREDENTIALS_HASH.is_enabled():
            continue
        # Create the metadata mapping on demand, then record the hash in it.
        metadata = trigger.setdefault("metadata", {})
        metadata[self._MetadataKey.secret_hash] = calculate_secure_hash(
            trigger_credentials.dump(camel_case=True), shorten=True
        )
    return loaded

def load_resource(self, resource: dict[str, Any], is_dry_run: bool = False) -> WorkflowTriggerUpsert:
if isinstance(resource.get("data"), dict):
resource["data"] = json.dumps(resource["data"])

identifier = self.get_id(resource)
auth = resource.pop("authentication", None)
if auth is None:
if (self.client.config.is_strict_validation and Flags.STRICT_VALIDATION.is_enabled()) or not isinstance(
self.client.config.credentials, OAuthClientCredentials
):
raise ToolkitRequiredValueError(f"Authentication is missing for workflow trigger {identifier!r}.")
else:
HighSeverityWarning(
f"Authentication is missing for workflow trigger {identifier!r}. Falling back to the Toolkit credentials"
).print_warning(console=self.console)
credentials = ClientCredentials(
self.client.config.credentials.client_id, self.client.config.credentials.client_secret
)
elif not isinstance(auth, dict):
raise ToolkitTypeError(f"Authentication must be a dictionary for workflow trigger {identifier!r}")
elif "clientId" not in auth or "clientSecret" not in auth:
raise ToolkitRequiredValueError(
f"Authentication must contain clientId and clientSecret for workflow trigger {identifier!r}"
)
else:
credentials = ClientCredentials(auth["clientId"], auth["clientSecret"])

self._authentication_by_id[self.get_id(resource)] = credentials
return WorkflowTriggerUpsert._load(resource)

def dump_resource(self, resource: WorkflowTrigger, local: dict[str, Any] | None = None) -> dict[str, Any]:
dumped = resource.as_write().dump()
local = local or {}
if isinstance(dumped.get("data"), str) and isinstance(local.get("data"), dict):
dumped["data"] = json.loads(dumped["data"])

if "authentication" in local:
# Note that change in the authentication will not be detected, and thus,
# will require a forced redeployment.
# Changes in auth will be detected by the hash. We need to do this to ensure
# that the pull command works.
dumped["authentication"] = local["authentication"]
return dumped
45 changes: 43 additions & 2 deletions cognite_toolkit/_cdf_tk/utils/cdf.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,25 @@
from collections.abc import Iterator
from collections.abc import Hashable, Iterator
from typing import Any, Literal, overload

from cognite.client.credentials import OAuthClientCredentials
from cognite.client.data_classes import (
ClientCredentials,
)
from cognite.client.data_classes.data_modeling import Edge, Node, ViewId
from cognite.client.data_classes.filters import SpaceFilter
from cognite.client.exceptions import CogniteAPIError
from rich.console import Console

from cognite_toolkit._cdf_tk.client import ToolkitClient
from cognite_toolkit._cdf_tk.tk_warnings import MediumSeverityWarning
from cognite_toolkit._cdf_tk.exceptions import (
ToolkitRequiredValueError,
ToolkitTypeError,
)
from cognite_toolkit._cdf_tk.feature_flags import Flags
from cognite_toolkit._cdf_tk.tk_warnings import (
HighSeverityWarning,
MediumSeverityWarning,
)


@overload
Expand Down Expand Up @@ -76,3 +88,32 @@ def iterate_instances(
if next_cursor is None:
break
body["cursor"] = next_cursor


def read_auth(
    identifier: Hashable,
    resource: dict[str, Any],
    client: ToolkitClient,
    resource_name: str,
    console: Console | None = None,
) -> ClientCredentials:
    """Resolve the client credentials to use for a resource.

    The ``authentication`` entry of ``resource`` is read without being removed.
    When it is absent, the Toolkit's own credentials are used as a fallback,
    unless strict validation is active or the Toolkit is not using OAuth client
    credentials.

    Args:
        identifier: Identifier of the resource, used in messages only.
        resource: The raw resource dictionary.
        client: The Toolkit client whose configuration supplies the fallback.
        resource_name: Human-readable resource type, used in messages only.
        console: Console the fallback warning is printed to.

    Returns:
        The credentials given in the resource, or the Toolkit credentials.

    Raises:
        ToolkitRequiredValueError: If authentication is missing and no fallback is
            allowed, or if ``clientId``/``clientSecret`` is missing.
        ToolkitTypeError: If ``authentication`` is not a dictionary.
    """
    auth = resource.get("authentication")

    if auth is not None:
        # Explicit credentials: validate their shape, then use them as given.
        if not isinstance(auth, dict):
            raise ToolkitTypeError(f"Authentication must be a dictionary for {resource_name} {identifier!r}")
        if not ("clientId" in auth and "clientSecret" in auth):
            raise ToolkitRequiredValueError(
                f"Authentication must contain clientId and clientSecret for {resource_name} {identifier!r}"
            )
        return ClientCredentials(auth["clientId"], auth["clientSecret"])

    # No credentials on the resource: fall back only when that is permitted.
    fallback_forbidden = (
        client.config.is_strict_validation and Flags.STRICT_VALIDATION.is_enabled()
    ) or not isinstance(client.config.credentials, OAuthClientCredentials)
    if fallback_forbidden:
        raise ToolkitRequiredValueError(f"Authentication is missing for {resource_name} {identifier!r}.")

    HighSeverityWarning(
        f"Authentication is missing for {resource_name} {identifier!r}. Falling back to the Toolkit credentials"
    ).print_warning(console=console)
    toolkit_credentials = client.config.credentials
    return ClientCredentials(toolkit_credentials.client_id, toolkit_credentials.client_secret)
7 changes: 5 additions & 2 deletions cognite_toolkit/_cdf_tk/utils/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,14 @@ def calculate_directory_hash(
return calculated


def calculate_secure_hash(item: dict[str, Any], shorten: bool = False) -> str:
    """Calculate a secure hash of a dictionary.

    The dictionary is serialized to JSON with sorted keys, so the result does not
    depend on key order, and then hashed with SHA-512.

    Args:
        item: The dictionary to hash. It must be JSON serializable.
        shorten: If True, return only the first 8 characters of the hex digest.

    Returns:
        The hex digest (128 characters), or its first 8 characters when
        ``shorten`` is True.
    """
    sha512_hash = hashlib.sha512(usedforsecurity=True)
    sha512_hash.update(json.dumps(item, sort_keys=True).encode("utf-8"))
    calculated_hash = sha512_hash.hexdigest()
    if shorten:
        return calculated_hash[:8]
    return calculated_hash


def calculate_str_or_file_hash(content: str | Path, shorten: bool = False) -> str:
Expand Down
7 changes: 6 additions & 1 deletion tests/test_unit/approval_client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,10 +138,15 @@ class ApprovalToolkitClient:
def __init__(self, mock_client: ToolkitClientMock):
self._return_verify_resources = False
self.mock_client = mock_client
credentials = MagicMock(spec=OAuthClientCredentials)
credentials.client_id = "toolkit-client-id"
credentials.client_secret = "toolkit-client-secret"
credentials.token_url = "https://toolkit.auth.com/oauth/token"
credentials.scopes = ["ttps://pytest-field.cognitedata.com/.default"]
self.mock_client.config = ToolkitClientConfig(
client_name=CLIENT_NAME,
project="pytest-project",
credentials=MagicMock(spec=OAuthClientCredentials),
credentials=credentials,
is_strict_validation=False,
)
# This is used to simulate the existing resources in CDF
Expand Down
Loading

0 comments on commit e4c7e70

Please sign in to comment.