From 7482a805fb9cd520b247fa45e611fa51a215f8b3 Mon Sep 17 00:00:00 2001 From: Allen Porter Date: Tue, 31 Dec 2024 11:44:59 -0800 Subject: [PATCH] Improve HelmRelease dependency tracking for diffs (#821) Improve HelmRelease dependency tracking for diffs by tracking changes in dependencies like ConfigMaps and HelmReleases. Fixes #817 --- flux_local/kustomize.py | 7 +- flux_local/manifest.py | 52 +++++++++- flux_local/resource_diff.py | 81 ++++++++++++++- flux_local/tool/diff.py | 10 +- flux_local/visitor.py | 9 ++ tests/test_manifest.py | 26 +++++ tests/test_resource_diff.py | 200 ++++++++++++++++++++++++++++++++++++ 7 files changed, 371 insertions(+), 14 deletions(-) create mode 100644 tests/test_resource_diff.py diff --git a/flux_local/kustomize.py b/flux_local/kustomize.py index 512d8728..2f912956 100644 --- a/flux_local/kustomize.py +++ b/flux_local/kustomize.py @@ -49,7 +49,7 @@ KustomizeException, KustomizePathException, ) -from .manifest import Kustomization +from .manifest import Kustomization, HELM_RELEASE _LOGGER = logging.getLogger(__name__) @@ -61,7 +61,6 @@ KUSTOMIZE_BIN = "kustomize" FLUX_BIN = "flux" -HELM_RELEASE_KIND = "HelmRelease" # Used to limit access to specific resources _LOCK_MAP: dict[str, asyncio.Lock] = {} @@ -98,10 +97,10 @@ def grep_helm_release( return ( self.grep(f"metadata.namespace=^{helm_release.namespace}$") .grep(f"metadata.name=^{helm_release.name}$") - .grep(f"kind=^{HELM_RELEASE_KIND}$") + .grep(f"kind=^{HELM_RELEASE}$") ) if invert: - return self.grep(f"kind=^{HELM_RELEASE_KIND}$", invert=True) + return self.grep(f"kind=^{HELM_RELEASE}$", invert=True) raise InputException("Must specify either helm_release or invert") async def run(self) -> str: diff --git a/flux_local/manifest.py b/flux_local/manifest.py index d2e5a435..0d47bac4 100644 --- a/flux_local/manifest.py +++ b/flux_local/manifest.py @@ -46,6 +46,7 @@ CONFIG_MAP_KIND = "ConfigMap" DEFAULT_NAMESPACE = "flux-system" VALUE_PLACEHOLDER_TEMPLATE = "..PLACEHOLDER_{name}.." +HELM_RELEASE = "HelmRelease" HELM_REPOSITORY = "HelmRepository" GIT_REPOSITORY = "GitRepository" OCI_REPOSITORY = "OCIRepository" @@ -88,6 +89,21 @@ class Config(BaseConfig): omit_none = True +@dataclass(frozen=True, order=True) +class NamedResource: + """Identifier for a kubernetes resource.""" + + kind: str + namespace: str | None + name: str + + @property + def namespaced_name(self) -> str: + if self.namespace: + return f"{self.namespace}/{self.name}" + return self.name + + @dataclass class HelmChart(BaseManifest): """A representation of an instantiation of a chart for a HelmRelease.""" @@ -202,10 +218,14 @@ class HelmRelease(BaseManifest): chart: HelmChart """A mapping to a specific helm chart for this HelmRelease.""" - values: Optional[dict[str, Any]] = field(metadata={"serialize": "omit"}) + values: Optional[dict[str, Any]] = field( + metadata={"serialize": "omit"}, default=None + ) """The values to install in the chart.""" - values_from: Optional[list[ValuesReference]] = field(metadata={"serialize": "omit"}) + values_from: Optional[list[ValuesReference]] = field( + metadata={"serialize": "omit"}, default=None + ) """A list of values to reference from an ConfigMap or Secret.""" images: list[str] | None = field(default=None) @@ -251,6 +271,34 @@ def namespaced_name(self) -> str: """Return the namespace and name concatenated as an id.""" return f"{self.namespace}/{self.name}" + @property + def resource_dependencies(self) -> list[NamedResource]: + """Return the list of input dependencies for the HelmRelease.""" + deps = [ + NamedResource( + kind=HELM_RELEASE, + name=self.name, + namespace=self.namespace, + ) + ] + if self.chart: + deps.append( + NamedResource( + kind=self.chart.repo_kind, + name=self.chart.repo_name, + namespace=self.chart.repo_namespace, + ) + ) + names_seen = set() + for ref in self.values_from or (): + if ref.name in names_seen: + continue + names_seen.add(ref.name) + deps.append( + NamedResource(kind=ref.kind, name=ref.name, namespace=self.namespace) + ) + return deps + @dataclass class HelmRepository(BaseManifest): diff --git a/flux_local/resource_diff.py b/flux_local/resource_diff.py index 8c043c87..ef7c4e13 100644 --- a/flux_local/resource_diff.py +++ b/flux_local/resource_diff.py @@ -14,7 +14,8 @@ from . import command -from .visitor import ObjectOutput, ResourceKey +from .visitor import ObjectOutput, ResourceKey, HelmVisitor +from .manifest import HelmRelease, NamedResource _LOGGER = logging.getLogger(__name__) @@ -152,16 +153,88 @@ def perform_yaml_diff( yield yaml.dump(diffs, sort_keys=False, explicit_start=True, default_style=None) -def get_helm_release_diff_keys(a: ObjectOutput, b: ObjectOutput) -> list[ResourceKey]: +def merge_helm_releases( + a: list[HelmRelease], b: list[HelmRelease] +) -> Generator[tuple[HelmRelease | None, HelmRelease | None], None, None]: + """Return HelmReleases joined by name.""" + a_dict = {r.release_name: r for r in a} + b_dict = {r.release_name: r for r in b} + for name in _unique_keys(a_dict, b_dict): + yield a_dict.get(name), b_dict.get(name) + + +def merge_named_resources( + a: list[NamedResource], b: list[NamedResource] +) -> Generator[NamedResource, None, None]: + """Return HelmReleases joined by name.""" + a_dict = {f"{r.kind}-{r.namespaced_name}": r for r in a} + b_dict = {f"{r.kind}-{r.namespaced_name}": r for r in b} + for name in _unique_keys(a_dict, b_dict): + # Emit either the left or right since they should be identical + value: NamedResource = a_dict.get(name) or b_dict.get(name) # type: ignore[assignment] + yield value + + +def build_helm_dependency_map( + a_visitor: HelmVisitor, b_visitor: HelmVisitor +) -> dict[NamedResource, NamedResource]: + """Return a map of all resources to the HelmRelease resource that depends on them. + + This is a mapping of all the named resources. If any of them changed, then we + need to diff the HelmRelease to look for changes in the rendered output. + """ + results: dict[NamedResource, NamedResource] = {} + for helmrelease_a, helmrelease_b in merge_helm_releases( + a_visitor.releases, b_visitor.releases + ): + resources_a = helmrelease_a.resource_dependencies if helmrelease_a else [] + resources_b = helmrelease_b.resource_dependencies if helmrelease_b else [] + hr: HelmRelease = helmrelease_a or helmrelease_b # type: ignore[assignment] + hr_named_resource = NamedResource( + kind="HelmRelease", + namespace=hr.namespace, + name=hr.name, + ) + for named_resource in merge_named_resources(resources_a, resources_b): + results[named_resource] = hr_named_resource + + return results + + +def get_helm_release_diff_keys( + a: ObjectOutput, b: ObjectOutput, dependency_map: dict[NamedResource, NamedResource] +) -> list[ResourceKey]: """Return HelmRelease resource keys with diffs, by cluster.""" - results: list[ResourceKey] = [] + + # Pass #1: Check all named resources that are upstream dependencies of a + # HelmRelease and have a content change. Save the HelmRelease name. + hrs_to_check: set[NamedResource] = set() + _LOGGER.debug("Checking HelmRelease dependencies") for kustomization_key in _unique_keys(a.content, b.content): _LOGGER.debug("Diffing results for Kustomization %s", kustomization_key) a_resources = a.content.get(kustomization_key, {}) b_resources = b.content.get(kustomization_key, {}) for resource_key in _unique_keys(a_resources, b_resources): - if resource_key.kind != "HelmRelease": + if ( + hr_named_resource := dependency_map.get(resource_key.named_resource) + ) is None: continue if a_resources.get(resource_key) != b_resources.get(resource_key): + _LOGGER.info( + "HelmRelease %s detected potential change in resource %s (Kustomization %s)", + hr_named_resource.namespaced_name, + resource_key.namespaced_name, + kustomization_key.namespaced_name, + ) + hrs_to_check.add(hr_named_resource) + + # Pass #2: Find the ResourceKey of all the changed HelmReleases + results: list[ResourceKey] = [] + for kustomization_key in _unique_keys(a.content, b.content): + a_resources = a.content.get(kustomization_key, {}) + b_resources = b.content.get(kustomization_key, {}) + for resource_key in _unique_keys(a_resources, b_resources): + if resource_key.named_resource in hrs_to_check: results.append(resource_key) + return results diff --git a/flux_local/tool/diff.py b/flux_local/tool/diff.py index 7476aaef..a199d4ee 100644 --- a/flux_local/tool/diff.py +++ b/flux_local/tool/diff.py @@ -19,6 +19,7 @@ perform_yaml_diff, perform_external_diff, perform_object_diff, + build_helm_dependency_map, ) from . import selector @@ -248,10 +249,11 @@ async def run( # type: ignore[no-untyped-def] # Find HelmRelease objects with diffs and prune all other HelmReleases from # the helm visitors. We assume that the only way for a HelmRelease output - # to have a diff is if the HelmRelease in the kustomization has a diff. - # This avoid building unnecessary resources and churn from things like - # random secret generation. - diff_resource_keys = get_helm_release_diff_keys(orig_content, content) + # to have a diff is if any of the resources that the HelmRelease + # depends on in the kustomization has a diff. This avoids needing to + # template every possible helm chart when nothing as changed. + dependency_map = build_helm_dependency_map(orig_helm_visitor, helm_visitor) + diff_resource_keys = get_helm_release_diff_keys(orig_content, content, dependency_map) diff_names = { resource_key.namespaced_name for resource_key in diff_resource_keys } diff --git a/flux_local/visitor.py b/flux_local/visitor.py index 6d28bd68..f9be6181 100644 --- a/flux_local/visitor.py +++ b/flux_local/visitor.py @@ -22,6 +22,7 @@ HelmRepository, Manifest, OCIRepository, + NamedResource, ) @@ -75,6 +76,14 @@ def namespaced_name(self) -> str: def compact_label(self) -> str: return f"{self.kind}: {self.namespaced_name}" + @property + def named_resource(self) -> NamedResource: + return NamedResource( + kind=self.kind, + name=self.name, + namespace=self.namespace, + ) + class ResourceOutput(ABC): """Helper object for implementing a git_repo.ResourceVisitor that saves content. diff --git a/tests/test_manifest.py b/tests/test_manifest.py index c05b44a2..2fe1239a 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -12,9 +12,11 @@ Manifest, read_manifest, write_manifest, + NamedResource, ) TESTDATA_DIR = Path("tests/testdata/cluster/infrastructure") +TEST_PODINFO_HELMRELEASE = Path("tests/testdata/cluster8/apps/podinfo.yaml") def test_parse_helm_release() -> None: @@ -141,3 +143,27 @@ def test_parse_helmrelease_chartref() -> None: assert release.chart.repo_name == "podinfo" assert release.chart.repo_namespace == "default" assert release.values + + +def test_helmrelease_dependencies() -> None: + """Test parsing a helm release doc.""" + + docs = list( + yaml.load_all( + TEST_PODINFO_HELMRELEASE.read_text(), + Loader=yaml.CLoader, + ) + ) + assert len(docs) == 2 + release = HelmRelease.parse_doc(docs[1]) + assert release.name == "podinfo" + assert release.namespace == "podinfo" + # Assert on all the input dependencies that might cause HelmRelease + # output to change. This means the HelmRepository, and any input + # values used for substitute references. + assert release.resource_dependencies == [ + NamedResource(kind="HelmRelease", name="podinfo", namespace="podinfo"), + NamedResource(kind="HelmRepository", name="podinfo", namespace="flux-system"), + NamedResource(kind="ConfigMap", name="podinfo-values", namespace="podinfo"), + NamedResource(kind="Secret", name="podinfo-tls-values", namespace="podinfo"), + ] diff --git a/tests/test_resource_diff.py b/tests/test_resource_diff.py new file mode 100644 index 00000000..c6971272 --- /dev/null +++ b/tests/test_resource_diff.py @@ -0,0 +1,200 @@ +"""Tests for the resource diff module.""" + +import pathlib +import shutil +import tempfile +from contextlib import contextmanager +from collections.abc import Generator + +import yaml + +from flux_local import git_repo +from flux_local.resource_diff import ( + get_helm_release_diff_keys, + merge_helm_releases, + merge_named_resources, + build_helm_dependency_map, +) +from flux_local.visitor import ObjectOutput, HelmVisitor, ResourceKey +from flux_local.manifest import HelmRelease, HelmChart, NamedResource + +TESTDATA_PATH = pathlib.Path("tests/testdata/cluster8") + + +async def visit_helm_content(path: pathlib.Path) -> tuple[ObjectOutput, HelmVisitor]: + """Visit content in the specified path.""" + + query = git_repo.ResourceSelector() + query.path = git_repo.PathSelector(path) + query.helm_release.namespace = None # All + + content = ObjectOutput([]) + query.kustomization.visitor = content.visitor() + + helm_visitor = HelmVisitor() + query.helm_repo.visitor = helm_visitor.repo_visitor() + query.helm_release.visitor = helm_visitor.release_visitor() + + await git_repo.build_manifest(selector=query) + + return content, helm_visitor + + +@contextmanager +def mirror_worktree(path: pathlib.Path) -> Generator[pathlib.Path]: + """Create a copy of the path in a temporary git repo for generating diffs.""" + with tempfile.TemporaryDirectory() as tmpdir: + tmp_path = pathlib.Path(tmpdir) + # Required by `git_repo` to find the root + shutil.copytree(".git", str(tmp_path / ".git")) + + new_path = tmp_path / path + shutil.copytree(path, new_path) + + yield new_path + + +def test_no_diff_keys_empty_input() -> None: + """Test get_helm_release_diff_keys with empty inputs.""" + + a = ObjectOutput([]) + b = ObjectOutput([]) + assert get_helm_release_diff_keys(a, b, {}) == [] + + +async def test_no_diff_keys() -> None: + """Test get_helm_release_diff_keys with no diff.""" + + content, helm_visitor = await visit_helm_content(TESTDATA_PATH) + with mirror_worktree(TESTDATA_PATH) as new_cluster_path: + content_new, helm_visitor_new = await visit_helm_content(new_cluster_path) + + dep_map = build_helm_dependency_map(helm_visitor, helm_visitor_new) + assert get_helm_release_diff_keys(content, content_new, dep_map) == [] + + +async def test_helm_release_spec_diff() -> None: + """Test get_helm_release_diff_keys with a diff in the input values.""" + + content, helm_visitor = await visit_helm_content(TESTDATA_PATH) + with mirror_worktree(TESTDATA_PATH) as new_cluster_path: + + # Generate a diff by changing a value in the HelmRelease + podinfo_path = new_cluster_path / "apps/podinfo.yaml" + podinfo_doc = list( + yaml.load_all(podinfo_path.read_text(), Loader=yaml.SafeLoader) + ) + assert len(podinfo_doc) == 2 + assert podinfo_doc[1]["kind"] == "HelmRelease" + assert podinfo_doc[1]["spec"]["values"]["redis"]["enabled"] + podinfo_doc[1]["spec"]["values"]["redis"]["enabled"] = False + podinfo_path.write_text(yaml.dump_all(podinfo_doc)) + + content_new, helm_visitor_new = await visit_helm_content(new_cluster_path) + + dep_map = build_helm_dependency_map(helm_visitor, helm_visitor_new) + assert get_helm_release_diff_keys(content, content_new, dep_map) == [ + ResourceKey( + kustomization_path=str(TESTDATA_PATH / "apps"), + kind="HelmRelease", + namespace="podinfo", + name="podinfo", + ) + ] + + +async def test_helm_repository_causes_diff() -> None: + """Test get_helm_release_diff_keys where an input HelmRepository change causes a diff.""" + + content, helm_visitor = await visit_helm_content(TESTDATA_PATH) + with mirror_worktree(TESTDATA_PATH) as new_cluster_path: + + # Generate a diff by changing a value in the HelmRelease + podinfo_path = new_cluster_path / "apps/podinfo.yaml" + podinfo_doc = list( + yaml.load_all(podinfo_path.read_text(), Loader=yaml.SafeLoader) + ) + assert len(podinfo_doc) == 2 + assert podinfo_doc[0]["kind"] == "HelmRepository" + assert podinfo_doc[0]["spec"]["url"] == "oci://ghcr.io/stefanprodan/charts" + podinfo_doc[0]["spec"]["url"] = "oci://ghcr.io/stefanprodan/charts2" + podinfo_path.write_text(yaml.dump_all(podinfo_doc)) + + content_new, helm_visitor_new = await visit_helm_content(new_cluster_path) + + dep_map = build_helm_dependency_map(helm_visitor, helm_visitor_new) + assert get_helm_release_diff_keys(content, content_new, dep_map) == [ + ResourceKey( + kustomization_path=str(TESTDATA_PATH / "apps"), + kind="HelmRelease", + namespace="podinfo", + name="podinfo", + ) + ] + + +async def test_merge_helm_releases() -> None: + """Test merge_helm_releases.""" + + chart = HelmChart( + name="chart", version="1.0.0", repo_name="example", repo_namespace="flux-system" + ) + + a = [ + HelmRelease( + name="a", namespace="default", chart=chart, values={"key": "value1"} + ), + HelmRelease( + name="b", namespace="default", chart=chart, values={"key": "value2"} + ), + ] + b = [ + HelmRelease( + name="b", namespace="default", chart=chart, values={"key": "value3"} + ), + HelmRelease( + name="c", namespace="default", chart=chart, values={"key": "value4"} + ), + ] + assert list(merge_helm_releases(a, b)) == [ + ( + HelmRelease( + name="a", namespace="default", chart=chart, values={"key": "value1"} + ), + None, + ), + ( + HelmRelease( + name="b", namespace="default", chart=chart, values={"key": "value2"} + ), + HelmRelease( + name="b", namespace="default", chart=chart, values={"key": "value3"} + ), + ), + ( + None, + HelmRelease( + name="c", namespace="default", chart=chart, values={"key": "value4"} + ), + ), + ] + + +async def test_merge_named_resources() -> None: + """Test for merging named resources.""" + a = [ + NamedResource(name="podinfo", namespace="podinfo", kind="HelmRelease"), + NamedResource(name="podinfo", namespace="podinfo", kind="ConfigMap"), + NamedResource(name="secret", namespace="podinfo", kind="Secret"), + ] + b = [ + NamedResource(name="podinfo", namespace="podinfo", kind="HelmRelease"), + NamedResource(name="podinfo", namespace="podinfo", kind="ConfigMap"), + NamedResource(name="podinfo", namespace="flux-system", kind="HelmRepository"), + ] + assert list(merge_named_resources(a, b)) == [ + NamedResource(name="podinfo", namespace="podinfo", kind="HelmRelease"), + NamedResource(name="podinfo", namespace="podinfo", kind="ConfigMap"), + NamedResource(name="secret", namespace="podinfo", kind="Secret"), + NamedResource(name="podinfo", namespace="flux-system", kind="HelmRepository"), + ]