From 515aa8fbcd50b36a99af02ccbc8f2ff6c6f28e34 Mon Sep 17 00:00:00 2001 From: Marcel Bargull Date: Wed, 22 May 2024 19:56:52 +0200 Subject: [PATCH 1/3] Fix excessive memory use in `inspect_linkages_lief` (#5348) * Fix excessive memory use in inspect_linkages_lief introduced in gh-5228 which replaced binary.name with str(file). The former returns the path to the binary whereas the latter returns the contents of the parsed binary, naturally causing high memory usage. That semantic change was unintended (misread upstream changes). Since binary's path is no longer part of the object, pass it separately. * Use consistent var name in inspect_linkages_lief to avoid variable shadowing. * Remove redundant get_uniqueness_key call --------- Signed-off-by: Marcel Bargull --- conda_build/os_utils/liefldd.py | 30 +++++++++---------- ...essive-memory-use-in-inspect_linkages_lief | 19 ++++++++++++ 2 files changed, 34 insertions(+), 15 deletions(-) create mode 100644 news/5348-fix-excessive-memory-use-in-inspect_linkages_lief diff --git a/conda_build/os_utils/liefldd.py b/conda_build/os_utils/liefldd.py index d6ee2841d6..3823ce4a9d 100644 --- a/conda_build/os_utils/liefldd.py +++ b/conda_build/os_utils/liefldd.py @@ -353,12 +353,12 @@ def _get_path_dirs(prefix): yield "/".join((prefix, "bin")) -def get_uniqueness_key(file): +def get_uniqueness_key(filename, file): binary = ensure_binary(file) if not binary: return EXE_FORMATS.UNKNOWN elif binary.format == EXE_FORMATS.MACHO: - return str(file) + return filename elif binary.format == EXE_FORMATS.ELF and ( # noqa binary.type == lief.ELF.ELF_CLASS.CLASS32 or binary.type == lief.ELF.ELF_CLASS.CLASS64 @@ -369,8 +369,8 @@ def get_uniqueness_key(file): ] if result: return result[0] - return str(file) - return str(file) + return filename + return filename def _get_resolved_location( @@ -505,13 +505,13 @@ def inspect_linkages_lief( for element in todo: todo.pop(0) filename2 = element[0] - binary = element[1] - if not binary: + binary2 = element[1] + if not binary2: continue - uniqueness_key = get_uniqueness_key(binary) + uniqueness_key = get_uniqueness_key(filename2, binary2) if uniqueness_key not in already_seen: parent_exe_dirname = None - if binary.format == EXE_FORMATS.PE: + if binary2.format == EXE_FORMATS.PE: tmp_filename = filename2 while tmp_filename: if ( @@ -527,17 +527,17 @@ def inspect_linkages_lief( if ".pyd" in filename2 or (os.sep + "DLLs" + os.sep) in filename2: parent_exe_dirname = envroot.replace(os.sep, "/") + "/DLLs" rpaths_by_binary[filename2] = get_rpaths( - binary, parent_exe_dirname, envroot.replace(os.sep, "/"), sysroot + binary2, parent_exe_dirname, envroot.replace(os.sep, "/"), sysroot ) tmp_filename = filename2 rpaths_transitive = [] - if binary.format == EXE_FORMATS.PE: + if binary2.format == EXE_FORMATS.PE: rpaths_transitive = rpaths_by_binary[tmp_filename] else: while tmp_filename: rpaths_transitive[:0] = rpaths_by_binary[tmp_filename] tmp_filename = parents_by_filename[tmp_filename] - libraries = get_libraries(binary) + libraries = get_libraries(binary2) if filename2 in libraries: # Happens on macOS, leading to cycles. libraries.remove(filename2) # RPATH is implicit everywhere except macOS, make it explicit to simplify things. @@ -546,14 +546,14 @@ def inspect_linkages_lief( "$RPATH/" + lib if not lib.startswith("/") and not lib.startswith("$") - and binary.format != EXE_FORMATS.MACHO # noqa + and binary2.format != EXE_FORMATS.MACHO # noqa else lib ) for lib in libraries ] for lib, orig in zip(libraries, these_orig): resolved = _get_resolved_location( - binary, + binary2, orig, exedir, exedir, @@ -568,7 +568,7 @@ def inspect_linkages_lief( # can be run case-sensitively if the user wishes. # """ - if binary.format == EXE_FORMATS.PE: + if binary2.format == EXE_FORMATS.PE: import random path_fixed = ( os.path.dirname(path_fixed) @@ -596,7 +596,7 @@ def inspect_linkages_lief( if recurse: if os.path.exists(resolved[0]): todo.append([resolved[0], lief.parse(resolved[0])]) - already_seen.add(get_uniqueness_key(binary)) + already_seen.add(uniqueness_key) return results diff --git a/news/5348-fix-excessive-memory-use-in-inspect_linkages_lief b/news/5348-fix-excessive-memory-use-in-inspect_linkages_lief new file mode 100644 index 0000000000..0c6184dcbe --- /dev/null +++ b/news/5348-fix-excessive-memory-use-in-inspect_linkages_lief @@ -0,0 +1,19 @@ +### Enhancements + +* + +### Bug fixes + +* Fix excessive memory use in `inspect_linkages_lief` (#5267 via #5348) + +### Deprecations + +* + +### Docs + +* + +### Other + +* From 773304327dc8f281be6e97375596dbdea70330f1 Mon Sep 17 00:00:00 2001 From: Ken Odegard Date: Thu, 23 May 2024 16:00:11 -0500 Subject: [PATCH 2/3] Fix `frozendict` usage (#5345) * Revise toposort to avoid returning frozendict which may potentially need to be modified when `outputs/files` are defined. * Add minimal reproducing test. * Add type annotations. * Deprecate conda_build.metadata.toposort in favor of conda_build.metadata._toposort_outputs. * Deprecate conda_build.metadata.check_circular_dependencies in favor of conda_build.metadata._check_circular_dependencies. --- conda_build/metadata.py | 148 +++++++++++++++--- news/5345-fix-frozendict | 20 +++ tests/test-recipes/metadata/gh-5342/meta.yaml | 15 ++ 3 files changed, 160 insertions(+), 23 deletions(-) create mode 100644 news/5345-fix-frozendict create mode 100644 tests/test-recipes/metadata/gh-5342/meta.yaml diff --git a/conda_build/metadata.py b/conda_build/metadata.py index 2552682840..1c51246264 100644 --- a/conda_build/metadata.py +++ b/conda_build/metadata.py @@ -23,6 +23,7 @@ from . import exceptions, utils from .config import Config, get_or_merge_config +from .deprecations import deprecated from .features import feature_list from .license_family import ensure_valid_license_family from .utils import ( @@ -45,7 +46,10 @@ ) if TYPE_CHECKING: - from typing import Any, Literal + from typing import Any, Literal, Self + + OutputDict = dict[str, Any] + OutputTuple = tuple[OutputDict, "MetaData"] try: import yaml @@ -408,7 +412,17 @@ def _get_all_dependencies(metadata, envs=("host", "build", "run")): return reqs -def check_circular_dependencies(render_order, config=None): +@deprecated( + "24.5.1", + "24.7.0", + addendum="Use `conda_build.metadata._check_circular_dependencies` instead.", +) +def check_circular_dependencies( + render_order: dict[dict[str, Any], MetaData], + config: Config | None = None, +): + # deprecated since the input type (render_order) changed + envs: tuple[str, ...] if config and config.host_subdir != config.build_subdir: # When cross compiling build dependencies are already built # and cannot come from the recipe as subpackages @@ -433,6 +447,39 @@ def check_circular_dependencies(render_order, config=None): raise exceptions.RecipeError(error) +def _check_circular_dependencies( + render_order: list[OutputTuple], + config: Config | None = None, +) -> None: + envs: tuple[str, ...] + if config and config.host_subdir != config.build_subdir: + # When cross compiling build dependencies are already built + # and cannot come from the recipe as subpackages + envs = ("host", "run") + else: + envs = ("build", "host", "run") + + pairs: list[tuple[str, str]] = [] + for idx, (_, metadata) in enumerate(render_order): + name = metadata.name() + for _, other_metadata in render_order[idx + 1 :]: + other_name = other_metadata.name() + if any( + name == dep.split(" ")[0] + for dep in _get_all_dependencies(other_metadata, envs=envs) + ) and any( + other_name == dep.split(" ")[0] + for dep in _get_all_dependencies(metadata, envs=envs) + ): + pairs.append((name, other_name)) + + if pairs: + error = "Circular dependencies in recipe: \n" + for pair in pairs: + error += " {} <-> {}\n".format(*pair) + raise exceptions.RecipeError(error) + + def _variants_equal(metadata, output_metadata): match = True for key, val in metadata.config.variant.items(): @@ -846,14 +893,13 @@ def _get_dependencies_from_environment(env_name_or_path): return {"requirements": {"build": bootstrap_requirements}} -def toposort(output_metadata_map): - """This function is used to work out the order to run the install scripts - for split packages based on any interdependencies. The result is just - a re-ordering of outputs such that we can run them in that order and - reset the initial set of files in the install prefix after each. This - will naturally lead to non-overlapping files in each package and also - the correct files being present during the install and test procedures, - provided they are run in this order.""" +@deprecated( + "24.5.1", + "24.7.0", + addendum="Use `conda_build.metadata.toposort_outputs` instead.", +) +def toposort(output_metadata_map: dict[OutputDict, MetaData]): + # deprecated since input type (output_metadata_map) and output changed from conda.common.toposort import _toposort # We only care about the conda packages built by this recipe. Non-conda @@ -863,9 +909,9 @@ def toposort(output_metadata_map): for output_d in output_metadata_map if output_d.get("type", "conda").startswith("conda") ] - topodict = dict() - order = dict() - endorder = set() + topodict: dict[str, set[str]] = dict() + order: dict[str, int] = dict() + endorder: set[int] = set() for idx, (output_d, output_m) in enumerate(output_metadata_map.items()): if output_d.get("type", "conda").startswith("conda"): @@ -907,6 +953,63 @@ def toposort(output_metadata_map): return result +def _toposort_outputs(output_tuples: list[OutputTuple]) -> list[OutputTuple]: + """This function is used to work out the order to run the install scripts + for split packages based on any interdependencies. The result is just + a re-ordering of outputs such that we can run them in that order and + reset the initial set of files in the install prefix after each. This + will naturally lead to non-overlapping files in each package and also + the correct files being present during the install and test procedures, + provided they are run in this order.""" + from conda.common.toposort import _toposort + + # We only care about the conda packages built by this recipe. Non-conda + # packages get sorted to the end. + conda_outputs: dict[str, list[OutputTuple]] = {} + non_conda_outputs: list[OutputTuple] = [] + for output_tuple in output_tuples: + output_d, _ = output_tuple + if output_d.get("type", "conda").startswith("conda"): + # conda packages must have a name + # the same package name may be seen multiple times (variants) + conda_outputs.setdefault(output_d["name"], []).append(output_tuple) + elif "name" in output_d: + non_conda_outputs.append(output_tuple) + else: + # TODO: is it even possible to get here? and if so should we silently ignore or error? + utils.get_logger(__name__).warn("Found an output without a name, skipping") + + # Iterate over conda packages, creating a mapping of package names to their + # dependencies to be used in toposort + name_to_dependencies: dict[str, set[str]] = {} + for name, same_name_outputs in conda_outputs.items(): + for output_d, output_metadata in same_name_outputs: + # dependencies for all of the variants + dependencies = ( + *output_metadata.get_value("requirements/run", []), + *output_metadata.get_value("requirements/host", []), + *( + output_metadata.get_value("requirements/build", []) + if not output_metadata.is_cross + else [] + ), + ) + name_to_dependencies.setdefault(name, set()).update( + dependency_name + for dependency in dependencies + if (dependency_name := dependency.split(" ")[0]) in conda_outputs + ) + + return [ + *( + output + for name in _toposort(name_to_dependencies) + for output in conda_outputs[name] + ), + *non_conda_outputs, + ] + + def get_output_dicts_from_metadata( metadata: MetaData, outputs: list[dict[str, Any]] | None = None, @@ -2268,7 +2371,7 @@ def validate_features(self): "character in your recipe." ) - def copy(self): + def copy(self: Self) -> MetaData: new = copy.copy(self) new.config = self.config.copy() new.config.variant = copy.deepcopy(self.config.variant) @@ -2520,10 +2623,10 @@ def get_output_metadata_set( permit_undefined_jinja: bool = False, permit_unsatisfiable_variants: bool = False, bypass_env_check: bool = False, - ) -> list[tuple[dict[str, Any], MetaData]]: + ) -> list[OutputTuple]: from .source import provide - out_metadata_map = {} + output_tuples: list[OutputTuple] = [] if self.final: outputs = get_output_dicts_from_metadata(self) output_tuples = [(outputs[0], self)] @@ -2579,27 +2682,26 @@ def get_output_metadata_set( } ), ] = (out, out_metadata) - out_metadata_map[deepfreeze(out)] = out_metadata + output_tuples.append((out, out_metadata)) ref_metadata.other_outputs = out_metadata.other_outputs = ( all_output_metadata ) except SystemExit: if not permit_undefined_jinja: raise - out_metadata_map = {} + output_tuples = [] - assert out_metadata_map, ( + assert output_tuples, ( "Error: output metadata set is empty. Please file an issue" " on the conda-build tracker at https://github.com/conda/conda-build/issues" ) - # format here is {output_dict: metadata_object} - render_order = toposort(out_metadata_map) - check_circular_dependencies(render_order, config=self.config) + render_order: list[OutputTuple] = _toposort_outputs(output_tuples) + _check_circular_dependencies(render_order, config=self.config) conda_packages = OrderedDict() non_conda_packages = [] - for output_d, m in render_order.items(): + for output_d, m in render_order: if not output_d.get("type") or output_d["type"] in ( "conda", "conda_v2", diff --git a/news/5345-fix-frozendict b/news/5345-fix-frozendict new file mode 100644 index 0000000000..d4a7f8c324 --- /dev/null +++ b/news/5345-fix-frozendict @@ -0,0 +1,20 @@ +### Enhancements + +* + +### Bug fixes + +* Fix issue with modifying a `frozendict` when specifying `outputs/files` in `meta.yaml`. (#5342 via #5345) + +### Deprecations + +* Mark `conda_build.metadata.toposort` as deprecated. Use `conda_build.metadata.toposort_outputs` instead. (#5342 via #5345) +* Mark `conda_build.metadata.check_circular_dependencies` as deprecated. Use `conda_build.metadata._check_circular_dependencies` instead. (#5342 via #5345) + +### Docs + +* + +### Other + +* diff --git a/tests/test-recipes/metadata/gh-5342/meta.yaml b/tests/test-recipes/metadata/gh-5342/meta.yaml new file mode 100644 index 0000000000..f083f1c95e --- /dev/null +++ b/tests/test-recipes/metadata/gh-5342/meta.yaml @@ -0,0 +1,15 @@ +{% set name = "gh-5342" %} + +package: + name: {{ name }} + version: 1.0 + +outputs: + - name: {{ name }} + build: + skip: true + + - name: {{ name }}-dev + build: + files: + - file From 4818581fcfe12ba2efb090e3da1afc677308c8fd Mon Sep 17 00:00:00 2001 From: Bianca Henderson Date: Thu, 23 May 2024 20:24:37 -0400 Subject: [PATCH 3/3] Changelog 24.5.1 (#5357) * Update .authors.yml * Update news * Updated authorship for 24.5.1 * Updated CHANGELOG for 24.5.1 * Update CHANGELOG.md --- .authors.yml | 4 ++-- CHANGELOG.md | 20 +++++++++++++++++++ news/5345-fix-frozendict | 20 ------------------- ...essive-memory-use-in-inspect_linkages_lief | 19 ------------------ 4 files changed, 22 insertions(+), 41 deletions(-) delete mode 100644 news/5345-fix-frozendict delete mode 100644 news/5348-fix-excessive-memory-use-in-inspect_linkages_lief diff --git a/.authors.yml b/.authors.yml index 6fcf901b4b..db03794b80 100644 --- a/.authors.yml +++ b/.authors.yml @@ -612,7 +612,7 @@ first_commit: 2015-08-30 06:44:37 - name: Marcel Bargull email: marcel.bargull@udo.edu - num_commits: 87 + num_commits: 88 first_commit: 2016-09-26 11:45:54 github: mbargull alternate_emails: @@ -1202,7 +1202,7 @@ alternate_emails: - clee@anaconda.com - name: Ken Odegard - num_commits: 203 + num_commits: 204 email: kodegard@anaconda.com first_commit: 2020-09-08 19:53:41 github: kenodegard diff --git a/CHANGELOG.md b/CHANGELOG.md index 44279be551..8c2a863ce4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ [//]: # (current developments) +## 24.5.1 (2024-05-23) + +### Bug fixes + +* Fix issue with modifying a `frozendict` when specifying `outputs/files` in `meta.yaml`. (#5342 via #5345) +* Fix excessive memory use in `inspect_linkages_lief`. (#5267 via #5348) + +### Deprecations + +* Mark `conda_build.metadata.toposort` as deprecated. Use `conda_build.metadata.toposort_outputs` instead. (#5342 via #5345) +* Mark `conda_build.metadata.check_circular_dependencies` as deprecated. Use `conda_build.metadata._check_circular_dependencies` instead. (#5342 via #5345) + +### Contributors + +* @beeankha +* @kenodegard +* @mbargull + + + ## 24.5.0 (2024-05-06) ### Enhancements diff --git a/news/5345-fix-frozendict b/news/5345-fix-frozendict deleted file mode 100644 index d4a7f8c324..0000000000 --- a/news/5345-fix-frozendict +++ /dev/null @@ -1,20 +0,0 @@ -### Enhancements - -* - -### Bug fixes - -* Fix issue with modifying a `frozendict` when specifying `outputs/files` in `meta.yaml`. (#5342 via #5345) - -### Deprecations - -* Mark `conda_build.metadata.toposort` as deprecated. Use `conda_build.metadata.toposort_outputs` instead. (#5342 via #5345) -* Mark `conda_build.metadata.check_circular_dependencies` as deprecated. Use `conda_build.metadata._check_circular_dependencies` instead. (#5342 via #5345) - -### Docs - -* - -### Other - -* diff --git a/news/5348-fix-excessive-memory-use-in-inspect_linkages_lief b/news/5348-fix-excessive-memory-use-in-inspect_linkages_lief deleted file mode 100644 index 0c6184dcbe..0000000000 --- a/news/5348-fix-excessive-memory-use-in-inspect_linkages_lief +++ /dev/null @@ -1,19 +0,0 @@ -### Enhancements - -* - -### Bug fixes - -* Fix excessive memory use in `inspect_linkages_lief` (#5267 via #5348) - -### Deprecations - -* - -### Docs - -* - -### Other - -*