From f10761e7abdbedd9db1550e233538e91ade08760 Mon Sep 17 00:00:00 2001 From: Martin Basti Date: Thu, 2 Jan 2025 15:20:56 +0100 Subject: [PATCH] feat(cachi2): add support for git-submodules for git-submodules pkg manager, OSBS must handle all the work, cachi2 doesn't manipulate git. Submodules have to be cloned and metadata exported into SBOM and request.json Signed-off-by: Martin Basti --- atomic_reactor/constants.py | 2 + atomic_reactor/plugins/cachi2_init.py | 10 +++ atomic_reactor/plugins/cachi2_postprocess.py | 44 ++++++++- atomic_reactor/utils/cachi2.py | 94 ++++++++++++++++++++ atomic_reactor/utils/retries.py | 8 +- requirements.in | 1 + requirements.txt | 6 ++ 7 files changed, 160 insertions(+), 5 deletions(-) diff --git a/atomic_reactor/constants.py b/atomic_reactor/constants.py index 7ae80148e..16bb1c66e 100644 --- a/atomic_reactor/constants.py +++ b/atomic_reactor/constants.py @@ -143,6 +143,8 @@ HTTP_CLIENT_STATUS_RETRY = (408, 429, 500, 502, 503, 504) # requests timeout in seconds HTTP_REQUEST_TIMEOUT = 600 +# git cmd timeout in seconds +GIT_CMD_TIMEOUT = 600 # max retries for git clone GIT_MAX_RETRIES = 3 # how many seconds should wait before another try of git clone diff --git a/atomic_reactor/plugins/cachi2_init.py b/atomic_reactor/plugins/cachi2_init.py index bf6d6b4c3..ab7f68196 100644 --- a/atomic_reactor/plugins/cachi2_init.py +++ b/atomic_reactor/plugins/cachi2_init.py @@ -28,6 +28,8 @@ from atomic_reactor.utils.cachi2 import ( remote_source_to_cachi2, clone_only, validate_paths, normalize_gomod_pkg_manager, enforce_sandbox, + has_git_submodule_manager, update_submodules, + get_submodules_sbom, get_submodules_request_json_deps, ) @@ -135,6 +137,14 @@ def process_remote_sources(self) -> List[Dict[str, Any]]: remote_source_data["ref"] ) + if has_git_submodule_manager(remote_source_data): + update_submodules(source_path_app) + git_submodules = { + "sbom": get_submodules_sbom(source_path_app), + "request_json":
get_submodules_request_json_deps(source_path_app)
+                }
+                remote_source["git_submodules"] = git_submodules
+
             enforce_sandbox(source_path_app, remove_unsafe_symlinks=False)
             validate_paths(source_path_app, remote_source_data.get("packages", {}))
 
diff --git a/atomic_reactor/plugins/cachi2_postprocess.py b/atomic_reactor/plugins/cachi2_postprocess.py
index e7f5faa73..67146928c 100644
--- a/atomic_reactor/plugins/cachi2_postprocess.py
+++ b/atomic_reactor/plugins/cachi2_postprocess.py
@@ -20,6 +20,7 @@
 from atomic_reactor.constants import (
     CACHITO_ENV_ARG_ALIAS,
     CACHITO_ENV_FILENAME,
+    CACHI2_BUILD_DIR,
     PLUGIN_CACHI2_INIT,
     PLUGIN_CACHI2_POSTPROCESS,
     REMOTE_SOURCE_DIR,
@@ -110,6 +111,7 @@ def run(self) -> Optional[List[Dict[str, Any]]]:
             return None
 
         processed_remote_sources = self.postprocess_remote_sources()
+        self.postprocess_git_submodules_global_sbom()
         self.inject_remote_sources(processed_remote_sources)
 
         return [
@@ -117,6 +119,30 @@ def run(self) -> Optional[List[Dict[str, Any]]]:
             for remote_source in processed_remote_sources
         ]
 
+    def postprocess_git_submodules_global_sbom(self):
+        """Append git-submodule SBOM components from the init plugin to the global bom.json."""
+        all_sboms = []
+        for remote_source in self.init_plugin_data:
+            git_submodules = remote_source.get('git_submodules')
+            if not git_submodules:
+                continue
+
+            all_sboms.extend(git_submodules['sbom'])
+
+        if not all_sboms:
+            return
+
+        # TODO: deduplicate components shared by several remote sources?
+
+        global_sbom_path = self.workflow.build_dir.path / CACHI2_BUILD_DIR / "bom.json"
+        with open(global_sbom_path, 'r') as global_sbom_f:
+            global_sbom_data = json.load(global_sbom_f)
+        global_sbom_data['components'].extend(all_sboms)
+
+        with open(global_sbom_path, 'w') as global_sbom_f:
+            json.dump(global_sbom_data, global_sbom_f)
+            global_sbom_f.flush()
+
     def postprocess_remote_sources(self) -> List[Cachi2RemoteSource]:
         """Process remote source requests and return information about the processed sources."""
 
@@ -132,12 +158,26 @@ def postprocess_remote_sources(self) -> List[Cachi2RemoteSource]:
             with open(sbom_path, 'r') as sbom_f:
                 sbom_data = json.load(sbom_f)
 
+            # request_json must be generated before modifications to sboms are done
+            request_json = generate_request_json(
+                remote_source['remote_source'], sbom_data, json_env_data)
+
+            # update metadata with submodules info
+            git_submodules = remote_source.get('git_submodules')
+            if git_submodules:
+                sbom_data['components'].extend(git_submodules['sbom'])
+
+                with open(sbom_path, 'w') as sbom_f:
+                    json.dump(sbom_data, sbom_f)
+                    sbom_f.flush()
+
+                request_json['dependencies'].extend(git_submodules['request_json'])
+
             remote_source_obj = Cachi2RemoteSource(
                 name=remote_source['name'],
                 tarball_path=Path(remote_source['source_path'], 'remote-source.tar.gz'),
                 sources_path=Path(remote_source['source_path']),
-                json_data=generate_request_json(
-                    remote_source['remote_source'], sbom_data, json_env_data),
+                json_data=request_json,
                 json_env_data=json_env_data,
             )
             processed_remote_sources.append(remote_source_obj)
diff --git a/atomic_reactor/utils/cachi2.py b/atomic_reactor/utils/cachi2.py
index 5fdc732f2..95c885fdf 100644
--- a/atomic_reactor/utils/cachi2.py
+++ b/atomic_reactor/utils/cachi2.py
@@ -13,9 +13,14 @@
 from typing import Any, Callable, Dict, Optional, Tuple, List
 from pathlib import Path
 import os.path
+import urllib.parse
+import git
 
 from packageurl import PackageURL
 
+from atomic_reactor import constants
+from 
atomic_reactor.utils import retries
+
 logger = logging.getLogger(__name__)
 
 
@@ -286,4 +291,93 @@ def clone_only(remote_source: Dict[str, Any]) -> bool:
     if pkg_managers is not None and len(pkg_managers) == 0:
         return True
 
+    # only git-submodule
+    if pkg_managers is not None and pkg_managers == ['git-submodule']:
+        return True
+
     return False
+
+
+def has_git_submodule_manager(remote_source: Dict[str, Any]) -> bool:
+    """Return True when the git-submodule package manager is requested for the remote source."""
+    pkg_managers = remote_source.get("pkg_managers") or []
+    return 'git-submodule' in pkg_managers
+
+
+def update_submodules(repopath: Path):
+    """Run `git submodule update --init` (blobless filter) in the given repo, with retries"""
+    cmd = ["git", "submodule", "update", "--init", "--filter=blob:none"]
+    params = {
+        "cwd": str(repopath),
+        "universal_newlines": True,
+        "encoding": "utf-8",
+        "timeout": constants.GIT_CMD_TIMEOUT,
+    }
+    retries.run_cmd(cmd, **params)
+
+
+def get_submodules_sbom(repopath: Path) -> List[Dict]:
+    """Get SBOM components (one per submodule) for the specified repository"""
+
+    def to_vcs_purl(pkg_name, repo_url, ref):
+        """
+        Generate the vcs purl representation of the package.
+
+        Use the most specific purl type possible, e.g. pkg:github if repo comes from
+        github.com. Fall back to using pkg:generic with a ?vcs_url qualifier.
+
+        :param str pkg_name: name of package
+        :param str repo_url: url of git repository for package
+        :param str ref: git ref of package
+        :return: the PURL string of the Package object
+        :rtype: str
+        """
+        repo_url = repo_url.rstrip("/")
+        parsed_url = urllib.parse.urlparse(repo_url)
+
+        pkg_type_for_hostname = {
+            "github.com": "github",
+            "bitbucket.org": "bitbucket",
+        }
+        pkg_type = pkg_type_for_hostname.get(parsed_url.hostname, "generic")
+
+        if pkg_type == "generic":
+            vcs_url = urllib.parse.quote(f"{repo_url}@{ref}", safe="")
+            purl = f"pkg:generic/{pkg_name}?vcs_url={vcs_url}"
+        else:
+            # pkg:github and pkg:bitbucket use the same format
+            namespace, repo = parsed_url.path.lstrip("/").rsplit("/", 1)
+            if repo.endswith(".git"):
+                repo = repo[: -len(".git")]
+            purl = f"pkg:{pkg_type}/{namespace.lower()}/{repo.lower()}@{ref}"
+
+        return purl
+
+    repo = git.Repo(str(repopath))
+    submodules_sbom = [
+        {
+            "type": "library",
+            "name": sm.name,
+            "version": f"{sm.url}#{sm.hexsha}",
+            "purl": to_vcs_purl(sm.name, sm.url, sm.hexsha)
+        }
+        for sm in repo.submodules
+    ]
+
+    return submodules_sbom
+
+
+def get_submodules_request_json_deps(repopath: Path) -> List[Dict]:
+    """Get request.json dependency entries (one per submodule) for the specified repository"""
+    repo = git.Repo(str(repopath))
+    submodules_request_json_dependencies = [
+        {
+            "type": "git-submodule",
+            "name": sm.name,
+            "path": sm.path,
+            "version": f"{sm.url}#{sm.hexsha}",
+        }
+        for sm in repo.submodules
+    ]
+
+    return submodules_request_json_dependencies
diff --git a/atomic_reactor/utils/retries.py b/atomic_reactor/utils/retries.py
index a46486fb7..5e843a9fe 100644
--- a/atomic_reactor/utils/retries.py
+++ b/atomic_reactor/utils/retries.py
@@ -89,7 +89,7 @@ def get_retrying_requests_session(client_statuses=HTTP_CLIENT_STATUS_RETRY,
     max_tries=SUBPROCESS_MAX_RETRIES + 1,  # total tries is N retries + 1 initial attempt
     jitter=None,  # use deterministic backoff, do not apply random jitter
 )
-def run_cmd(cmd: List[str], cleanup_cmd: List[str] = 
None) -> bytes: +def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None, **params) -> bytes: """Run a subprocess command, retry on any non-zero exit status. Whenever an attempt fails, the stdout and stderr of the failed command will be logged. @@ -98,12 +98,14 @@ def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None) -> bytes: If a cleanup command is specified it'll be run on exception before retry. + :param params: optional params to be passed to subprocess.run function + :return: bytes, the combined stdout and stderr (if any) of the command """ logger.debug("Running %s", " ".join(cmd)) try: - process = subprocess.run(cmd, check=True, capture_output=True) + process = subprocess.run(cmd, check=True, capture_output=True, **params) except subprocess.CalledProcessError as e: logger.warning( "%s failed:\nSTDOUT:\n%s\nSTDERR:\n%s", @@ -114,7 +116,7 @@ def run_cmd(cmd: List[str], cleanup_cmd: List[str] = None) -> bytes: if cleanup_cmd: try: logger.debug("Running %s", " ".join(cleanup_cmd)) - subprocess.run(cleanup_cmd, check=True, capture_output=True) + subprocess.run(cleanup_cmd, check=True, capture_output=True, **params) except subprocess.CalledProcessError as c_e: logger.warning( "Cleanup command: %s failed:\nSTDOUT:\n%s\nSTDERR:\n%s", diff --git a/requirements.in b/requirements.in index 2377aab21..183ee4728 100644 --- a/requirements.in +++ b/requirements.in @@ -2,6 +2,7 @@ backoff dockerfile-parse>=0.0.13 flatpak-module-tools>=0.14 +gitpython jsonschema paramiko>=3.4.0 PyYAML diff --git a/requirements.txt b/requirements.txt index 3a7df5321..3ee8bb632 100644 --- a/requirements.txt +++ b/requirements.txt @@ -47,6 +47,10 @@ editables==0.5 # via hatchling flatpak-module-tools==0.14 # via -r requirements.in +gitdb==4.0.12 + # via gitpython +gitpython==3.1.44 + # via -r requirements.in googleapis-common-protos==1.60.0 # via # opentelemetry-exporter-otlp-proto-grpc @@ -176,6 +180,8 @@ six==1.16.0 # koji # osbs-client # python-dateutil +smmap==5.0.2 + # via gitdb 
tomli==2.0.1 # via hatchling trove-classifiers==2023.8.7