From 9daa03c606307be44c8c497ccd9f327f3e816b37 Mon Sep 17 00:00:00 2001 From: Rovanion Luckey Date: Wed, 19 Apr 2023 14:20:56 +0200 Subject: [PATCH] Build reproducible tarballs from git --- easybuild/tools/filetools.py | 14 +++++++++----- test/framework/filetools.py | 30 ++++++++++++++++++++++++------ 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py index daa143b46c..803c55f713 100644 --- a/easybuild/tools/filetools.py +++ b/easybuild/tools/filetools.py @@ -2707,11 +2707,15 @@ def get_source_tarball_from_git(filename, targetdir, git_config): for cmd in cmds: run.run_cmd(cmd, log_all=True, simple=True, regexp=False, path=repo_name) - # create an archive and delete the git repo directory - if keep_git_dir: - tar_cmd = ['tar', 'cfvz', targetpath, repo_name] - else: - tar_cmd = ['tar', 'cfvz', targetpath, '--exclude', '.git', repo_name] + # When CentOS 7 is phased out and tar>=1.28 is everywhere, replace find-sort-pipe with tar-flag + # '--sort=name' and place LC_ALL in front of tar. Also remove flags --null, --no-recursion, and + # --files-from - from the flags to tar. 
See https://reproducible-builds.org/docs/archives/ + tar_cmd = ['find', repo_name, '-print0', '-path \'*/.git\' -prune' if not keep_git_dir else '', '|', + 'LC_ALL=C', 'sort', '--zero-terminated', '|', + 'GZIP=--no-name', 'tar', '--create', '--file', targetpath, '--no-recursion', + '--gzip', '--mtime="1970-01-01 00:00Z"', '--owner=0', '--group=0', + '--numeric-owner', '--format=gnu', '--null', + '--no-recursion', '--files-from -'] run.run_cmd(' '.join(tar_cmd), log_all=True, simple=True, regexp=False) # cleanup (repo_name dir does not exist in dry run mode) diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 32d72c7b83..6fd86c322f 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2784,7 +2784,10 @@ def run_check(): expected = '\n'.join([ r' running command "git clone --depth 1 --branch tag_for_tests %(git_repo)s"', r" \(in .*/tmp.*\)", - r' running command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', + r' running command "find testrepository -print0 -path \'*/.git\' -prune | LC_ALL=C sort --zero-terminated' + rf" | GZIP=--no-name tar --create --file {self.test_prefix}/target/test.tar.gz --no-recursion" + r' --gzip --mtime="1970-01-01 00:00Z" --owner=0 --group=0 --numeric-owner --format=gnu' + r' --null --no-recursion --files-from -"', r" \(in .*/tmp.*\)", ]) % git_repo run_check() @@ -2793,7 +2796,10 @@ def run_check(): expected = '\n'.join([ r' running command "git clone --depth 1 --branch tag_for_tests %(git_repo)s test123"', r" \(in .*/tmp.*\)", - r' running command "tar cfvz .*/target/test.tar.gz --exclude .git test123"', + r' running command "find test123 -print0 -path \'*/.git\' -prune | LC_ALL=C sort --zero-terminated' + rf" | GZIP=--no-name tar --create --file {self.test_prefix}/target/test.tar.gz --no-recursion" + r' --gzip --mtime="1970-01-01 00:00Z" --owner=0 --group=0 --numeric-owner --format=gnu' + r' --null --no-recursion --files-from -"', r" \(in .*/tmp.*\)", ]) % git_repo 
run_check() @@ -2803,7 +2809,10 @@ def run_check(): expected = '\n'.join([ r' running command "git clone --depth 1 --branch tag_for_tests --recursive %(git_repo)s"', r" \(in .*/tmp.*\)", - r' running command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', + r' running command "find testrepository -print0 -path \'*/.git\' -prune | LC_ALL=C sort --zero-terminated' + rf" | GZIP=--no-name tar --create --file {self.test_prefix}/target/test.tar.gz --no-recursion" + r' --gzip --mtime="1970-01-01 00:00Z" --owner=0 --group=0 --numeric-owner --format=gnu' + r' --null --no-recursion --files-from -"', r" \(in .*/tmp.*\)", ]) % git_repo run_check() @@ -2812,7 +2821,10 @@ def run_check(): expected = '\n'.join([ r' running command "git clone --branch tag_for_tests --recursive %(git_repo)s"', r" \(in .*/tmp.*\)", - r' running command "tar cfvz .*/target/test.tar.gz testrepository"', + r' running command "find testrepository -print0 | LC_ALL=C sort --zero-terminated | GZIP=--no-name tar' + rf" --create --file {self.test_prefix}/target/test.tar.gz --no-recursion --gzip" + r' --mtime="1970-01-01 00:00Z" --owner=0 --group=0 --numeric-owner --format=gnu --null --no-recursion' + r' --files-from -"', r" \(in .*/tmp.*\)", ]) % git_repo run_check() @@ -2825,7 +2837,10 @@ def run_check(): r" \(in .*/tmp.*\)", r' running command "git checkout 8456f86 && git submodule update --init --recursive"', r" \(in testrepository\)", - r' running command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', + r' running command "find testrepository -print0 -path \'*/.git\' -prune | LC_ALL=C sort --zero-terminated' + rf" | GZIP=--no-name tar --create --file {self.test_prefix}/target/test.tar.gz --no-recursion" + r' --gzip --mtime="1970-01-01 00:00Z" --owner=0 --group=0 --numeric-owner --format=gnu' + r' --null --no-recursion --files-from -"', r" \(in .*/tmp.*\)", ]) % git_repo run_check() @@ -2836,7 +2851,10 @@ def run_check(): r" \(in .*/tmp.*\)", r' running command "git 
checkout 8456f86"', r" \(in testrepository\)", - r' running command "tar cfvz .*/target/test.tar.gz --exclude .git testrepository"', + r' running command "find testrepository -print0 -path \'*/.git\' -prune | LC_ALL=C sort --zero-terminated' + rf" | GZIP=--no-name tar --create --file {self.test_prefix}/target/test.tar.gz --no-recursion" + r' --gzip --mtime="1970-01-01 00:00Z" --owner=0 --group=0 --numeric-owner --format=gnu' + r' --null --no-recursion --files-from -"', r" \(in .*/tmp.*\)", ]) % git_repo run_check()