From f56581f45bfc95e5726c31a96de3b312b629a025 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 13 Jun 2025 13:22:12 +0200 Subject: [PATCH 01/10] Enhance documentation and test of `find_base_dir` --- easybuild/tools/filetools.py | 41 +++++++++++++++++++----------------- test/framework/filetools.py | 34 +++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py index fcc9149de9..dde824bdf1 100644 --- a/easybuild/tools/filetools.py +++ b/easybuild/tools/filetools.py @@ -1444,36 +1444,39 @@ def is_sha256_checksum(value): return res -def find_base_dir(): - """ - Try to locate a possible new base directory - - this is typically a single subdir, e.g. from untarring a tarball - - when extracting multiple tarballs in the same directory, - expect only the first one to give the correct path +def _get_paths_purged(path): + """Find all files and folders in the folder + + Ignore hidden entries and e.g. log directories """ - def get_local_dirs_purged(): - # e.g. always purge the log directory - # and hidden directories - ignoredirs = ["easybuild"] + IGNORED_DIRS = ["easybuild"] - lst = os.listdir(get_cwd()) - lst = [d for d in lst if not d.startswith('.') and d not in ignoredirs] - return lst + lst = os.listdir(path) + lst = [p for p in lst if not p.startswith('.') and p not in IGNORED_DIRS] + return lst - lst = get_local_dirs_purged() - new_dir = get_cwd() + +def find_base_dir(path=None): + """ + Locate a possible new base directory from the current working directory or the specified one and change to it. + + This is typically a single subdir, e.g. from untarring a tarball. + It recurses into subfolders as long as that subfolder is the only child (file or folder) + and returns the current(ly processed) folder if multiple or no childs exist in it. + """ + new_dir = get_cwd() if path is None else path + lst = _get_paths_purged(new_dir) while len(lst) == 1: - new_dir = os.path.join(get_cwd(), lst[0]) + new_dir = os.path.join(new_dir, lst[0]) if not os.path.isdir(new_dir): break - - change_dir(new_dir) - lst = get_local_dirs_purged() + lst = _get_paths_purged(new_dir) # make sure it's a directory, and not a (single) file that was in a tarball for example while not os.path.isdir(new_dir): new_dir = os.path.dirname(new_dir) + change_dir(new_dir) _log.debug("Last dir list %s" % lst) _log.debug("Possible new dir %s found" % new_dir) return new_dir diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 7e296803cc..2c62d2f6e8 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -166,11 +166,39 @@ def test_find_base_dir(self): foodir = os.path.join(tmpdir, 'foo') os.mkdir(foodir) - os.mkdir(os.path.join(tmpdir, '.bar')) - os.mkdir(os.path.join(tmpdir, 'easybuild')) - + # No files os.chdir(tmpdir) self.assertTrue(os.path.samefile(foodir, ft.find_base_dir())) + # Uses specified path + os.chdir(self.test_prefix) + self.assertTrue(os.path.samefile(foodir, ft.find_base_dir(tmpdir))) + + # Only ignored files/folders + os.mkdir(os.path.join(tmpdir, '.bar')) + os.mkdir(os.path.join(tmpdir, 'easybuild')) + self.assertTrue(os.path.samefile(foodir, ft.find_base_dir(tmpdir))) + + # Subfolder + bardir = os.path.join(foodir, 'bar') + os.mkdir(bardir) + self.assertTrue(os.path.samefile(bardir, ft.find_base_dir(tmpdir))) + # With ignored folder in subfolder + os.mkdir(os.path.join(bardir, '.bar')) + self.assertTrue(os.path.samefile(bardir, ft.find_base_dir(tmpdir))) + + # Test recursiveness + subdir = os.path.join(bardir, 'sub') + os.mkdir(subdir) + self.assertTrue(os.path.samefile(subdir, ft.find_base_dir(tmpdir))) + # Only file(s) in subfolder + ft.write_file(os.path.join(subdir, 'src.c'), 'code') + self.assertTrue(os.path.samefile(subdir, ft.find_base_dir(tmpdir))) + ft.write_file(os.path.join(subdir, 'src2.c'), 'code') + self.assertTrue(os.path.samefile(subdir, ft.find_base_dir(tmpdir))) + + # Files and folders in subfolder + os.mkdir(os.path.join(bardir, 'subdir')) + self.assertTrue(os.path.samefile(bardir, ft.find_base_dir(tmpdir))) def test_find_glob_pattern(self): """test find_glob_pattern function""" From 15c9a4945738085027c012d6b9ef87e86efbdb0c Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 13 Jun 2025 11:14:20 +0200 Subject: [PATCH 02/10] Enhance test_extract_file to verify returned path Create an empty target folder so the extraction can change into the extracted folder. --- test/framework/filetools.py | 44 +++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 21 deletions(-) diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 2c62d2f6e8..80dd4d5c5e 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2506,24 +2506,26 @@ def test_extract_file(self): testdir = os.path.dirname(os.path.abspath(__file__)) toy_tarball = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'toy-0.0.tar.gz') - self.assertNotExists(os.path.join(self.test_prefix, 'toy-0.0', 'toy.source')) + extraction_path = os.path.join(self.test_prefix, 'extraction') # New directory + toy_path = os.path.join(extraction_path, 'toy-0.0') + self.assertNotExists(os.path.join(toy_path, 'toy.source')) with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball, self.test_prefix, change_into_dir=False) - self.assertExists(os.path.join(self.test_prefix, 'toy-0.0', 'toy.source')) - self.assertTrue(os.path.samefile(path, self.test_prefix)) + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) + self.assertExists(os.path.join(toy_path, 'toy.source')) + self.assertTrue(os.path.samefile(path, toy_path)) # still in same directory as before if change_into_dir is set to False self.assertTrue(os.path.samefile(os.getcwd(), cwd)) - shutil.rmtree(os.path.join(path, 'toy-0.0')) + ft.remove_dir(toy_path) toy_tarball_renamed = os.path.join(self.test_prefix, 'toy_tarball') shutil.copyfile(toy_tarball, toy_tarball_renamed) with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball_renamed, self.test_prefix, cmd="tar xfvz %s", change_into_dir=False) + path = ft.extract_file(toy_tarball_renamed, extraction_path, cmd="tar xfvz %s", change_into_dir=False) self.assertTrue(os.path.samefile(os.getcwd(), cwd)) - self.assertExists(os.path.join(self.test_prefix, 'toy-0.0', 'toy.source')) - self.assertTrue(os.path.samefile(path, self.test_prefix)) - shutil.rmtree(os.path.join(path, 'toy-0.0')) + self.assertExists(os.path.join(toy_path, 'toy.source')) + self.assertTrue(os.path.samefile(path, toy_path)) + ft.remove_dir(toy_path) # also test behaviour of extract_file under --dry-run build_options = { @@ -2533,42 +2535,42 @@ def test_extract_file(self): init_config(build_options=build_options) self.mock_stdout(True) - path = ft.extract_file(toy_tarball, self.test_prefix, change_into_dir=False) + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) txt = self.get_stdout() self.mock_stdout(False) self.assertTrue(os.path.samefile(os.getcwd(), cwd)) - self.assertTrue(os.path.samefile(path, self.test_prefix)) - self.assertNotExists(os.path.join(self.test_prefix, 'toy-0.0')) + self.assertTrue(os.path.samefile(path, extraction_path)) + self.assertNotExists(toy_path) self.assertTrue(re.search('running shell command "tar xzf .*/toy-0.0.tar.gz"', txt)) with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball, self.test_prefix, forced=True, change_into_dir=False) - self.assertExists(os.path.join(self.test_prefix, 'toy-0.0', 'toy.source')) - self.assertTrue(os.path.samefile(path, self.test_prefix)) + path = ft.extract_file(toy_tarball, extraction_path, forced=True, change_into_dir=False) + self.assertExists(os.path.join(toy_path, 'toy.source')) + self.assertTrue(os.path.samefile(path, toy_path)) self.assertTrue(os.path.samefile(os.getcwd(), cwd)) build_options['extended_dry_run'] = False init_config(build_options=build_options) - ft.remove_dir(os.path.join(self.test_prefix, 'toy-0.0')) + ft.remove_dir(toy_path) ft.change_dir(cwd) - self.assertFalse(os.path.samefile(os.getcwd(), self.test_prefix)) + self.assertFalse(os.path.samefile(os.getcwd(), extraction_path)) with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball, self.test_prefix, change_into_dir=True) + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=True) stdout = self.get_stdout() stderr = self.get_stderr() - self.assertTrue(os.path.samefile(path, self.test_prefix)) - self.assertTrue(os.path.samefile(os.getcwd(), self.test_prefix)) + self.assertTrue(os.path.samefile(path, toy_path)) + self.assertTrue(os.path.samefile(os.getcwd(), toy_path)) self.assertFalse(stderr) self.assertTrue("running shell command" in stdout) # check whether disabling trace output works with self.mocked_stdout_stderr(): - path = ft.extract_file(toy_tarball, self.test_prefix, change_into_dir=True, trace=False) + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=True, trace=False) stdout = self.get_stdout() stderr = self.get_stderr() self.assertFalse(stderr) From 45934a39eee7db487c9e2e7de7f4d48450914d4a Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 13 Jun 2025 12:44:10 +0200 Subject: [PATCH 03/10] Add test for `extract_file` of archive with multiple folders --- test/framework/filetools.py | 58 +++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 80dd4d5c5e..534708994a 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -40,6 +40,7 @@ import shutil import stat import sys +import tarfile import tempfile import textwrap import time @@ -2499,6 +2500,14 @@ def test_change_dir(self): foo = os.path.join(self.test_prefix, 'foo') self.assertErrorRegex(EasyBuildError, "Failed to change from .* to %s" % foo, ft.change_dir, foo) + def create_new_tarball(self, folder): + """Create new tarball with contents of folder and return path""" + tarball = tempfile.mktemp(suffix='.tar.gz') + with tarfile.open(tarball, "w:gz") as tar: + for name in glob.glob(os.path.join(folder, '*')): + tar.add(name, arcname=os.path.basename(name)) + return tarball + def test_extract_file(self): """Test extract_file""" cwd = os.getcwd() @@ -2576,6 +2585,50 @@ def test_extract_file(self): self.assertFalse(stderr) self.assertFalse(stdout) + # Test tarball with multiple folders + test_src = tempfile.mkdtemp() + ft.mkdir(os.path.join(test_src, 'multi-1.0')) + ft.write_file(os.path.join(test_src, 'multi-1.0', 'src.c'), 'content') + ft.mkdir(os.path.join(test_src, 'multi-bonus')) + ft.write_file(os.path.join(test_src, 'multi-bonus', 'src.c'), 'content') + test_tarball = self.create_new_tarball(test_src) + # Start fresh + ft.remove_dir(extraction_path) + ft.change_dir(cwd) + with self.mocked_stdout_stderr(): + path = ft.extract_file(test_tarball, extraction_path, change_into_dir=True) + self.assertTrue(os.path.samefile(path, extraction_path)) + self.assertTrue(os.path.samefile(os.getcwd(), extraction_path)) # NOT a subfolder + self.assertExists(os.path.join(extraction_path, 'multi-1.0')) + self.assertExists(os.path.join(extraction_path, 'multi-bonus')) + + # Extract multiple files with single folder to same folder, and file only + bar_tarball = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'extensions', 'bar-0.0.tar.gz') + patch_tarball = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'toy-0.0_gzip.patch.gz') + ft.remove_dir(extraction_path) + ft.change_dir(cwd) + with self.mocked_stdout_stderr(): + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, toy_path)) + path = ft.extract_file(bar_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, os.path.join(extraction_path, 'bar-0.0'))) + # Contains no folder + path = ft.extract_file(patch_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, extraction_path)) + + # Folder and file + test_src = tempfile.mkdtemp() + ft.mkdir(os.path.join(test_src, 'multi-1.0')) + ft.write_file(os.path.join(test_src, 'multi-1.0', 'src.c'), 'content') + ft.write_file(os.path.join(test_src, 'main.c'), 'content') + test_tarball = self.create_new_tarball(test_src) + # When there is only a file or a file next to the folder the parent dir is returned + for tarball in (patch_tarball, test_tarball): + ft.remove_dir(extraction_path) + with self.mocked_stdout_stderr(): + path = ft.extract_file(tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, extraction_path)) + def test_empty_dir(self): """Test empty_dir function""" test_dir = os.path.join(self.test_prefix, 'test123') @@ -2608,8 +2661,7 @@ def test_empty_dir(self): txt = self.get_stdout() self.mock_stdout(False) - regex = re.compile("^directory [^ ]* emptied$") - self.assertTrue(regex.match(txt), f"Pattern '{regex.pattern}' found in: {txt}") + self.assertRegEx(txt, "^directory [^ ]* emptied$") def test_remove(self): """Test remove_file, remove_dir and join remove functions.""" @@ -2912,7 +2964,7 @@ def test_search_file(self): # to avoid accidental matches in other files already present (log files, etc.) ec_dir = tempfile.mkdtemp() test_ec = os.path.join(ec_dir, 'netCDF-C++-4.2-foss-2019a.eb') - ft.write_file(test_ec, ''), + ft.write_file(test_ec, '') for pattern in ['netCDF-C++', 'CDF', 'C++', '^netCDF']: var_defs, hits = ft.search_file([ec_dir], pattern, terse=True, filename_only=True) self.assertEqual(var_defs, [], msg='For pattern ' + pattern) From dc4dd5e5d9a933d60c290968070412ec6ed7f744 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 13 Jun 2025 13:47:21 +0200 Subject: [PATCH 04/10] Correctly detect final path when extracting multiple tarballs The current implementation of `extract_file` detected folders/files from the first tarball when extracting the second. Due to the definition of `find_base_dir` it will then return the parent path (usually `builddir`) which is used as the `finalpath` for sources. This leads issues requiring workarounds in e.g. the Bundle easyblock where sources from all components are extracted into the same folder. Fix by storing the old state of the target folder and detect of extraction resulted in a single (top-level) folder. --- easybuild/tools/filetools.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py index dde824bdf1..2e0258eeb2 100644 --- a/easybuild/tools/filetools.py +++ b/easybuild/tools/filetools.py @@ -537,10 +537,19 @@ def extract_file(fn, dest, cmd=None, extra_options=None, overwrite=False, forced if extra_options: cmd = f"{cmd} {extra_options}" + previous_paths = set(_get_paths_purged(abs_dest)) run_shell_cmd(cmd, in_dry_run=forced, hidden=not trace) - - # note: find_base_dir also changes into the base dir! - base_dir = find_base_dir() + new_paths = set(_get_paths_purged(abs_dest)) - previous_paths + if len(new_paths) == 1: + new_path = new_paths.pop() + if os.path.isdir(new_path): + # note: find_base_dir also changes into the base dir! + base_dir = find_base_dir(new_path) + else: + base_dir = abs_dest + else: + base_dir = abs_dest + _log.debug(f"Multiple new folder/files found after extraction: {new_paths}. Using {base_dir} as base dir.") # if changing into obtained directory is not desired, # change back to where we came from (unless that was a non-existing directory) @@ -1464,7 +1473,7 @@ def find_base_dir(path=None): It recurses into subfolders as long as that subfolder is the only child (file or folder) and returns the current(ly processed) folder if multiple or no childs exist in it. """ - new_dir = get_cwd() if path is None else path + new_dir = get_cwd() if path is None else os.path.abspath(path) lst = _get_paths_purged(new_dir) while len(lst) == 1: new_dir = os.path.join(new_dir, lst[0]) From 67ae64fa988d4b440e988285e7bba73bdbfd8f92 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 13 Jun 2025 14:20:38 +0200 Subject: [PATCH 05/10] Fix download of repo in *-from-pr Adapt for `extract_dir` now returning the subfolder which it previously did not because the downloaded tar file is in the same folder. --- easybuild/tools/github.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/easybuild/tools/github.py b/easybuild/tools/github.py index fa6e5519eb..9f4540678e 100644 --- a/easybuild/tools/github.py +++ b/easybuild/tools/github.py @@ -430,8 +430,10 @@ def download_repo(repo=GITHUB_EASYCONFIGS_REPO, branch=None, commit=None, accoun else: _log.debug("%s downloaded to %s, extracting now", base_name, path) - base_dir = extract_file(target_path, path, forced=True, change_into_dir=False, trace=False) - extracted_path = os.path.join(base_dir, extracted_dir_name) + extracted_path = extract_file(target_path, path, forced=True, change_into_dir=False, trace=False) + if extracted_path != expected_path: + raise EasyBuildError(f"Unexpected directory '{extracted_path} for extracted repo. Expected: {expected_path}", + exit_code=EasyBuildExit.FAIL_EXTRACT) # check if extracted_path exists if not os.path.isdir(extracted_path): From e23ec0d2895f3d9fa48660cda1115a9114ecf94d Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Fri, 13 Jun 2025 15:14:08 +0200 Subject: [PATCH 06/10] Adapt test_apply_patch --- test/framework/filetools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 534708994a..c17f5f4585 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -1894,7 +1894,7 @@ def test_apply_patch(self): for with_backup in (True, False): update_build_option('backup_patched_files', with_backup) self.assertTrue(ft.apply_patch(toy_patch, path)) - src_file = os.path.join(path, 'toy-0.0', 'toy.source') + src_file = os.path.join(path, 'toy.source') backup_file = src_file + '.orig' patched = ft.read_file(src_file) pattern = "I'm a toy, and very proud of it" @@ -1911,7 +1911,7 @@ def test_apply_patch(self): toy_patch_gz = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'toy-0.0_gzip.patch.gz') with self.mocked_stdout_stderr(): self.assertTrue(ft.apply_patch(toy_patch_gz, path)) - patched_gz = ft.read_file(os.path.join(path, 'toy-0.0', 'toy.source')) + patched_gz = ft.read_file(os.path.join(path, 'toy.source')) pattern = "I'm a toy, and very very proud of it" self.assertIn(pattern, patched_gz) @@ -1922,7 +1922,7 @@ def test_apply_patch(self): with self.mocked_stdout_stderr(): ft.apply_patch(toy_patch_gz, path, options=' --reverse') # Change was really removed - self.assertNotIn(pattern, ft.read_file(os.path.join(path, 'toy-0.0', 'toy.source'))) + self.assertNotIn(pattern, ft.read_file(os.path.join(path, 'toy.source'))) # test copying of files, both to an existing directory and a non-existing location test_file = os.path.join(self.test_prefix, 'foo.txt') From 3106d86174b68c9d6e83ad0c98016a6c7e7deeb2 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 18 Jun 2025 10:34:05 +0200 Subject: [PATCH 07/10] Remove uses for binaries from test_extract_file --- test/framework/filetools.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/test/framework/filetools.py b/test/framework/filetools.py index c17f5f4585..86ebba0279 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2500,9 +2500,12 @@ def test_change_dir(self): foo = os.path.join(self.test_prefix, 'foo') self.assertErrorRegex(EasyBuildError, "Failed to change from .* to %s" % foo, ft.change_dir, foo) - def create_new_tarball(self, folder): + def create_new_tarball(self, folder, filename=None): """Create new tarball with contents of folder and return path""" - tarball = tempfile.mktemp(suffix='.tar.gz') + if filename is None: + tarball = tempfile.mktemp(suffix='.tar.gz') + else: + tarball = os.path.join(tempfile.mkdtemp(), filename) with tarfile.open(tarball, "w:gz") as tar: for name in glob.glob(os.path.join(folder, '*')): tar.add(name, arcname=os.path.basename(name)) @@ -2512,8 +2515,10 @@ def test_extract_file(self): """Test extract_file""" cwd = os.getcwd() - testdir = os.path.dirname(os.path.abspath(__file__)) - toy_tarball = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'toy-0.0.tar.gz') + test_src = tempfile.mkdtemp() + ft.mkdir(os.path.join(test_src, 'toy-0.0')) + ft.write_file(os.path.join(test_src, 'toy-0.0', 'toy.source'), 'content') + toy_tarball = self.create_new_tarball(test_src, filename='toy-0.0.tar.gz') extraction_path = os.path.join(self.test_prefix, 'extraction') # New directory toy_path = os.path.join(extraction_path, 'toy-0.0') @@ -2603,8 +2608,15 @@ def test_extract_file(self): self.assertExists(os.path.join(extraction_path, 'multi-bonus')) # Extract multiple files with single folder to same folder, and file only - bar_tarball = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'extensions', 'bar-0.0.tar.gz') - patch_tarball = os.path.join(testdir, 'sandbox', 'sources', 'toy', 'toy-0.0_gzip.patch.gz') + test_src = tempfile.mkdtemp() + ft.mkdir(os.path.join(test_src, 'bar-0.0')) + ft.write_file(os.path.join(test_src, 'bar-0.0', 'bar.source'), 'content') + bar_tarball = self.create_new_tarball(test_src) + + test_src = tempfile.mkdtemp() + ft.write_file(os.path.join(test_src, 'main.source'), 'content') + file_tarball = self.create_new_tarball(test_src) + ft.remove_dir(extraction_path) ft.change_dir(cwd) with self.mocked_stdout_stderr(): @@ -2613,7 +2625,7 @@ def test_extract_file(self): path = ft.extract_file(bar_tarball, extraction_path, change_into_dir=False) self.assertTrue(os.path.samefile(path, os.path.join(extraction_path, 'bar-0.0'))) # Contains no folder - path = ft.extract_file(patch_tarball, extraction_path, change_into_dir=False) + path = ft.extract_file(file_tarball, extraction_path, change_into_dir=False) self.assertTrue(os.path.samefile(path, extraction_path)) # Folder and file @@ -2623,7 +2635,7 @@ def test_extract_file(self): ft.write_file(os.path.join(test_src, 'main.c'), 'content') test_tarball = self.create_new_tarball(test_src) # When there is only a file or a file next to the folder the parent dir is returned - for tarball in (patch_tarball, test_tarball): + for tarball in (file_tarball, test_tarball): ft.remove_dir(extraction_path) with self.mocked_stdout_stderr(): path = ft.extract_file(tarball, extraction_path, change_into_dir=False) From 6ccb1f2edb48e46e1dcb4cbdfabbff803c6c16ac Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 17 Sep 2025 16:21:38 +0200 Subject: [PATCH 08/10] Test extracting just a file --- test/framework/filetools.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 86ebba0279..ecc38e7d1f 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2624,7 +2624,12 @@ def test_extract_file(self): self.assertTrue(os.path.samefile(path, toy_path)) path = ft.extract_file(bar_tarball, extraction_path, change_into_dir=False) self.assertTrue(os.path.samefile(path, os.path.join(extraction_path, 'bar-0.0'))) - # Contains no folder + + # Contains just a file + path = ft.extract_file(file_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, extraction_path)) + # Same behavior when only a file is extracted + ft.remove_dir(extraction_path) path = ft.extract_file(file_tarball, extraction_path, change_into_dir=False) self.assertTrue(os.path.samefile(path, extraction_path)) From 27c88ab64309b490b79e44c711cb48a4a6ddda38 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Wed, 17 Sep 2025 16:29:54 +0200 Subject: [PATCH 09/10] Handle extracting the same file to the same destination When a source is added multiple times the content after the 2nd extraction will be the same and no new files/folders are detected. So `extract_file` would return the parent folder. However if there was a clear result after the first extraction the same result should be returned. This works trivially until something else is extracted to the same folder between the 2 extraction operations. --- easybuild/tools/filetools.py | 7 ++++++- test/framework/filetools.py | 9 +++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/easybuild/tools/filetools.py b/easybuild/tools/filetools.py index 2e0258eeb2..d540e29660 100644 --- a/easybuild/tools/filetools.py +++ b/easybuild/tools/filetools.py @@ -540,7 +540,12 @@ def extract_file(fn, dest, cmd=None, extra_options=None, overwrite=False, forced previous_paths = set(_get_paths_purged(abs_dest)) run_shell_cmd(cmd, in_dry_run=forced, hidden=not trace) new_paths = set(_get_paths_purged(abs_dest)) - previous_paths - if len(new_paths) == 1: + if len(new_paths) == 0: + _log.warning(f"No new folder/files found after extraction of {fn} in {abs_dest}, " + f"verify that the same file is not extracted multiple times! " + f"Existing paths: {', '.join(previous_paths)}") + base_dir = find_base_dir() # Fallback + elif len(new_paths) == 1: new_path = new_paths.pop() if os.path.isdir(new_path): # note: find_base_dir also changes into the base dir! diff --git a/test/framework/filetools.py b/test/framework/filetools.py index ecc38e7d1f..5940866e63 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2622,9 +2622,18 @@ def test_extract_file(self): with self.mocked_stdout_stderr(): path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) self.assertTrue(os.path.samefile(path, toy_path)) + # Extracting the same tarball again detects the same folder + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, toy_path)) + path = ft.extract_file(bar_tarball, extraction_path, change_into_dir=False) self.assertTrue(os.path.samefile(path, os.path.join(extraction_path, 'bar-0.0'))) + # Extracting the same tarball again now can't detect the same folder as there is another one next to it. + # Should fall back to the parent dir + path = ft.extract_file(toy_tarball, extraction_path, change_into_dir=False) + self.assertTrue(os.path.samefile(path, extraction_path)) + # Contains just a file path = ft.extract_file(file_tarball, extraction_path, change_into_dir=False) self.assertTrue(os.path.samefile(path, extraction_path)) From c0b91e0a58550bfa405ed90753f9d6daf5d98268 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 7 Oct 2025 16:03:32 +0200 Subject: [PATCH 10/10] Fix typo --- test/framework/filetools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/framework/filetools.py b/test/framework/filetools.py index 5940866e63..b4b4a67d96 100644 --- a/test/framework/filetools.py +++ b/test/framework/filetools.py @@ -2687,7 +2687,7 @@ def test_empty_dir(self): txt = self.get_stdout() self.mock_stdout(False) - self.assertRegEx(txt, "^directory [^ ]* emptied$") + self.assertRegex(txt, "^directory [^ ]* emptied$") def test_remove(self): """Test remove_file, remove_dir and join remove functions."""