diff --git a/archive_path/common.py b/archive_path/common.py
index 2d8f1d5..431e942 100644
--- a/archive_path/common.py
+++ b/archive_path/common.py
@@ -81,6 +81,5 @@ def match_glob(base: str, pattern: str, iterator: Iterable[str]) -> Iterable[str
             continue
         if name_parts[:at_parts_len] != at_parts:
             continue
-        print(name, name_parts, at_parts_len, match)
         if fnmatch(name_parts[-1], match):
             yield name
diff --git a/archive_path/tar_path.py b/archive_path/tar_path.py
index e15239e..3d93b0d 100644
--- a/archive_path/tar_path.py
+++ b/archive_path/tar_path.py
@@ -372,6 +372,52 @@ def puttree(
             )
             self._tarfile.add(str(subpath), tarpath, recursive=False)
 
+    def extract_tree(
+        self,
+        outpath: Union[str, Path],
+        *,
+        pattern: str = "**/*",
+        allow_dev: bool = False,
+        allow_symlink: bool = False,
+        callback: Optional[Callable[[str, Any], None]] = None,
+        cb_descript: str = "Extracting objects",
+    ) -> None:
+        """Extract the archive path (and recursive children) to an external path.
+
+        :param outpath: The path to output to
+        :param pattern: the glob pattern for selecting children to extract
+        :param allow_dev: output character/block devices and FIFOs
+        :param allow_symlink: output symbolic and hard links
+
+        :param callback: a callback to report on the process, ``callback(action, value)``,
+            with the following callback signatures:
+
+            - ``callback('init', {'total': <int>, 'description': <str>})``,
+               to signal the start of a process, its total iterations and description
+            - ``callback('update', <int>)``,
+               to signal an update to the process and the number of iterations to progress
+
+        :param cb_descript: the description to return in the callback
+
+        """
+        if callback is None:
+            callback = lambda action, value: None  # noqa: E731
+        else:
+            callback("init", {"total": 1, "description": "Counting objects to extract"})
+            count = sum(1 for _ in self.glob(pattern, include_virtual=False))
+            callback("init", {"total": count, "description": cb_descript})
+
+        for path in self.glob(pattern, include_virtual=False):
+            callback("update", 1)
+            info = self._tarfile.getmember(path.at)
+            if (not allow_dev) and info.isdev():
+                continue
+            if (not allow_symlink) and (info.islnk() or info.issym()):
+                continue
+            # NOTE: without an extraction filter, ``tarfile`` may write outside ``outpath``
+            # for members with absolute or ``..`` names; only use on trusted archives.
+            self._tarfile.extract(path=outpath, member=info)
+
 
 def read_file_in_tar(
     filepath: str, path: str, encoding: Optional[str] = "utf8", mode="r:*"
diff --git a/archive_path/zip_path.py b/archive_path/zip_path.py
index ab2def1..a155909 100644
--- a/archive_path/zip_path.py
+++ b/archive_path/zip_path.py
@@ -387,6 +387,47 @@ def puttree(
             )
             self._zipfile.write(subpath, zippath)
 
+    def extract_tree(
+        self,
+        outpath: Union[str, Path],
+        *,
+        pattern: str = "**/*",
+        callback: Optional[Callable[[str, Any], None]] = None,
+        cb_descript: str = "Extracting objects",
+    ) -> None:
+        """Extract the archive path (and recursive children) to an external path.
+
+        :param outpath: The path to output to
+        :param pattern: the glob pattern for selecting children to extract
+
+        :param callback: a callback to report on the process, ``callback(action, value)``,
+            with the following callback signatures:
+
+            - ``callback('init', {'total': <int>, 'description': <str>})``,
+               to signal the start of a process, its total iterations and description
+            - ``callback('update', <int>)``,
+               to signal an update to the process and the number of iterations to progress
+
+        :param cb_descript: the description to return in the callback
+
+        """
+        outpath = cast(str, os.path.abspath(outpath))
+
+        if callback is None:
+            callback = lambda action, value: None  # noqa: E731
+        else:
+            callback("init", {"total": 1, "description": "Counting objects to extract"})
+            count = sum(1 for _ in self.glob(pattern, include_virtual=False))
+            callback("init", {"total": count, "description": cb_descript})
+
+        for path in self.glob(pattern, include_virtual=False):
+            callback("update", 1)
+            try:
+                info = self._zipfile.getinfo(path.at)
+            except KeyError:
+                info = self._zipfile.getinfo(path.at + "/")
+            self._zipfile.extract(path=outpath, member=info)
+
 
 class FileList(Sequence):
     """A list of ``zipfile.ZipInfo`` which mirrors the ``zipfile.ZipFile.NameToInfo`` mapping.
diff --git a/tests/test_basic.py b/tests/test_basic.py
index b09b291..aa16955 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -6,6 +6,8 @@
 # For further information on the license, see the LICENSE file            #
 ###########################################################################
 """Test compression utilities"""
+from typing import Type, Union
+
 import pytest
 
 from archive_path import TarPath, ZipPath, read_file_in_tar, read_file_in_zip
@@ -19,7 +21,14 @@
     ],
     ids=("zip", "tar.gz"),
 )
-def test_path(tmp_path, klass, filename, write_mode, read_mode, read_func):
+def test_path(
+    tmp_path,
+    klass: Union[Type[TarPath], Type[ZipPath]],
+    filename,
+    write_mode,
+    read_mode,
+    read_func,
+):
     """Test basic functionality and equivalence of ``ZipPath`` and ``TarPath``."""
 
     # test write
@@ -133,3 +142,42 @@ def test_path(tmp_path, klass, filename, write_mode, read_mode, read_func):
     assert (klass(tmp_path / filename) / "a") == klass(tmp_path / filename).joinpath(
         "a"
     )
+
+    # test extract_tree
+    file_read.extract_tree(tmp_path / "extract_tree_all")
+    assert {
+        p.as_posix().replace(tmp_path.as_posix(), "")
+        for p in (tmp_path / "extract_tree_all").glob("**/*")
+    } == {
+        "/extract_tree_all/new_file.txt",
+        "/extract_tree_all/other_folder",
+        "/extract_tree_all/other_folder/sub_file.txt",
+        "/extract_tree_all/folder",
+        "/extract_tree_all/folder/other_file.txt",
+        "/extract_tree_all/other_folder/nested1",
+        "/extract_tree_all/bytes.exe",
+        "/extract_tree_all/other_folder/nested1/nested2",
+        "/extract_tree_all/bytes2.exe",
+        "/extract_tree_all/other_folder/sub_folder",
+    }
+
+    file_read.joinpath("folder").extract_tree(tmp_path / "extract_tree_folder")
+    assert {
+        p.as_posix().replace(tmp_path.as_posix(), "")
+        for p in (tmp_path / "extract_tree_folder").glob("**/*")
+    } == {
+        "/extract_tree_folder/folder",
+        "/extract_tree_folder/folder/other_file.txt",
+    }
+
+    file_read.extract_tree(tmp_path / "extract_tree_txt", pattern="**/*.txt")
+    assert {
+        p.as_posix().replace(tmp_path.as_posix(), "")
+        for p in (tmp_path / "extract_tree_txt").glob("**/*")
+    } == {
+        "/extract_tree_txt/new_file.txt",
+        "/extract_tree_txt/other_folder",
+        "/extract_tree_txt/other_folder/sub_file.txt",
+        "/extract_tree_txt/folder",
+        "/extract_tree_txt/folder/other_file.txt",
+    }