Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add no-archive format for fast environment clone #320

Merged
merged 5 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion conda_pack/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def build_parser():
"default value is 'el7'. This value cannot have any hyphens.")
parser.add_argument("--format",
choices=['infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2',
'tbz2', 'tar.xz', 'txz', 'tar', 'parcel', 'squashfs'],
'tbz2', 'tar.xz', 'txz', 'tar', 'parcel', 'squashfs',
'no-archive'],
default='infer',
help=("The archival format to use. By default this is "
"inferred by the output file extension."))
Expand Down
10 changes: 8 additions & 2 deletions conda_pack/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ def _output_and_format(self, output=None, format="infer"):
format = "tar"
elif output.endswith(".squashfs"):
format = "squashfs"
elif output.endswith('.no-archive'):
format = 'no-archive'
else:
raise CondaPackException("Unknown file extension %r" % output)
elif format not in {
Expand All @@ -254,6 +256,7 @@ def _output_and_format(self, output=None, format="infer"):
"tar",
"parcel",
"squashfs",
"no-archive",
}:
raise CondaPackException("Unknown format %r" % format)
elif output is not None and output.endswith(".parcel"):
Expand Down Expand Up @@ -323,7 +326,8 @@ def pack(
to the basename of the ``dest_prefix`` value, if supplied; otherwise to
the basename of the environment. The suffix will be determined by the
output format (e.g. ``my_env.tar.gz``).
format : {'infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'tar', 'parcel', 'squashfs'}
format : {'infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'tar', 'parcel', 'squashfs',
'no-archive'}
The archival format to use. By default this is inferred from the
output file extension, and defaults to ``tar.gz`` if this is not supplied.
arcroot : str, optional
Expand Down Expand Up @@ -417,6 +421,7 @@ def pack(
zip_64=zip_64,
n_threads=n_threads,
verbose=verbose,
output=output,
) as arc:
packer = Packer(self.prefix, arc, dest_prefix, parcel)

Expand Down Expand Up @@ -501,7 +506,8 @@ def pack(
output : str, optional
The path of the output file. Defaults to the environment name with a
suffix determined by the format; e.g. ``my_env.tar.gz``.
format : {'infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'tar', 'parcel'}, optional
format : {'infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'tar', 'parcel',
'no-archive'}, optional
The archival format to use. By default, this is inferred from the output
file extension, and defaults to ``tar.gz`` if ``output`` is not supplied.
arcroot : str, optional
Expand Down
48 changes: 47 additions & 1 deletion conda_pack/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _parse_n_threads(n_threads=1):


def archive(fileobj, path, arcroot, format, compress_level=4, zip_symlinks=False,
zip_64=True, n_threads=1, verbose=False):
zip_64=True, n_threads=1, verbose=False, output=None):

n_threads = _parse_n_threads(n_threads)

Expand Down Expand Up @@ -68,6 +68,8 @@ def archive(fileobj, path, arcroot, format, compress_level=4, zip_symlinks=False
elif format == "squashfs":
return SquashFSArchive(fileobj, path, arcroot, n_threads, verbose=verbose,
compress_level=compress_level)
elif format == "no-archive":
return NoArchive(output, arcroot)
else: # format == 'tar'
mode = 'w'
close_file = False
Expand Down Expand Up @@ -470,3 +472,47 @@ def _add_bytes(self, source, sourcebytes, target):
with open(target_abspath, "wb") as f:
shutil.copystat(source, target_abspath)
f.write(sourcebytes)


# Copies files to the output directory
class NoArchive(ArchiveBase):
    """Pseudo-archive that places entries directly into a directory.

    Nothing is packed into an archive file. Each added entry is created
    under ``output``: regular files and symlinks are hard-linked when the
    source and the destination directory are on the same device, and
    copied with ``shutil.copy2`` otherwise; anything else is created as a
    directory.

    Parameters
    ----------
    output : str
        Directory that receives the entries.
    arcroot : str
        Stored on the instance; not used by the methods defined here.
    """

    def __init__(self, output, arcroot):
        self.output = output
        self.arcroot = arcroot
        # Picked lazily on the first ``_add`` call, because the device
        # comparison needs an existing destination directory.
        self.copy_func = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Return a falsy value: a truthy result from ``__exit__`` tells the
        # interpreter to swallow the exception raised inside the ``with``
        # body, which would hide packing failures from the caller.
        return False

    def _absolute_path(self, path):
        """Return the normalized location of ``path`` under ``output``."""
        return os.path.normpath(os.path.join(self.output, path))

    def _ensure_parent(self, path):
        """Create the parent directory of ``path`` if it is missing."""
        dir_path = os.path.dirname(path)
        os.makedirs(dir_path, exist_ok=True)

    def _add(self, source, target):
        """Link or copy ``source`` to ``target`` inside the output directory."""
        target_abspath = self._absolute_path(target)
        self._ensure_parent(target_abspath)

        # hardlink instead of copy is faster, but it doesn't work across devices
        if self.copy_func is None:
            same_device = (
                os.lstat(source).st_dev
                == os.lstat(os.path.dirname(target_abspath)).st_dev
            )
            if same_device:
                self.copy_func = partial(os.link, follow_symlinks=False)
            else:
                self.copy_func = partial(shutil.copy2, follow_symlinks=False)

        if os.path.isfile(source) or os.path.islink(source):
            self.copy_func(source, target_abspath)
        else:
            # The directory may already exist because ``_ensure_parent``
            # created it for a child that was added earlier.
            os.makedirs(target_abspath, exist_ok=True)

    def _add_bytes(self, source, sourcebytes, target):
        """Write ``sourcebytes`` to ``target`` and copy stat info from ``source``."""
        target_abspath = self._absolute_path(target)
        self._ensure_parent(target_abspath)
        with open(target_abspath, "wb") as f:
            f.write(sourcebytes)
        # Copy metadata only after the file is closed; doing it before the
        # write lets the write overwrite the copied timestamps.
        shutil.copystat(source, target_abspath)
9 changes: 5 additions & 4 deletions conda_pack/tests/test_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def has_tar_cli():
@pytest.mark.parametrize('format, zip_symlinks', [
('zip', True), ('zip', False),
('tar.gz', False), ('tar.bz2', False), ('tar.xz', False), ('tar', False),
('squashfs', False)
('squashfs', False), ('no-archive', False),
])
def test_format(tmpdir, format, zip_symlinks, root_and_paths):
if format == 'zip':
Expand All @@ -164,7 +164,8 @@ def test_format(tmpdir, format, zip_symlinks, root_and_paths):
os.mkdir(spill_dir)

with open(packed_env_path, mode='wb') as fil:
with archive(fil, packed_env_path, '', format, zip_symlinks=zip_symlinks) as arc:
with archive(fil, packed_env_path, '', format, zip_symlinks=zip_symlinks,
output=spill_dir) as arc:
for rel in paths:
arc.add(join(root, rel), rel)
arc.add_bytes(join(root, "file"),
Expand Down Expand Up @@ -194,7 +195,7 @@ def test_format(tmpdir, format, zip_symlinks, root_and_paths):
else:
cmd = ["squashfuse", packed_env_path, spill_dir]
subprocess.check_output(cmd)
else:
elif format != "no-archive":
with tarfile.open(packed_env_path) as out:
out.extractall(spill_dir)

Expand Down Expand Up @@ -236,7 +237,7 @@ def test_format_parallel(tmpdir, format, root_and_paths):

baseline = threading.active_count()
with open(out_path, mode='wb') as fil:
with archive(fil, out_path, '', format, n_threads=2) as arc:
with archive(fil, out_path, '', format, n_threads=2, output=out_dir) as arc:
for rel in paths:
arc.add(join(root, rel), rel)
timeout = 5
Expand Down
Loading