Skip to content

Commit

Permalink
Add no-archive format for fast environment clone
Browse files Browse the repository at this point in the history
Existing formats do not provide a straightforward way to clone the environment without making an intermediate archive.
Even with disabled compression, they impact execution time and use extra disk space.
`no-archive` is a new format that addresses both problems.
It is 4x faster for local environment cloning on basic scenarios (i.e. github.com/idamlaj/dist-demo) and uses no extra disk space.
  • Loading branch information
Andriy Yurchuk committed Mar 29, 2024
1 parent 71130b0 commit 9e24590
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 4 deletions.
3 changes: 2 additions & 1 deletion conda_pack/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ def build_parser():
"default value is 'el7'. This value cannot have any hyphens.")
parser.add_argument("--format",
choices=['infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2',
'tbz2', 'tar.xz', 'txz', 'tar', 'parcel', 'squashfs'],
'tbz2', 'tar.xz', 'txz', 'tar', 'parcel', 'squashfs',
'no-archive'],
default='infer',
help=("The archival format to use. By default this is "
"inferred by the output file extension."))
Expand Down
10 changes: 8 additions & 2 deletions conda_pack/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,8 @@ def _output_and_format(self, output=None, format="infer"):
format = "tar"
elif output.endswith(".squashfs"):
format = "squashfs"
elif output.endswith('.no-archive'):
format = 'no-archive'
else:
raise CondaPackException("Unknown file extension %r" % output)
elif format not in {
Expand All @@ -254,6 +256,7 @@ def _output_and_format(self, output=None, format="infer"):
"tar",
"parcel",
"squashfs",
"no-archive",
}:
raise CondaPackException("Unknown format %r" % format)
elif output is not None and output.endswith(".parcel"):
Expand Down Expand Up @@ -323,7 +326,8 @@ def pack(
to the basename of the ``dest_prefix`` value, if supplied; otherwise to
the basename of the environment. The suffix will be determined by the
output format (e.g. ``my_env.tar.gz``).
format : {'infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'tar', 'parcel', 'squashfs'}
format : {'infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'tar', 'parcel', 'squashfs',
'no-archive'}
The archival format to use. By default this is inferred from the
output file extension, and defaults to ``tar.gz`` if this is not supplied.
arcroot : str, optional
Expand Down Expand Up @@ -408,6 +412,7 @@ def pack(
try:
with os.fdopen(fd, "wb") as temp_file:
with archive(
output,
temp_file,
temp_path,
arcroot,
Expand Down Expand Up @@ -501,7 +506,8 @@ def pack(
output : str, optional
The path of the output file. Defaults to the environment name with a
suffix determined by the format; e.g. ``my_env.tar.gz``.
format : {'infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'tar', 'parcel'}, optional
format : {'infer', 'zip', 'tar.gz', 'tgz', 'tar.bz2', 'tbz2', 'tar', 'parcel',
'no-archive'}, optional
The archival format to use. By default, this is inferred from the output
file extension, and defaults to ``tar.gz`` if ``output`` is not supplied.
arcroot : str, optional
Expand Down
48 changes: 47 additions & 1 deletion conda_pack/formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def _parse_n_threads(n_threads=1):
return n_threads


def archive(fileobj, path, arcroot, format, compress_level=4, zip_symlinks=False,
def archive(output, fileobj, path, arcroot, format, compress_level=4, zip_symlinks=False,
zip_64=True, n_threads=1, verbose=False):

n_threads = _parse_n_threads(n_threads)
Expand Down Expand Up @@ -68,6 +68,8 @@ def archive(fileobj, path, arcroot, format, compress_level=4, zip_symlinks=False
elif format == "squashfs":
return SquashFSArchive(fileobj, path, arcroot, n_threads, verbose=verbose,
compress_level=compress_level)
elif format == "no-archive":
return NoArchive(output, arcroot)
else: # format == 'tar'
mode = 'w'
close_file = False
Expand Down Expand Up @@ -470,3 +472,47 @@ def _add_bytes(self, source, sourcebytes, target):
with open(target_abspath, "wb") as f:
shutil.copystat(source, target_abspath)
f.write(sourcebytes)


class NoArchive(ArchiveBase):
    """Pseudo-archive that materializes files directly under ``output``.

    No intermediate archive file is written: regular files and symlinks are
    hard-linked (when source and destination live on the same device) or
    copied into the output directory, and rewritten file contents are
    written out as new files.

    Parameters
    ----------
    output : str
        Directory that receives the files.
    arcroot : str
        Stored on the instance; not consulted by the methods of this class
        (presumably applied by the base class before ``_add`` is called --
        TODO confirm).
    """

    def __init__(self, output, arcroot):
        self.output = output
        self.arcroot = arcroot
        # Selected lazily on the first ``_add`` call, once a destination
        # directory exists whose device can be compared with the source's.
        self.copy_func = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Nothing to finalize. Return a falsy value so that exceptions raised
        # inside the ``with`` block propagate; a truthy return value (such as
        # ``self``) would silently suppress them.
        return False

    def _absolute_path(self, path):
        """Return the normalized location of ``path`` under the output dir."""
        return os.path.normpath(os.path.join(self.output, path))

    def _ensure_parent(self, path):
        """Create the parent directory of ``path`` if it is missing."""
        dir_path = os.path.dirname(path)
        # ``os.makedirs("")`` raises, so skip paths without a directory part.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

    def _add(self, source, target):
        """Place the file, symlink or directory ``source`` at ``target``."""
        target_abspath = self._absolute_path(target)
        self._ensure_parent(target_abspath)

        # hardlink instead of copy is faster, but it doesn't work across devices
        if self.copy_func is None:
            source_dev = os.lstat(source).st_dev
            target_dev = os.lstat(os.path.dirname(target_abspath)).st_dev
            if source_dev == target_dev:
                self.copy_func = partial(os.link, follow_symlinks=False)
            else:
                self.copy_func = partial(shutil.copy2, follow_symlinks=False)

        if os.path.isfile(source) or os.path.islink(source):
            self.copy_func(source, target_abspath)
        else:
            # The directory may already exist because a child entry was added
            # first and ``_ensure_parent`` created it; that is not an error.
            os.makedirs(target_abspath, exist_ok=True)

    def _add_bytes(self, source, sourcebytes, target):
        """Write ``sourcebytes`` to ``target`` and copy metadata from ``source``."""
        target_abspath = self._absolute_path(target)
        self._ensure_parent(target_abspath)
        with open(target_abspath, "wb") as f:
            f.write(sourcebytes)
        # Copy metadata only after the file is written and closed; doing it
        # before the write would let the write clobber the copied mtime.
        shutil.copystat(source, target_abspath)

0 comments on commit 9e24590

Please sign in to comment.