Skip to content

Commit

Permalink
Update the hashing semantics of the pyxis-enroot modifier
Browse files Browse the repository at this point in the history
This commit updates the pyxis-enroot modifier to cache squashfs hashes,
to avoid hashing large files multiple times.

Also, a `no_provenance` mode is added, to avoid hashing altogether.
  • Loading branch information
douglasjacobsen committed Oct 30, 2024
1 parent eacd650 commit a847bc0
Showing 1 changed file with 33 additions and 16 deletions.
49 changes: 33 additions & 16 deletions var/ramble/repos/builtin/modifiers/pyxis-enroot/modifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,19 @@ class PyxisEnroot(BasicModifier):

container_extension = "sqsh"

container_hash_file_extension = "sha256"

name = "pyxis-enroot"

tags("container", "slurm")

maintainers("douglasjacobsen")

mode("standard", description="Standard execution mode for pyxis-enroot")
mode(
"no_provenance",
description="Standard execution mode without provenance tracking",
)
mode("disabled", description="Disabled execution mode for pyxis-enroot")
default_mode("standard")

Expand All @@ -59,43 +65,43 @@ class PyxisEnroot(BasicModifier):
"container_mounts",
default="",
description="Comma delimited list of mount points for the container. Filled in by modifier",
modes=["standard"],
modes=["standard", "no_provenance"],
)

modifier_variable(
"container_env_vars",
default="",
description="Comma delimited list of environments to import into container. Filled in by modifier",
modes=["standard"],
modes=["standard", "no_provenance"],
)

modifier_variable(
"container_dir",
default="{workload_input_dir}",
description="Directory where the container sqsh will be stored",
modes=["standard"],
modes=["standard", "no_provenance"],
)

modifier_variable(
"container_extract_dir",
default="{workload_input_dir}",
description="Directory where the extracted paths will be stored",
modes=["standard"],
modes=["standard", "no_provenance"],
)

modifier_variable(
"container_path",
default="{container_dir}/{container_name}." + container_extension,
description="Full path to the container sqsh file",
modes=["standard"],
modes=["standard", "no_provenance"],
)

modifier_variable(
"container_extract_paths",
default="[]",
description="List of paths to extract from the sqsh file into the {workload_input_dir}. "
+ "Will have paths of {workload_input_dir}/enroot_extractions/{path_basename}",
modes=["standard"],
modes=["standard", "no_provenance"],
track_used=False,
)

Expand Down Expand Up @@ -192,7 +198,6 @@ def extract_names(itr, name_set=set()):
"container_mounts",
modification=prefix + exp_mount,
method="append",
separator=",",
mode=self._usage_mode,
)

Expand Down Expand Up @@ -258,10 +263,6 @@ def _extract_from_sqsh(self, workspace, app_inst=None):
)
container_path = self.expander.expand_var_name("container_path")

if not os.path.exists(container_extract_dir):
if not workspace.dry_run:
fs.mkdirp(container_extract_dir)

unsquash_args = [
"-f",
"-dest",
Expand Down Expand Up @@ -291,7 +292,10 @@ def artifact_inventory(self, workspace, app_inst=None):
container_uri = self.expander.expand_var_name("container_uri")
inventory = []

if self._usage_mode == "disabled":
if (
self._usage_mode == "disabled"
or self._usage_mode == "no_provenance"
):
return inventory

inventory.append(
Expand All @@ -302,11 +306,24 @@ def artifact_inventory(self, workspace, app_inst=None):
)

if os.path.isfile(container_path):

hash_file_path = (
self.expander.expand_var_name("container_path")
+ self.container_hash_file_extension
)

if os.path.exists(hash_file_path):
with open(hash_file_path, "r") as f:
container_hash = f.read()

else:
container_hash = hash_file(container_path)

with open(hash_file_path, "w+") as f:
f.write(container_hash)

inventory.append(
{
"container_name": container_name,
"digest": hash_file(container_path),
}
{"container_name": container_name, "digest": container_hash}
)

return inventory
Expand Down

0 comments on commit a847bc0

Please sign in to comment.