From fd990ce8bbf5794abd1aeb6a4f9a021235f1df00 Mon Sep 17 00:00:00 2001 From: Francesco Di Natale Date: Sat, 29 Sep 2018 21:03:53 -0700 Subject: [PATCH] Addition of user enabled workspace hashing (#145) * Addition of hashing to Study parameterization. * Addition of the hashws option to argparse. * Addition of a warning note for users who use labels in steps. --- maestrowf/datastructures/core/study.py | 17 ++++++++++++----- maestrowf/maestro.py | 6 +++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/maestrowf/datastructures/core/study.py b/maestrowf/datastructures/core/study.py index e70d6e58a..b57bb1300 100644 --- a/maestrowf/datastructures/core/study.py +++ b/maestrowf/datastructures/core/study.py @@ -29,6 +29,7 @@ """Class related to the construction of study campaigns.""" import copy +from hashlib import md5 import logging import os import pickle @@ -355,7 +356,7 @@ def setup_environment(self): self.environment.acquire_environment() def configure_study(self, submission_attempts=1, restart_limit=1, - throttle=0, use_tmp=False): + throttle=0, use_tmp=False, hash_ws=False): """ Perform initial configuration of a study. @@ -376,6 +377,7 @@ def configure_study(self, submission_attempts=1, restart_limit=1, self._restart_limit = restart_limit self._submission_throttle = throttle self._use_tmp = use_tmp + self._hash_ws = hash_ws logger.info( "\n------------------------------------------\n" @@ -384,9 +386,10 @@ def configure_study(self, submission_attempts=1, restart_limit=1, "Submission restart limit = %d\n" "Submission throttle limit = %d\n" "Use temporary directory = %s\n" + "Hash workspaces = %s\n" "------------------------------------------", self._out_path, submission_attempts, restart_limit, throttle, - use_tmp + use_tmp, hash_ws ) def _stage_parameterized(self, dag): @@ -579,9 +582,13 @@ def _stage_parameterized(self, dag): str(combo)) # Compute this step's combination name and workspace. combo_str = combo.get_param_string(self.used_params[step]) - workspace = \ - make_safe_path(self._out_path, step, combo_str) - logger.debug("Workspace: %s", workspace) + if self._hash_ws: + workspace = make_safe_path( + self._out_path, step, md5(combo_str).hexdigest()) + else: + workspace = \ + make_safe_path(self._out_path, step, combo_str) + logger.debug("Workspace: %s", workspace) combo_str = "{}_{}".format(step, combo_str) self.workspaces[combo_str] = workspace diff --git a/maestrowf/maestro.py b/maestrowf/maestro.py index 4a4d3c13c..318ec9bdc 100644 --- a/maestrowf/maestro.py +++ b/maestrowf/maestro.py @@ -216,7 +216,7 @@ def run_study(args): study.setup_environment() study.configure_study( throttle=args.throttle, submission_attempts=args.attempts, - restart_limit=args.rlimit, use_tmp=args.usetmp) + restart_limit=args.rlimit, use_tmp=args.usetmp, hash_ws=args.hashws) # Stage the study. path, exec_dag = study.stage() @@ -319,6 +319,10 @@ def setup_argparser(): run.add_argument("-fg", action="store_true", default=False, help="Runs the backend conductor in the foreground " "instead of using nohup. [Default: %(default)s]") + run.add_argument("--hashws", action="store_true", default=False, + help="Enable hashing of subdirectories in parameterized " + "studies (NOTE: breaks commands that use parameter labels" + " to search directories). [Default: %(default)s]") prompt_opts = run.add_mutually_exclusive_group() prompt_opts.add_argument(