From ca7dd3a6a830ae6ca68ee84094a75d340577757b Mon Sep 17 00:00:00 2001 From: Sameeul Bashir Samee Date: Fri, 8 Nov 2024 11:23:11 -0500 Subject: [PATCH] Separate dir creation from compilation (#293) * Separate dir creation from compilation * Fix lints and update test * Minor updates --- src/sophios/api/pythonapi.py | 2 +- src/sophios/compiler.py | 5 ---- src/sophios/main.py | 2 +- src/sophios/post_compile.py | 46 +++++++++++++++++++++++++++++++++++- tests/test_examples.py | 3 ++- 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/src/sophios/api/pythonapi.py b/src/sophios/api/pythonapi.py index 2d0f8665..3a96baea 100644 --- a/src/sophios/api/pythonapi.py +++ b/src/sophios/api/pythonapi.py @@ -758,7 +758,7 @@ def run(self) -> None: post_compile.cwl_docker_extract(args, self.process_name) rose_tree = post_compile.remove_entrypoints(args, rose_tree) - + post_compile.find_and_create_output_dirs(rose_tree) # Do NOT capture stdout and/or stderr and pipe warnings and errors into a black hole. retval = run_local_module.run_local(args, rose_tree, args.cachedir, args.cwl_runner, True) diff --git a/src/sophios/compiler.py b/src/sophios/compiler.py index e81e843b..62e1aaba 100644 --- a/src/sophios/compiler.py +++ b/src/sophios/compiler.py @@ -880,11 +880,6 @@ def compile_workflow_once(yaml_tree_ast: YamlTree, newval['format'] = in_format new_keyval = {key: newval} elif 'Directory' == in_dict['type']: - if not args.ignore_dir_path: - ldir = Path(in_dict['value']) - if not ldir.is_absolute(): - ldir = Path('autogenerated') / ldir - ldir.mkdir(parents=True, exist_ok=True) newval = {'class': 'Directory', 'location': in_dict['value']} new_keyval = {key: newval} # TODO: Check for all valid types? diff --git a/src/sophios/main.py b/src/sophios/main.py index 14ec0707..85622a38 100644 --- a/src/sophios/main.py +++ b/src/sophios/main.py @@ -196,7 +196,7 @@ def main() -> None: pc.cwl_docker_extract(args, yaml_stem) rose_tree = pc.remove_entrypoints(args, rose_tree) io.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) - + pc.find_and_create_output_dirs(rose_tree) run_local.run_local(args, rose_tree, args.cachedir, args.cwl_runner, False) # Finally, since there is an output file copying bug in cwltool, diff --git a/src/sophios/post_compile.py b/src/sophios/post_compile.py index e5bb126d..406b9570 100644 --- a/src/sophios/post_compile.py +++ b/src/sophios/post_compile.py @@ -1,11 +1,55 @@ import argparse from pathlib import Path import subprocess as sub - +from typing import Dict, Union from . import plugins from .wic_types import RoseTree +def find_output_dirs(data: Union[RoseTree, Dict, list]) -> list: + """ + Recursively searches through a nested structure and finds all dictionaries + that contain the key 'location', and a key 'class' with a value of 'Directory'. + + Args: + data (any): The data to search through, which can be a dictionary, list, + or any other structure. + + Returns: + list: A list of location values. + """ + results = [] + if isinstance(data, Dict): + if "class" in data and data["class"] == "Directory" and "location" in data: + results.append(data["location"]) + for value in data.values(): + results.extend(find_output_dirs(value)) + elif isinstance(data, list): + for item in data: + results.extend(find_output_dirs(item)) + + return results + + +def create_output_dirs(output_dirs: list, basepath: str = 'autogenerated') -> None: + """ + Creates all the directories that are needed for the outputs of a workflow. + """ + for output_dir in output_dirs: + dir_path = Path(output_dir) + if not dir_path.is_absolute(): + dir_path = Path(basepath) / dir_path + dir_path.mkdir(parents=True, exist_ok=True) + + +def find_and_create_output_dirs(rose_tree: RoseTree, basepath: str = 'autogenerated') -> None: + """ + Finds all output directories in the workflow and creates them. + """ + output_dirs = find_output_dirs(rose_tree.data.workflow_inputs_file) + create_output_dirs(output_dirs, basepath) + + def cwl_docker_extract(args: argparse.Namespace, file_name: str) -> None: """Helper function to do the cwl_docker_extract""" # cwl-docker-extract recursively `docker pull`s all images in all subworkflows. diff --git a/tests/test_examples.py b/tests/test_examples.py index 7603a2ca..91e5077d 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -22,7 +22,7 @@ from sophios import auto_gen_header from sophios.cli import get_args from sophios.utils_yaml import wic_loader -from sophios.post_compile import cwl_docker_extract, remove_entrypoints +from sophios.post_compile import cwl_docker_extract, remove_entrypoints, find_and_create_output_dirs from sophios.wic_types import NodeData, StepId, Yaml, YamlTree, Json from sophios.utils_graphs import get_graph_reps @@ -223,6 +223,7 @@ def run_workflows(yml_path_str: str, yml_path: Path, cwl_runner: str, args: argp rose_tree = sophios.plugins.cwl_update_outputs_optional_rosetree(rose_tree) sophios.input_output.write_to_disk(rose_tree, Path('autogenerated/'), True, args.inputs_file) # NOTE: Do not use --cachedir; we want to actually test everything. + find_and_create_output_dirs(rose_tree) retval = sophios.run_local.run_local(args, rose_tree, None, cwl_runner, True) assert retval == 0