Merge pull request #207 from jfennick/source_means_var_not_val
Source means var not val
jfennick authored Apr 9, 2024
2 parents 17c4b74 + 6784e45 commit e6f59d6
Showing 7 changed files with 46 additions and 77 deletions.
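In short: the custom YAML tags !& (anchor), !* (alias), and !ii (inline input) used to parse into a dict with an extra nested 'key' level; after this commit the tag name maps directly to its value. A minimal before/after sketch with an illustrative value:

```python
# Before this commit: one extra level of nesting under 'key'
anchor_old = {'wic_anchor': {'key': 'xtc_output'}}
alias_old = {'wic_alias': {'key': 'xtc_output'}}
inline_old = {'wic_inline_input': {'key': 42}}

# After this commit: the tag maps straight to the value
anchor_new = {'wic_anchor': 'xtc_output'}
alias_new = {'wic_alias': 'xtc_output'}
inline_new = {'wic_inline_input': 42}
```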
16 changes: 8 additions & 8 deletions src/wic/api/pythonapi.py
@@ -201,9 +201,9 @@ def set_input_Step_Workflow(process_self: Any, __name: str, __value: Any) -> Any
# (Very useful for regression testing!)
# NOTE: process_name is either clt name or workflow name
tmp = __value.value if __value.value else f"{__name}{process_self.process_name}"
alias_dict = {'wic_alias': {'key': tmp}}
alias_dict = {'wic_alias': tmp}
local_input._set_value(alias_dict, linked=True)
anchor_dict = {'wic_anchor': {'key': tmp}}
anchor_dict = {'wic_anchor': tmp}
__value._set_value(anchor_dict, linked=True)
except BaseException as exc:
raise exc
@@ -223,7 +223,7 @@ def set_input_Step_Workflow(process_self: Any, __name: str, __value: Any) -> Any
__value._set_value(f"{tmp}", linked=True)
else:
anchor_dict = __value.value
alias_dict = {'wic_alias': {'key': anchor_dict['wic_anchor']['key']}}
alias_dict = {'wic_alias': anchor_dict['wic_anchor']}
local_input._set_value(alias_dict, linked=True)
except BaseException as exc:
raise exc
@@ -237,7 +237,7 @@ def set_input_Step_Workflow(process_self: Any, __name: str, __value: Any) -> Any
f"got {__value.__class__.__name__}, "
f"expected {obj.inp_type.__name__}"
)
ii_dict = {'wic_inline_input': {'key': __value}}
ii_dict = {'wic_inline_input': __value}
process_self.inputs[index]._set_value(ii_dict)


@@ -382,12 +382,12 @@ def _yml(self) -> dict:
if isinstance(inp.value, Path):
# Special case for Path since it does not inherit from YAMLObject
in_dict[inp.name] = str(inp.value)
elif isinstance(inp.value, dict) and isinstance(inp.value.get('wic_alias', {}).get('key', {}), Path):
elif isinstance(inp.value, dict) and isinstance(inp.value.get('wic_alias', {}), Path):
# Special case for Path since it does not inherit from YAMLObject
in_dict[inp.name] = {'wic_alias': {'key': str(inp.value['wic_alias']['key'])}}
elif isinstance(inp.value, dict) and isinstance(inp.value.get('wic_inline_input', {}).get('key', {}), Path):
in_dict[inp.name] = {'wic_alias': str(inp.value['wic_alias'])}
elif isinstance(inp.value, dict) and isinstance(inp.value.get('wic_inline_input', {}), Path):
# Special case for Path since it does not inherit from YAMLObject
in_dict[inp.name] = {'wic_inline_input': {'key': str(inp.value['wic_inline_input']['key'])}}
in_dict[inp.name] = {'wic_inline_input': str(inp.value['wic_inline_input'])}
elif isinstance(inp.value, str):
in_dict[inp.name] = inp.value # Obviously strings are serializable
elif isinstance(inp.value, yaml.YAMLObject):
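To make the flattened serialization concrete, here is a stand-alone sketch of the new Path special cases in _yml(); serialize_input_value is a hypothetical helper written only for illustration:

```python
from pathlib import Path

def serialize_input_value(value):
    # Hypothetical helper restating the new Path special cases in _yml():
    # the alias / inline-input value is stored directly, with no 'key' level.
    if isinstance(value, Path):
        return str(value)
    if isinstance(value, dict) and isinstance(value.get('wic_alias', {}), Path):
        return {'wic_alias': str(value['wic_alias'])}
    if isinstance(value, dict) and isinstance(value.get('wic_inline_input', {}), Path):
        return {'wic_inline_input': str(value['wic_inline_input'])}
    return value

print(serialize_input_value({'wic_alias': Path('/data/protein.pdb')}))
# {'wic_alias': '/data/protein.pdb'}
```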
4 changes: 2 additions & 2 deletions src/wic/ast.py
@@ -287,12 +287,12 @@ def python_script_generate_cwl(yaml_tree_tuple: YamlTree,
yml_args = copy.deepcopy(steps[i][step_key]['in'])
python_script_path = yml_args.get('script', '')
if isinstance(python_script_path, dict) and 'wic_inline_input' in python_script_path:
python_script_path = python_script_path['wic_inline_input']['key']
python_script_path = python_script_path['wic_inline_input']
# NOTE: The existence of the script: tag should now be guaranteed in the schema
del yml_args['script']
python_script_docker_pull = yml_args.get('dockerPull', '') # Optional
if isinstance(python_script_docker_pull, dict) and 'wic_inline_input' in python_script_docker_pull:
python_script_docker_pull = python_script_docker_pull['wic_inline_input']['key']
python_script_docker_pull = python_script_docker_pull['wic_inline_input']
if 'dockerPull' in yml_args:
del yml_args['dockerPull']
del steps[i][step_key]['in']['dockerPull']
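The same flattening shows up where ast.py reads the script: and dockerPull: values of a python_script step; a short sketch with made-up values:

```python
# Illustrative 'in' block of a python_script step after parsing with the !ii tag
yml_args = {'script': {'wic_inline_input': 'scripts/align.py'},
            'dockerPull': {'wic_inline_input': 'docker.io/library/python:3.11'}}

python_script_path = yml_args.get('script', '')
if isinstance(python_script_path, dict) and 'wic_inline_input' in python_script_path:
    python_script_path = python_script_path['wic_inline_input']  # flat lookup, no ['key']
print(python_script_path)  # scripts/align.py
```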
42 changes: 20 additions & 22 deletions src/wic/compiler.py
@@ -451,8 +451,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
out_key = keys[0]
out_val = out_val[out_key]
if isinstance(out_val, Dict) and 'wic_anchor' in out_val:
out_val = out_val['wic_anchor']
edgedef = out_val['key']
edgedef = out_val['wic_anchor']

# NOTE: There can only be one definition, but multiple call sites.
if not explicit_edge_defs_copy.get(edgedef):
@@ -469,13 +468,12 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
# Extract input value into separate yml file
# Replace it here with a new variable name
arg_val = steps[i][step_key]['in'][arg_key]

# Convert native YAML to a JSON-encoded string for specific tags.
tags = ['config']
if arg_key in tags and isinstance(arg_val, Dict) and ('wic_inline_input' in arg_val):
# Do NOT wrap config: in {'source': ...}
arg_val = {'wic_inline_input': {'key': json.dumps(arg_val['wic_inline_input']['key'])}}
elif isinstance(arg_val, str):
arg_val = {'source': arg_val}
arg_val = {'wic_inline_input': json.dumps(arg_val['wic_inline_input'])}

# Use triple underscore for namespacing so we can split later
in_name = f'{step_name_i}___{arg_key}' # {step_name_i}_input___{arg_key}

@@ -495,21 +493,21 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
# NOTE: Exclude cwl_watcher from explicit edge dereferences.
# Since cwl_watcher requires explicit filenames for globbing,
# we do not want to replace them with internal CWL dependencies!
if not explicit_edge_defs_copy.get(arg_val['key']):
if not explicit_edge_defs_copy.get(arg_val):
if is_root and not testing:
# Even if is_root, we don't want to raise an Exception
# here because in test_cwl_embedding_independence, we
# recompile all subworkflows as if they were root. That
# will cause this code path to be taken but it is not
# actually an error. Add a CWL input for testing only.
raise Exception(f"Error! No definition found for &{arg_val['key']}!")
raise Exception(f"Error! No definition found for &{arg_val}!")
inputs_workflow.update({in_name: in_dict})
steps[i][step_key]['in'][arg_key] = {'source': in_name}
# Add a 'dummy' value to explicit_edge_calls anyway, because
# that determines sub_args_provided when the recursion returns.
explicit_edge_calls_copy.update({in_name: (namespaces + [step_name_i], arg_key)})
else:
(nss_def_init, var) = explicit_edge_defs_copy[arg_val['key']]
(nss_def_init, var) = explicit_edge_defs_copy[arg_val]

nss_def_embedded = var.split('___')[:-1]
nss_call_embedded = arg_key.split('___')[:-1]
@@ -546,7 +544,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
elif len(nss_call_tails) > 1:
inputs_workflow.update({in_name: in_dict})
# Store explicit edge call site info up through the recursion.
d = {in_name: explicit_edge_defs_copy[arg_val['key']]}
d = {in_name: explicit_edge_defs_copy[arg_val]}
# d = {in_name, (namespaces + [step_name_i], var)} # ???
explicit_edge_calls_copy.update(d)
steps[i][step_key]['in'][arg_key] = {'source': in_name}
@@ -601,7 +599,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,

utils_graphs.add_graph_edge(args, graph_init, nss_def, nss_call, label, color='blue')
elif isinstance(arg_val, Dict) and 'wic_inline_input' in arg_val:
arg_val = arg_val['wic_inline_input']['key']
arg_val = arg_val['wic_inline_input']

if arg_key in steps[i][step_key].get('scatter', []):
# Promote scattered input types to arrays
@@ -624,6 +622,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
graphdata.nodes.append((input_node_name, attrs))
graphdata.edges.append((input_node_name, step_node_name, {}))
else:
arg_var: str = arg_val
# Leave un-evaluated, i.e. allow the user to inject raw CWL.
# The un-evaluated string should refer to either an inputs: variable
# or an internal CWL dependency, i.e. an output from a previous step.
@@ -638,20 +637,20 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
# (yet) be inlined. Somehow, if they are not marked with
# inlineable: False, test_inline_subworkflows can still pass.
# This Exception will (correctly) cause such inlineing tests to fail.
if arg_val['source'] not in yaml_tree.get('inputs', {}):
if arg_var not in yaml_tree.get('inputs', {}):
if not args.allow_raw_cwl:
print(f"Warning! Did you forget to use !ii before {arg_val['source']} in {yaml_stem}.yml?")
print(f"Warning! Did you forget to use !ii before {arg_var} in {yaml_stem}.yml?")
print('If you want to compile the workflow anyway, use --allow_raw_cwl')
sys.exit(1)

inputs = yaml_tree.get('inputs', {})
unbound_lit_var = 'Error! Unbound literal variable'
if inputs == {}:
raise Exception(f"{unbound_lit_var}{arg_val['source']} not in inputs: tag in {yaml_stem}.yml")
raise Exception(f"{unbound_lit_var}{arg_var} not in inputs: tag in {yaml_stem}.yml")
inputs_dump = yaml.dump({'inputs': inputs})
raise Exception(f"{unbound_lit_var}{arg_val['source']} not in\n{inputs_dump}\nin {yaml_stem}.yml")
raise Exception(f"{unbound_lit_var}{arg_var} not in\n{inputs_dump}\nin {yaml_stem}.yml")

inputs_key_dict = yaml_tree['inputs'][arg_val['source']]
inputs_key_dict = yaml_tree['inputs'][arg_var]
if 'doc' in inputs_key_dict:
inputs_key_dict['doc'] += '\\n' + in_dict.get('doc', '')
else:
Expand All @@ -661,12 +660,12 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
else:
inputs_key_dict['label'] = in_dict.get('label', '')

if arg_val['source'] in input_mapping_copy:
input_mapping_copy[arg_val['source']].append(in_name)
if arg_var in input_mapping_copy:
input_mapping_copy[arg_var].append(in_name)
else:
input_mapping_copy[arg_val['source']] = [in_name]
input_mapping_copy[arg_var] = [in_name]
# TODO: We can use un-evaluated variable names for input mapping; no notation for output mapping!
steps[i][step_key]['in'][arg_key] = arg_val # Leave un-evaluated
steps[i][step_key]['in'][arg_key] = {'source': arg_var} # Leave un-evaluated

for arg_key in args_required:
# print('arg_key', arg_key)
@@ -854,8 +853,7 @@ def compile_workflow_once(yaml_tree_ast: YamlTree,
else:
# We cannot store string values as a dict, so use type: ignore
arg_val = in_dict['value']
new_val = arg_val['source'] if isinstance(arg_val, Dict) and 'source' in arg_val else arg_val
new_keyval = {key: new_val}
new_keyval = {key: arg_val}
# else:
# raise Exception(f"Error! Unknown type: {in_dict['type']}")
yaml_inputs.update(new_keyval)
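Two compiler behaviors touched above, restated as a minimal sketch with made-up values: the !ii value of a config: key is now JSON-encoded directly (no 'key' level, no {'source': ...} wrapper), and a plain string argument is kept as a bare variable name (arg_var) and only wrapped in 'source' when it is written back into the step's in: block.

```python
import json

# 1) biobb-style config: key tagged with !ii -> JSON-encode the flat value
arg_val = {'wic_inline_input': {'mdp': {'nsteps': 1000, 'dt': 0.002}}}
arg_val = {'wic_inline_input': json.dumps(arg_val['wic_inline_input'])}
print(arg_val)

# 2) raw CWL string: stays a bare variable name for input mapping,
#    and is only wrapped in 'source' at the call site
arg_var = 'prev_step/output_pdb_path'
print({'source': arg_var})
```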
2 changes: 0 additions & 2 deletions src/wic/inference.py
@@ -219,7 +219,6 @@ def perform_edge_inference(args: argparse.Namespace,
else:
vars_workflow_output_internal.append(f'{step_name_j}/{out_key}')

# arg_val = {'source': f'{step_name_j}/{out_key}'}
arg_val = f'{step_name_j}/{out_key}'
arg_keyval = {arg_key: arg_val}
steps_i = utils_cwl.add_yamldict_keyval_in(steps[i], step_key, arg_keyval)
@@ -332,7 +331,6 @@ def perform_edge_inference(args: argparse.Namespace,
# which should match in the parent workflow.
inputs_workflow.update({in_name: in_dict})

# arg_keyval = {arg_key: {'source': in_name}}
arg_keyval = {arg_key: in_name}
steps_i = utils_cwl.add_yamldict_keyval_in(steps[i], step_key, arg_keyval)
return steps_i
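For completeness, edge inference already stored the bare dependency string, so only the stale commented-out 'source' variants are removed; a tiny sketch with illustrative names:

```python
step_name_j, out_key, arg_key = 'grompp_step', 'output_tpr_path', 'input_tpr_path'
arg_keyval = {arg_key: f'{step_name_j}/{out_key}'}
print(arg_keyval)  # {'input_tpr_path': 'grompp_step/output_tpr_path'}
```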
22 changes: 4 additions & 18 deletions src/wic/input_output.py
@@ -68,20 +68,6 @@ def write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool) -> N
cwl_tree = node_data.compiled_cwl
yaml_inputs = node_data.workflow_inputs_file

# NOTE: As part of the scatter feature we introduced the use of 'source',
# but in some cases (biobb 'config' tag) it is not being removed correctly
# in the compiler, so as a last resort remove it here.
yaml_inputs_no_source = {}
for key, val in yaml_inputs.items():
try:
if isinstance(val, str):
val_dict = json.loads(val)
if 'source' in val_dict:
val = val_dict['source']
except Exception as e:
pass
yaml_inputs_no_source[key] = val

path.mkdir(parents=True, exist_ok=True)
if relative_run_path:
filename_cwl = f'{yaml_stem}.cwl'
@@ -98,7 +84,7 @@ def write_to_disk(rose_tree: RoseTree, path: Path, relative_run_path: bool) -> N
w.write(auto_gen_header)
w.write(''.join(yaml_content))

yaml_content = yaml.dump(yaml_inputs_no_source, sort_keys=False, line_break='\n', indent=2, Dumper=NoAliasDumper)
yaml_content = yaml.dump(yaml_inputs, sort_keys=False, line_break='\n', indent=2, Dumper=NoAliasDumper)
with open(path / filename_yml, mode='w', encoding='utf-8') as inp:
inp.write(auto_gen_header)
inp.write(yaml_content)
@@ -220,10 +206,10 @@ def write_absolute_yaml_tags(args: argparse.Namespace, in_dict_in: Yaml, namespa
# we don't want users' home directories in the yml files.
cachedir_path = Path(args.cachedir).absolute()
# print('setting cachedir_path to', cachedir_path)
in_dict_in['root_workflow_yml_path'] = {'wic_inline_input': {'key': str(Path(args.yaml).parent.absolute())}}
in_dict_in['root_workflow_yml_path'] = {'wic_inline_input': str(Path(args.yaml).parent.absolute())}

in_dict_in['cachedir_path'] = {'wic_inline_input': {'key': str(cachedir_path)}}
in_dict_in['homedir'] = {'wic_inline_input': {'key': args.homedir}}
in_dict_in['cachedir_path'] = {'wic_inline_input': str(cachedir_path)}
in_dict_in['homedir'] = {'wic_inline_input': args.homedir}

# Add a 'dummy' values to explicit_edge_calls, because
# that determines sub_args_provided when the recursion returns.
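With the compiler no longer emitting {'source': ...} wrappers, the last-resort strip in write_to_disk is removed and yaml_inputs is dumped as-is. A minimal stand-in for that dump call, assuming NoAliasDumper is essentially a Dumper that disables anchors/aliases (the real class lives elsewhere in the package), with made-up input values:

```python
import yaml

class NoAliasDumper(yaml.Dumper):
    # stand-in for the NoAliasDumper referenced in write_to_disk
    def ignore_aliases(self, data):
        return True

yaml_inputs = {'step___config': '{"nsteps": 1000, "dt": 0.002}',
               'protein_pdb_path': 'inputs/1aki.pdb'}
print(yaml.dump(yaml_inputs, sort_keys=False, line_break='\n', indent=2, Dumper=NoAliasDumper))
```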
23 changes: 5 additions & 18 deletions src/wic/schemas/wic_schema.py
@@ -173,17 +173,11 @@ def cwl_schema(name: str, cwl: Json, id_prefix: str) -> Json:
anytype: Dict[Any, Any] = {}

# See utils_yaml.py
aliasprops = default_schema()
aliasprops['properties'] = {'key': str_nonempty}
aliasprops['required'] = ['key']
alias = default_schema()
alias['properties'] = {'wic_alias': aliasprops} # !*
alias['properties'] = {'wic_alias': str_nonempty} # !*

iiprops = default_schema()
iiprops['properties'] = {'key': anytype}
iiprops['required'] = ['key']
ii = default_schema()
ii['properties'] = {'wic_inline_input': iiprops} # !ii
ii['properties'] = {'wic_inline_input': anytype} # !ii

# required = []
for key, val in cwl['inputs'].items():
@@ -211,11 +205,8 @@ def cwl_schema(name: str, cwl: Json, id_prefix: str) -> Json:
if key == 'config' and name == 'config_tag_mdp':
grompp = config_schemas.get('grompp', {})

iiprops_mdp = default_schema()
iiprops_mdp['properties'] = {'key': grompp}
iiprops_mdp['required'] = ['key']
ii_mdp = default_schema()
ii_mdp['properties'] = {'wic_inline_input': iiprops_mdp} # !ii
ii_mdp['properties'] = {'wic_inline_input': grompp} # !ii

inputs_props[key] = ii_mdp
continue
@@ -272,10 +263,8 @@ def cwl_schema(name: str, cwl: Json, id_prefix: str) -> Json:
outputs['properties'] = outputs_props

# See utils_yaml.py
anchorprops = default_schema()
anchorprops['properties'] = {'key': str_nonempty}
anchor = default_schema()
anchor['properties'] = {'wic_anchor': anchorprops} # !&
anchor['properties'] = {'wic_anchor': str_nonempty} # !&

keys_anchors: Json = {}
for key in cwl['outputs'].keys():
@@ -461,10 +450,8 @@ def wic_main_schema(tools_cwl: Tools, yml_stems: List[str], schema_store: Dict[s
in_schema['properties'] = {'script': str_nonempty}

# See utils_yaml.py
anchorprops = default_schema()
anchorprops['properties'] = {'key': str_nonempty}
anchor = default_schema()
anchor['properties'] = {'wic_anchor': anchorprops} # !&
anchor['properties'] = {'wic_anchor': str_nonempty} # !&

# NOTE: We do not know the specific keys statically, so we have to use str_nonempty
out_schema: Json = {'type': 'array', 'items': {'anyOf': [str_nonempty, anchor]}}
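A rough sketch of how the schema fragment for !& changes shape (default_schema() boilerplate elided and str_nonempty approximated; the same flattening applies to !* and !ii):

```python
str_nonempty = {'type': 'string', 'minLength': 1}

# Before: the tag property required a nested object with a 'key' field
anchor_before = {'type': 'object',
                 'properties': {'wic_anchor': {'type': 'object',
                                               'properties': {'key': str_nonempty},
                                               'required': ['key']}}}

# After: the tag property is validated directly against the value schema
anchor_after = {'type': 'object',
                'properties': {'wic_anchor': str_nonempty}}
```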
14 changes: 7 additions & 7 deletions src/wic/utils_yaml.py
@@ -8,16 +8,16 @@
# because then these constructors will fire again.


def anchor_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Dict[str, Any]]:
key = loader.construct_scalar(node)
def anchor_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Any]:
val = loader.construct_scalar(node)
name = 'wic_anchor' # NOT '!&'
return {name: {'key': key}}
return {name: val}


def alias_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Dict[str, Any]]:
key = loader.construct_scalar(node)
def alias_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Any]:
val = loader.construct_scalar(node)
name = 'wic_alias' # NOT '!*'
return {name: {'key': key}}
return {name: val}


def inlineinput_constructor(loader: yaml.SafeLoader, node: yaml.nodes.Node) -> Dict[str, Dict[str, Any]]:
@@ -37,7 +37,7 @@ def inlineinput_constructor(loader: yaml.SafeLoader, node: yaml.nodes.Node) -> D
else:
raise Exception(f'Unknown yaml node type! {node}')
name = 'wic_inline_input' # NOT '!ii'
return {name: {'key': val}}
return {name: val}


def wic_loader() -> Type[yaml.SafeLoader]:
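Finally, a self-contained sketch of what the updated constructors produce when a tagged snippet is parsed. The constructor bodies are taken from the diff above; WicLoaderSketch and the sample yml are illustrative (registration normally happens via wic_loader()):

```python
from typing import Any, Dict
import yaml

def anchor_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Any]:
    return {'wic_anchor': loader.construct_scalar(node)}  # flat, no {'key': ...}

def alias_constructor(loader: yaml.SafeLoader, node: yaml.nodes.ScalarNode) -> Dict[str, Any]:
    return {'wic_alias': loader.construct_scalar(node)}

class WicLoaderSketch(yaml.SafeLoader):  # illustrative stand-in for wic_loader()
    pass

WicLoaderSketch.add_constructor('!&', anchor_constructor)
WicLoaderSketch.add_constructor('!*', alias_constructor)

doc = """
out:
- output_xtc_path: !& traj
in:
  input_xtc_path: !* traj
"""
print(yaml.load(doc, Loader=WicLoaderSketch))
# {'out': [{'output_xtc_path': {'wic_anchor': 'traj'}}],
#  'in': {'input_xtc_path': {'wic_alias': 'traj'}}}
```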
