Skip to content

Commit

Permalink
env variable bugfix (#247)
Browse files Browse the repository at this point in the history
* removed expansion of env variables in provenance specs

* added to CHANGELOG

* corrected unit tests

* added todos

* working on expand_env_var function

* fixed type bug

* removed comments, prints, fixed style

* added to CHANGELOG

* tweaks

* fixed output path

* allowed study name to use env vars

* fixed style

* added to CHANGELOG, added support for 'restart'

* Update CHANGELOG.md

* added docs strings

* added to docs

* updated comment

* fixed CHANGELOG
  • Loading branch information
ben-bay committed Jul 16, 2020
1 parent 353ad62 commit 8ff4ffd
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 24 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Fixed Docker bug pertaining to Ubuntu.

### Fixed
- Removed expansion of env variables in shell sections (`cmd` and `restart`) of provenance
specs. This allows the shell command itself to expand environment variables, and gives
users greater flexibility.
- Allowed environment variables to be properly expanded in study `description.name`.
- Tilde (~) now properly expands as part of a path in non-shell sections.

## [1.6.2]

### Added
Expand Down
53 changes: 52 additions & 1 deletion docs/source/merlin_variables.rst
Original file line number Diff line number Diff line change
Expand Up @@ -93,17 +93,68 @@ Variables defined by a specification file in the ``env`` section, as in this exa
env:
variables:
ID: 42
EXAMPLE_VAR: hello
As long as they're defined in order, you can nest user variables like this:

Like all other Merlin variables, these may be used within specification steps as below:
.. code-block:: yaml
env:
variables:
EXAMPLE_VAR: hello
WORKER_NAME: $(EXAMPLE_VAR)_worker
Like all other Merlin variables, user variables may be used anywhere (as a yaml key or value) within a specification as below:

.. code-block:: yaml
cmd: echo "$(EXAMPLE_VAR), world!"
...
$(WORKER_NAME):
args: ...
If you want to programmatically define the study name, you can include variables
in the ``description.name`` field as long as it makes a valid filename:

.. code-block:: yaml
description:
name: my_$(EXAMPLE_VAR)_study_$(ID)
description: example of programmatic study name
The above would produce a study called ``my_hello_study_42``.

Environment variables
---------------------
Merlin expands Unix environment variables for you. The values of the user variables below would be expanded:

.. code-block:: yaml
env:
variables:
MY_HOME: ~/
MY_PATH: $PATH
USERNAME: ${USER}
However, Merlin leaves environment variables found in shell scripts (think ``cmd`` and ``restart``) alone.
So this step:

.. code-block:: yaml
- name: step1
description: an example
run:
cmd: echo $PATH ; echo $(MY_PATH)
...would be expanded as:

.. code-block:: yaml
- name: step1
description: an example
run:
cmd: echo $PATH ; echo /an/example/:/path/string/
Step return variables
-----------------------------------
Expand Down
45 changes: 38 additions & 7 deletions merlin/spec/expansion.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@

import logging
from collections import ChainMap
from copy import deepcopy
from os.path import expanduser, expandvars

from merlin.common.abstracts.enums import ReturnCode
Expand Down Expand Up @@ -75,17 +76,22 @@ def var_ref(string):
return f"$({string})"


def expand_line(line, var_dict):
def expand_line(line, var_dict, env_vars=False):
"""
Expand one line of text by substituting environment
and user variables, as well as variables in 'var_dict'.
Expand one line of text by substituting user variables,
optionally environment variables, as well as variables in 'var_dict'.
"""
line = expandvars(expanduser(line))
if not contains_token(line):
if (
(not contains_token(line))
and (not contains_shell_ref(line))
and ("~" not in line)
):
return line
for key, val in var_dict.items():
if key in line:
line = line.replace(var_ref(key), str(val))
if env_vars:
line = expandvars(expanduser(line))
return line


Expand All @@ -102,6 +108,32 @@ def expand_by_line(text, var_dict):
return result


def expand_env_vars(spec):
"""
Expand environment variables for all sections of a spec, except
for values with the key 'cmd' or 'restart' (these are executable
shell scripts, so environment variable expansion would be redundant).
"""
def recurse(section):
if section is None:
return section
if isinstance(section, str):
return expandvars(expanduser(section))
if isinstance(section, dict):
for k, v in section.items():
if k in ["cmd", "restart"]:
continue
section[k] = recurse(v)
elif isinstance(section, list):
for i, elem in enumerate(deepcopy(section)):
section[i] = recurse(elem)
return section

for name, section in spec.sections.items():
setattr(spec, name, recurse(section))
return spec


def determine_user_variables(*user_var_dicts):
"""
Given an arbitrary number of dictionaries, determine them
Expand Down Expand Up @@ -137,8 +169,7 @@ def determine_user_variables(*user_var_dicts):
new_val = new_val.replace(
var_determined_key, determined_results[determined_key]
)
if contains_shell_ref(new_val):
new_val = expandvars(new_val)
new_val = expandvars(expanduser(new_val))
determined_results[key.upper()] = new_val
return determined_results

Expand Down
49 changes: 34 additions & 15 deletions merlin/study/study.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,21 @@

from merlin.common.abstracts.enums import ReturnCode
from merlin.spec import defaults
from merlin.spec.expansion import determine_user_variables, expand_by_line, expand_line
from merlin.spec.expansion import (
determine_user_variables,
expand_by_line,
expand_env_vars,
expand_line,
)
from merlin.spec.override import dump_with_overrides, error_override_vars
from merlin.spec.specification import MerlinSpec
from merlin.study.dag import DAG
from merlin.utils import contains_token, get_flux_cmd, load_array_file
from merlin.utils import (
contains_shell_ref,
contains_token,
get_flux_cmd,
load_array_file,
)


LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -116,6 +126,9 @@ def __init__(
self.load_dag()

def write_original_spec(self, filename):
"""
Copy the original spec into merlin_info/ as '<name>.orig.yaml'.
"""
spec_name = os.path.join(self.info, filename + ".orig.yaml")
shutil.copyfile(self.original_spec.path, spec_name)

Expand Down Expand Up @@ -166,7 +179,8 @@ def get_expanded_spec(self):
# expand reserved words
new_spec_text = expand_by_line(new_spec_text, self.special_vars)

return MerlinSpec.load_spec_from_string(new_spec_text)
result = MerlinSpec.load_spec_from_string(new_spec_text)
return expand_env_vars(result)

@property
def samples(self):
Expand Down Expand Up @@ -275,7 +289,7 @@ def output_path(self):
):
output_path = str(self.override_vars["OUTPUT_PATH"])

output_path = expand_line(output_path, self.user_vars)
output_path = expand_line(output_path, self.user_vars, env_vars=True)
output_path = os.path.abspath(output_path)
if not os.path.isdir(output_path):
os.makedirs(output_path)
Expand Down Expand Up @@ -344,23 +358,27 @@ def expanded_spec(self):
expanded_filepath = os.path.join(self.info, expanded_name)

# expand provenance spec filename
if contains_token(self.original_spec.name):
if contains_token(self.original_spec.name) or contains_shell_ref(
self.original_spec.name
):
name = f"{result.description['name'].replace(' ', '_')}_{self.timestamp}"
name = expand_line(name, {}, env_vars=True)
if "/" in name:
raise ValueError(
f"Expanded value '{name}' for field 'name' in section 'description' is not a valid filename."
)
expanded_workspace = os.path.join(self.output_path, name)

sample_file = result.merlin["samples"]["file"]
if sample_file.startswith(self.workspace):
new_samples_file = sample_file.replace(
self.workspace, expanded_workspace
)
result.merlin["samples"]["generate"]["cmd"] = result.merlin["samples"][
"generate"
]["cmd"].replace(self.workspace, expanded_workspace)
result.merlin["samples"]["file"] = new_samples_file
if result.merlin["samples"]:
sample_file = result.merlin["samples"]["file"]
if sample_file.startswith(self.workspace):
new_samples_file = sample_file.replace(
self.workspace, expanded_workspace
)
result.merlin["samples"]["generate"]["cmd"] = result.merlin[
"samples"
]["generate"]["cmd"].replace(self.workspace, expanded_workspace)
result.merlin["samples"]["file"] = new_samples_file

shutil.move(self.workspace, expanded_workspace)
self.workspace = expanded_workspace
Expand All @@ -372,6 +390,7 @@ def expanded_spec(self):
result.dump(), MerlinStudy.get_user_vars(result)
)
result = MerlinSpec.load_spec_from_string(new_spec_text)
result = expand_env_vars(result)

# pgen
if self.pgen_file:
Expand Down Expand Up @@ -404,7 +423,7 @@ def expanded_spec(self):
@cached_property
def flux_command(self):
"""
Returns a the flux version
Returns the flux version.
"""
flux_bin = "flux"
if "flux_path" in self.expanded_spec.batch.keys():
Expand Down
6 changes: 5 additions & 1 deletion tests/study/test_study.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
env:
variables:
OUTPUT_PATH: ./studies
PATH_VAR: $PATH
labels:
SHARED: $(SPECROOT)/../shared
Expand Down Expand Up @@ -264,9 +265,12 @@ def test_expanded_spec(self):
assert not TestMerlinStudy.file_contains_string(
self.study.expanded_spec.path, "$(OUTPUT_PATH)"
)
assert not TestMerlinStudy.file_contains_string(
assert TestMerlinStudy.file_contains_string(
self.study.expanded_spec.path, "$PATH"
)
assert not TestMerlinStudy.file_contains_string(
self.study.expanded_spec.path, "PATH_VAR: $PATH"
)

def test_column_label_conflict(self):
"""
Expand Down

0 comments on commit 8ff4ffd

Please sign in to comment.