Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2451 schema build update defaults #2479

Merged
merged 15 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@

### General

- Update `schema build` functionality to automatically update defaults which have changed in the `nextflow.config` ([#2479](https://github.com/nf-core/tools/pull/2479))

# [v2.10 - Nickel Ostrich](https://github.com/nf-core/tools/releases/tag/2.10) + [2023-09-25]

### Template
Expand Down
66 changes: 52 additions & 14 deletions nf_core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(self):
self.pipeline_dir = None
self.schema_filename = None
self.schema_defaults = {}
self.schema_params = []
self.schema_params = {}
self.input_params = {}
self.pipeline_params = {}
self.invalid_nextflow_config_default_parameters = {}
Expand Down Expand Up @@ -110,7 +110,7 @@ def load_schema(self):
with open(self.schema_filename, "r") as fh:
self.schema = json.load(fh)
self.schema_defaults = {}
self.schema_params = []
self.schema_params = {}
log.debug(f"JSON file loaded: {self.schema_filename}")

def sanitise_param_default(self, param):
Expand Down Expand Up @@ -141,6 +141,9 @@ def sanitise_param_default(self, param):
param["default"] = float(param["default"])
return param

if param["default"] is None:
return param

# Strings
param["default"] = str(param["default"])
return param
Expand All @@ -154,18 +157,20 @@ def get_schema_defaults(self):
"""
# Top level schema-properties (ungrouped)
for p_key, param in self.schema.get("properties", {}).items():
self.schema_params.append(p_key)
self.schema_params[p_key] = ("properties", p_key)
if "default" in param:
param = self.sanitise_param_default(param)
self.schema_defaults[p_key] = param["default"]
if param["default"] is not None:
self.schema_defaults[p_key] = param["default"]

# Grouped schema properties in subschema definitions
for _, definition in self.schema.get("definitions", {}).items():
for defn_name, definition in self.schema.get("definitions", {}).items():
for p_key, param in definition.get("properties", {}).items():
self.schema_params.append(p_key)
self.schema_params[p_key] = ("definitions", defn_name, "properties", p_key)
if "default" in param:
param = self.sanitise_param_default(param)
self.schema_defaults[p_key] = param["default"]
if param["default"] is not None:
self.schema_defaults[p_key] = param["default"]

def save_schema(self, suppress_logging=False):
"""Save a pipeline schema to a file"""
Expand Down Expand Up @@ -239,9 +244,9 @@ def validate_default_params(self):
except jsonschema.exceptions.ValidationError as e:
raise AssertionError(f"Default parameters are invalid: {e.message}")
for param, default in self.schema_defaults.items():
if default in ("null", "", None, "None"):
if default in ("null", "", None, "None") or default is False:
log.warning(
f"[yellow][!] Default parameter '{param}' is empty or null. It is advisable to remove the default from the schema"
f"[yellow][!] Default parameter '{param}' is empty, null, or False. It is advisable to remove the default from the schema"
)
log.info("[green][✓] Default parameters match schema validation")

Expand Down Expand Up @@ -762,12 +767,15 @@ def prompt_remove_schema_notfound_config(self, p_key):
def add_schema_found_configs(self):
"""
Add anything that's found in the Nextflow params that's missing in the pipeline schema
Update defaults if they have changed
"""
params_added = []
params_ignore = self.pipeline_params.get("validationSchemaIgnoreParams", "").strip("\"'").split(",")
params_ignore.append("validationSchemaIgnoreParams")
for p_key, p_val in self.pipeline_params.items():
s_key = self.schema_params.get(p_key)
# Check if key is in schema parameters
# Key is in pipeline but not in schema or ignored from schema
if p_key not in self.schema_params and p_key not in params_ignore:
if (
self.no_prompts
Expand All @@ -782,7 +790,35 @@ def add_schema_found_configs(self):
self.schema["properties"][p_key] = self.build_schema_param(p_val)
log.debug(f"Adding '{p_key}' to pipeline schema")
params_added.append(p_key)

# Param has a default that does not match the schema
elif p_key in self.schema_defaults and (s_def := self.schema_defaults[p_key]) != (
p_def := self.build_schema_param(p_val).get("default")
):
if self.no_prompts or Confirm.ask(
f":sparkles: Default for [bold]'params.{p_key}'[/] in the pipeline config does not match schema. (schema: '{s_def}' | config: '{p_def}'). "
"[blue]Update pipeline schema?"
):
s_key_def = s_key + ("default",)
if p_def is None:
nf_core.utils.nested_delitem(self.schema, s_key_def)
log.debug(f"Removed '{p_key}' default from pipeline schema")
else:
nf_core.utils.nested_setitem(self.schema, s_key_def, p_def)
log.debug(f"Updating '{p_key}' default to '{p_def}' in pipeline schema")
# There is no default in schema but now there is a default to write
elif (
s_key
and (p_key not in self.schema_defaults)
and (p_key not in params_ignore)
and (p_def := self.build_schema_param(p_val).get("default"))
):
if self.no_prompts or Confirm.ask(
f":sparkles: Default for [bold]'params.{p_key}'[/] is not in schema (def='{p_def}'). "
"[blue]Update pipeline schema?"
):
s_key_def = s_key + ("default",)
nf_core.utils.nested_setitem(self.schema, s_key_def, p_def)
log.debug(f"Updating '{p_key}' default to '{p_def}' in pipeline schema")
return params_added

def build_schema_param(self, p_val):
Expand All @@ -806,13 +842,15 @@ def build_schema_param(self, p_val):
p_val = None

# Booleans
if p_val in ["True", "False"]:
p_val = p_val == "True" # Convert to bool
if p_val in ["true", "false", "True", "False"]:
p_val = p_val in ["true", "True"] # Convert to bool
p_type = "boolean"

p_schema = {"type": p_type, "default": p_val}
# Don't return a default for anything false-y except 0
if not p_val and not (p_val == 0 and p_val is not False):
return {"type": p_type}

return p_schema
return {"type": p_type, "default": p_val}

def launch_web_builder(self):
"""
Expand Down
27 changes: 27 additions & 0 deletions nf_core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1131,3 +1131,30 @@ def validate_file_md5(file_name, expected_md5hex):
raise IOError(f"{file_name} md5 does not match remote: {expected_md5hex} - {file_md5hex}")

return True


def nested_setitem(d, keys, value):
    """Sets the value in a nested dict using a sequence of keys to traverse

    Every key except the last is used to step one level deeper into ``d``;
    the last key is then assigned ``value`` on the container that was reached.
    Intermediate levels are never created, so they must already exist.

    Args:
        d (dict): the nested dictionary to traverse
        keys (Sequence[Any]): A non-empty list or tuple of keys to iteratively traverse
        value (Any): The value to be set for the last key in the chain

    Raises:
        IndexError: if ``keys`` is empty
        KeyError: if an intermediate key is missing from a dict on the path
    """
    # Fail with an explicit message instead of the bare "index out of range"
    # that ``keys[-1]`` would otherwise produce for an empty sequence.
    if not keys:
        raise IndexError("nested_setitem() requires at least one key")
    current = d
    for k in keys[:-1]:
        current = current[k]
    current[keys[-1]] = value


def nested_delitem(d, keys):
    """Deletes a key from a nested dictionary

    Every key except the last is used to step one level deeper into ``d``;
    the last key is then deleted from the container that was reached. The
    emptied parent containers are left in place.

    Args:
        d (dict): the nested dictionary to traverse
        keys (Sequence[Any]): A non-empty list or tuple of keys to iteratively traverse, deleting the final one

    Raises:
        IndexError: if ``keys`` is empty
        KeyError: if any key on the path (including the final one) is missing from a dict
    """
    # Fail with an explicit message instead of the bare "index out of range"
    # that ``keys[-1]`` would otherwise produce for an empty sequence.
    if not keys:
        raise IndexError("nested_delitem() requires at least one key")
    current = d
    for k in keys[:-1]:
        current = current[k]
    del current[keys[-1]]
14 changes: 14 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,3 +207,17 @@ def test_validate_file_md5():
nf_core.utils.validate_file_md5(test_file, different_md5)
with pytest.raises(ValueError):
nf_core.utils.validate_file_md5(test_file, non_hex_string)


def test_nested_setitem():
    """Overwriting a leaf via nested_setitem updates it in place and nothing else."""
    nested = {"a": {"b": {"c": "value"}}}
    key_path = ["a", "b", "c"]
    nf_core.utils.nested_setitem(nested, key_path, "value new")
    # The leaf holds the new value...
    assert nested["a"]["b"]["c"] == "value new"
    # ...and the overall structure is otherwise unchanged.
    assert nested == {"a": {"b": {"c": "value new"}}}


def test_nested_delitem():
    """Deleting a leaf via nested_delitem removes only that key, keeping its parents."""
    nested = {"a": {"b": {"c": "value"}}}
    key_path = ["a", "b", "c"]
    nf_core.utils.nested_delitem(nested, key_path)
    # The leaf key is gone...
    assert "c" not in nested["a"]["b"]
    # ...while the (now empty) parent containers remain.
    assert nested == {"a": {"b": {}}}