Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix default data handling of optional file inputs for WPS-1 execution #344

Merged
merged 3 commits into from
Oct 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,18 @@ Changes:

Fixes:
------
- No change.
- Fix handling of default *format* field of `WPS` input definition incorrectly resolved as default *data* by ``PyWPS``
for `Process` that allows optional (``minOccurs=0``) inputs of ``Complex`` type. Specific case is detected with
relevant erroneous data and dropped silently because it should not be present (since omitted in `WPS` request) and
should not generate a `WPS` input (relates to `geopython/pywps#633 <https://github.com/geopython/pywps/issues/633>`_).
- Fix resolution of `CWL` field ``default`` value erroneously inserted as ``"null"`` literal string for inputs generated
from `WPS` definition to avoid potential confusion with valid ``"null"`` input or default string. Default behaviour to
drop or ignore *omitted* inputs are handled by ``"null"`` within ``type`` field in `CWL` definitions.
- Fix ``Wps1Process`` job runner for dispatched execution of `WPS-1 Process` assuming all provided inputs contain data
or reference. Skip omitted optional inputs that are resolved with ``None`` value following above fixes.
- Resolve execution failure of `WPS-1 Process` ``ncdump`` under ``hummingbird`` `Provider`
(fixes issue identified in output logs from notebook in
`PR pavics-sdi#230 <https://github.com/Ouranosinc/pavics-sdi/pull/230>`_).

`4.1.0 <https://github.com/crim-ca/weaver/tree/4.1.0>`_ (2021-09-29)
========================================================================
Expand Down
6 changes: 5 additions & 1 deletion tests/functional/test_wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -1888,7 +1888,11 @@ def test_enum_array_and_multi_format_inputs_from_wps_xml_reference(self):
assert all(isinstance(s, str) for s in pkg["inputs"][0]["type"][2]["items"]["symbols"])
# second input
assert pkg["inputs"][1]["id"] == "mosaic"
assert pkg["inputs"][1]["default"] == "null"
# note: modified by https://github.com/crim-ca/weaver/pull/344
# explicit 'null' should not be reported as 'default', causing CWL error seeing as string with "null" value
# must be in 'type' instead to define it as optional, as tested below
# assert pkg["inputs"][1]["default"] == "null"
assert "null" not in pkg["inputs"][1]
assert "format" not in pkg["inputs"][1]
assert isinstance(pkg["inputs"][1]["type"], list), "default 'null' result type formed with it"
assert len(pkg["inputs"][1]["type"]) == 2
Expand Down
6 changes: 4 additions & 2 deletions weaver/processes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,8 +493,10 @@ def _get_cwl_fmt_details(wps_fmt):
is_min_null = wps_min_occ in [0, "0"]
if wps_default != null and not isinstance(wps_default, dict):
cwl_io["default"] = wps_default
elif is_min_null:
cwl_io["default"] = "null"
# NOTE:
# Don't set any 'default' field here (neither 'null' string or 'None' type) if no value was provided
# since those are interpreted by CWL as literal string 'null' (for 'string' type) or null object.
# Instead, 'null' entry is added to 'type' to indicate drop/ignore missing input.

wps_max_occ = get_field(wps_io, "max_occurs", search_variations=True)
if wps_max_occ != null and (wps_max_occ == "unbounded" or wps_max_occ > 1):
Expand Down
5 changes: 5 additions & 0 deletions weaver/processes/wps1_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,11 @@ def get_input_values(self, process, workflow_inputs):
wps_inputs = []
for input_key in inputs_provided_keys:
input_val = workflow_inputs[input_key]

# ignore optional inputs resolved as omitted
if input_val is None:
continue

# in case of array inputs, must repeat (id,value)
# in case of complex input (File), obtain location, otherwise get data value
if not isinstance(input_val, list):
Expand Down
38 changes: 31 additions & 7 deletions weaver/processes/wps_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -1264,12 +1264,14 @@ def make_inputs(self,
is_array, elem_type, _, _ = is_cwl_array_type(cwl_inputs_info[input_id])
if isinstance(input_i, ComplexInput) or elem_type == "File":
# extend array data that allow max_occur > 1
# drop invalid inputs returned as None
if is_array:
cwl_inputs[input_id] = [
self.make_location_input(elem_type, input_def) for input_def in input_occurs
]
input_href = [self.make_location_input(elem_type, input_def) for input_def in input_occurs]
input_href = [cwl_input for cwl_input in input_href if cwl_input is not None]
else:
cwl_inputs[input_id] = self.make_location_input(elem_type, input_i)
input_href = self.make_location_input(elem_type, input_i)
if input_href:
cwl_inputs[input_id] = input_href
elif isinstance(input_i, (LiteralInput, BoundingBoxInput)):
# extend array data that allow max_occur > 1
if is_array:
Expand All @@ -1282,7 +1284,7 @@ def make_inputs(self,
return cwl_inputs

def make_location_input(self, input_type, input_definition):
# type: (str, ComplexInput) -> JSON
# type: (str, ComplexInput) -> Optional[JSON]
"""
Generates the JSON content required to specify a `CWL` ``File`` input definition from a location.

Expand All @@ -1306,8 +1308,9 @@ def make_location_input(self, input_type, input_definition):
# since href is already handled (pulled and staged locally), use it directly to avoid double fetch with CWL
# validate using the internal '_file' instead of 'file' otherwise we trigger the fetch
# normally, file should be pulled an this check should fail
if input_definition._iohandler._file and os.path.isfile(input_definition._iohandler._file): # noqa: W0212
input_location = input_definition._iohandler._file # noqa: W0212
input_definition_file = input_definition._iohandler._file # noqa: W0212
if input_definition_file and os.path.isfile(input_definition_file):
input_location = input_definition_file
# if source type is data, we actually need to call 'data' (without fetch of remote file, already fetched)
# value of 'file' in this case points to a local file path where the wanted link was dumped as raw data
if input_definition.source_type == SOURCE_TYPE.DATA:
Expand All @@ -1319,6 +1322,27 @@ def make_location_input(self, input_type, input_definition):
else:
# last option, could not resolve 'lazily' so will fetch data if needed
input_location = input_definition.data
# FIXME: PyWPS bug (https://github.com/geopython/pywps/issues/633)
# Optional File inputs receive 'data' content that correspond to 'default format' definition if not provided.
# This is invalid since input is not provided, it should not be there at all (default format != default data).
# Patch with a combination of available detection methods to be safe:
# - The 'file' attribute gets resolved to the process '{workdir}/input' temporary file.
# This 'file' is instead named 'input_{uuid}' when it is actually resolved to real input href/data contents.
# The IO handler better reports 'None' in its internal '_file' attribute.
# - For even more robustness, verify that erroneous 'data' matches the 'default format'.
# The media-type should match and 'default' argument should True since it resolve with '_default' argument.
default_format_def = getattr(input_definition, "_default", None)
if (
isinstance(default_format_def, dict) and
input_location == default_format_def and
input_definition_file is None and
# input_definition.size == 0 and # not reliable, sometimes fails because 'data' is dict instead of str
default_format_def.get("default") is True and
any(default_format_def.get("mimeType") == fmt.mime_type and fmt.mime_type is not None
for fmt in input_definition.supported_formats)
):
self.logger.debug("File input (%s) DROPPED. Detected default format as data.", input_definition.identifier)
return None
if self.must_fetch(input_location):
self.logger.info("File input (%s) ATTEMPT fetch: [%s]", input_definition.identifier, input_location)
input_location = fetch_file(input_location, input_definition.workdir, settings=self.settings)
Expand Down
2 changes: 1 addition & 1 deletion weaver/typedefs.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
CWL_IO_ArrayType = TypedDict("CWL_IO_ArrayType", {"type": str, "items": str}) # "items" => type of every item
CWL_IO_MultiType = List[str, CWL_IO_ArrayType, CWL_IO_EnumType] # single string allowed for "null"
CWL_IO_DataType = Union[str, CWL_IO_ArrayType, CWL_IO_EnumType, CWL_IO_MultiType]
CWL_Input_Type = TypedDict("CWL_Input_Type", {"id": str, "type": CWL_IO_DataType}, total=False)
CWL_Input_Type = TypedDict("CWL_Input_Type", {"id": str, "type": CWL_IO_DataType, "default": AnyValue}, total=False)
CWL_Output_Type = TypedDict("CWL_Output_Type", {"id": str, "type": CWL_IO_DataType}, total=False)
CWL_Inputs = Union[List[CWL_Input_Type], Dict[str, CWL_Input_Type]]
CWL_Outputs = Union[List[CWL_Output_Type], Dict[str, CWL_Output_Type]]
Expand Down