diff --git a/CHANGES.rst b/CHANGES.rst index 5fb29a20c..2759a7dcd 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -14,7 +14,18 @@ Changes: Fixes: ------ -- No change. +- Fix handling of default *format* field of `WPS` input definition incorrectly resolved as default *data* by ``PyWPS`` + for `Process` that allows optional (``minOccurs=0``) inputs of ``Complex`` type. Specific case is detected with + relevant erroneous data and dropped silently because it should not be present (since omitted in `WPS` request) and + should not generate a `WPS` input (relates to `geopython/pywps#633 `_). +- Fix resolution of `CWL` field ``default`` value erroneously inserted as ``"null"`` literal string for inputs generated + from `WPS` definition to avoid potential confusion with valid ``"null"`` input or default string. Default behaviour to + drop or ignore *omitted* inputs are handled by ``"null"`` within ``type`` field in `CWL` definitions. +- Fix ``Wps1Process`` job runner for dispatched execution of `WPS-1 Process` assuming all provided inputs contain data + or reference. Skip omitted optional inputs that are resolved with ``None`` value following above fixes. +- Resolve execution failure of `WPS-1 Process` ``ncdump`` under ``hummingbird`` `Provider` + (fixes issue identified in output logs from notebook in + `PR pavics-sdi#230 `_). `4.1.0 `_ (2021-09-29) ======================================================================== diff --git a/tests/functional/test_wps_package.py b/tests/functional/test_wps_package.py index b2bcea8ed..36ef74dae 100644 --- a/tests/functional/test_wps_package.py +++ b/tests/functional/test_wps_package.py @@ -1888,7 +1888,11 @@ def test_enum_array_and_multi_format_inputs_from_wps_xml_reference(self): assert all(isinstance(s, str) for s in pkg["inputs"][0]["type"][2]["items"]["symbols"]) # second input assert pkg["inputs"][1]["id"] == "mosaic" - assert pkg["inputs"][1]["default"] == "null" + # note: modified by https://github.com/crim-ca/weaver/pull/344 + # explicit 'null' should not be reported as 'default', causing CWL error seeing as string with "null" value + # must be in 'type' instead to define it as optional, as tested below + # assert pkg["inputs"][1]["default"] == "null" + assert "null" not in pkg["inputs"][1] assert "format" not in pkg["inputs"][1] assert isinstance(pkg["inputs"][1]["type"], list), "default 'null' result type formed with it" assert len(pkg["inputs"][1]["type"]) == 2 diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py index 8f65e3dbc..89e9d87f2 100644 --- a/weaver/processes/convert.py +++ b/weaver/processes/convert.py @@ -493,8 +493,10 @@ def _get_cwl_fmt_details(wps_fmt): is_min_null = wps_min_occ in [0, "0"] if wps_default != null and not isinstance(wps_default, dict): cwl_io["default"] = wps_default - elif is_min_null: - cwl_io["default"] = "null" + # NOTE: + # Don't set any 'default' field here (neither 'null' string or 'None' type) if no value was provided + # since those are interpreted by CWL as literal string 'null' (for 'string' type) or null object. + # Instead, 'null' entry is added to 'type' to indicate drop/ignore missing input. wps_max_occ = get_field(wps_io, "max_occurs", search_variations=True) if wps_max_occ != null and (wps_max_occ == "unbounded" or wps_max_occ > 1): diff --git a/weaver/processes/wps1_process.py b/weaver/processes/wps1_process.py index bb939deb6..32c9eac39 100644 --- a/weaver/processes/wps1_process.py +++ b/weaver/processes/wps1_process.py @@ -71,6 +71,11 @@ def get_input_values(self, process, workflow_inputs): wps_inputs = [] for input_key in inputs_provided_keys: input_val = workflow_inputs[input_key] + + # ignore optional inputs resolved as omitted + if input_val is None: + continue + # in case of array inputs, must repeat (id,value) # in case of complex input (File), obtain location, otherwise get data value if not isinstance(input_val, list): diff --git a/weaver/processes/wps_package.py b/weaver/processes/wps_package.py index fc3778f5e..b2f86ea6f 100644 --- a/weaver/processes/wps_package.py +++ b/weaver/processes/wps_package.py @@ -1264,12 +1264,14 @@ def make_inputs(self, is_array, elem_type, _, _ = is_cwl_array_type(cwl_inputs_info[input_id]) if isinstance(input_i, ComplexInput) or elem_type == "File": # extend array data that allow max_occur > 1 + # drop invalid inputs returned as None if is_array: - cwl_inputs[input_id] = [ - self.make_location_input(elem_type, input_def) for input_def in input_occurs - ] + input_href = [self.make_location_input(elem_type, input_def) for input_def in input_occurs] + input_href = [cwl_input for cwl_input in input_href if cwl_input is not None] else: - cwl_inputs[input_id] = self.make_location_input(elem_type, input_i) + input_href = self.make_location_input(elem_type, input_i) + if input_href: + cwl_inputs[input_id] = input_href elif isinstance(input_i, (LiteralInput, BoundingBoxInput)): # extend array data that allow max_occur > 1 if is_array: @@ -1282,7 +1284,7 @@ def make_inputs(self, return cwl_inputs def make_location_input(self, input_type, input_definition): - # type: (str, ComplexInput) -> JSON + # type: (str, ComplexInput) -> Optional[JSON] """ Generates the JSON content required to specify a `CWL` ``File`` input definition from a location. @@ -1306,8 +1308,9 @@ def make_location_input(self, input_type, input_definition): # since href is already handled (pulled and staged locally), use it directly to avoid double fetch with CWL # validate using the internal '_file' instead of 'file' otherwise we trigger the fetch # normally, file should be pulled an this check should fail - if input_definition._iohandler._file and os.path.isfile(input_definition._iohandler._file): # noqa: W0212 - input_location = input_definition._iohandler._file # noqa: W0212 + input_definition_file = input_definition._iohandler._file # noqa: W0212 + if input_definition_file and os.path.isfile(input_definition_file): + input_location = input_definition_file # if source type is data, we actually need to call 'data' (without fetch of remote file, already fetched) # value of 'file' in this case points to a local file path where the wanted link was dumped as raw data if input_definition.source_type == SOURCE_TYPE.DATA: @@ -1319,6 +1322,27 @@ def make_location_input(self, input_type, input_definition): else: # last option, could not resolve 'lazily' so will fetch data if needed input_location = input_definition.data + # FIXME: PyWPS bug (https://github.com/geopython/pywps/issues/633) + # Optional File inputs receive 'data' content that correspond to 'default format' definition if not provided. + # This is invalid since input is not provided, it should not be there at all (default format != default data). + # Patch with a combination of available detection methods to be safe: + # - The 'file' attribute gets resolved to the process '{workdir}/input' temporary file. + # This 'file' is instead named 'input_{uuid}' when it is actually resolved to real input href/data contents. + # The IO handler better reports 'None' in its internal '_file' attribute. + # - For even more robustness, verify that erroneous 'data' matches the 'default format'. + # The media-type should match and 'default' argument should True since it resolve with '_default' argument. + default_format_def = getattr(input_definition, "_default", None) + if ( + isinstance(default_format_def, dict) and + input_location == default_format_def and + input_definition_file is None and + # input_definition.size == 0 and # not reliable, sometimes fails because 'data' is dict instead of str + default_format_def.get("default") is True and + any(default_format_def.get("mimeType") == fmt.mime_type and fmt.mime_type is not None + for fmt in input_definition.supported_formats) + ): + self.logger.debug("File input (%s) DROPPED. Detected default format as data.", input_definition.identifier) + return None if self.must_fetch(input_location): self.logger.info("File input (%s) ATTEMPT fetch: [%s]", input_definition.identifier, input_location) input_location = fetch_file(input_location, input_definition.workdir, settings=self.settings) diff --git a/weaver/typedefs.py b/weaver/typedefs.py index 99566d222..4b0e60390 100644 --- a/weaver/typedefs.py +++ b/weaver/typedefs.py @@ -53,7 +53,7 @@ CWL_IO_ArrayType = TypedDict("CWL_IO_ArrayType", {"type": str, "items": str}) # "items" => type of every item CWL_IO_MultiType = List[str, CWL_IO_ArrayType, CWL_IO_EnumType] # single string allowed for "null" CWL_IO_DataType = Union[str, CWL_IO_ArrayType, CWL_IO_EnumType, CWL_IO_MultiType] - CWL_Input_Type = TypedDict("CWL_Input_Type", {"id": str, "type": CWL_IO_DataType}, total=False) + CWL_Input_Type = TypedDict("CWL_Input_Type", {"id": str, "type": CWL_IO_DataType, "default": AnyValue}, total=False) CWL_Output_Type = TypedDict("CWL_Output_Type", {"id": str, "type": CWL_IO_DataType}, total=False) CWL_Inputs = Union[List[CWL_Input_Type], Dict[str, CWL_Input_Type]] CWL_Outputs = Union[List[CWL_Output_Type], Dict[str, CWL_Output_Type]]