Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add element_identifier and ext to inputs config file export #17357

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -6513,9 +6513,12 @@ A contrived example of a tool that uses this is the test tool

By default this file will not contain paths for data or collection inputs. To include simple
paths for data or collection inputs set the ``data_style`` attribute to ``paths`` (see [inputs_as_json_with_paths.xml](https://github.com/galaxyproject/galaxy/blob/dev/test/functional/tools/inputs_as_json_with_paths.xml) for an example).
To include a dictionary with staging paths, paths and metadata files set the ``data_style`` attribute to ``staging_path_and_source_path``.
To include a dictionary with element identifiers, datatypes, staging paths, paths and metadata files set the ``data_style`` attribute to ``staging_path_and_source_path`` (element identifiers and datatypes are available since 24.0).
An example tool that uses ``staging_path_and_source_path`` is [inputs_as_json_with_staging_path_and_source_path.xml](https://github.com/galaxyproject/galaxy/blob/dev/test/functional/tools/inputs_as_json_with_staging_path_and_source_path.xml)

Note that the element identifiers are stored as lists, where the last element is the actual element identifier of the dataset
and the other elements are the identifiers of the collections containing the dataset.

For tools with profile >= 20.05 a select with ``multiple="true"`` is rendered as an array which is empty if nothing is selected. For older profile versions select lists are rendered as comma separated strings or a literal ``null`` in case nothing is selected.
]]></xs:documentation>
</xs:annotation>
Expand All @@ -6534,9 +6537,9 @@ response to this directive.
<xs:documentation xml:lang="en">Path relative to the working directory of the tool for the inputs JSON file created in response to this directive.</xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="data_style" type="xs:string">
<xs:attribute name="data_style" type="InputsConfigfileDatastyleType">
<xs:annotation>
<xs:documentation xml:lang="en">Set to 'paths' to include dataset paths in the resulting file. Set to 'staging_path_and_source_path' to include a staging path, a source path and all metadata files.</xs:documentation>
<xs:documentation xml:lang="en">Set to 'paths' to include dataset paths in the resulting file. Set to 'staging_path_and_source_path' to include element identifiers, the datatype, a staging path, a source path and all metadata files.</xs:documentation>
</xs:annotation>
</xs:attribute>
</xs:extension>
Expand Down Expand Up @@ -7598,4 +7601,13 @@ the only supported options.</xs:documentation>
<xs:pattern value="(list|paired)([:,](list|paired))*"/>
</xs:restriction>
</xs:simpleType>
<xs:simpleType name="InputsConfigfileDatastyleType">
<xs:annotation>
<xs:documentation xml:lang="en">Allowed values for the data_style attribute of the inputs config file</xs:documentation>
</xs:annotation>
<xs:restriction base="xs:string">
<xs:enumeration value="paths"/>
<xs:enumeration value="staging_path_and_source_path"/>
</xs:restriction>
</xs:simpleType>
</xs:schema>
30 changes: 25 additions & 5 deletions lib/galaxy/tools/parameters/wrapped_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,17 @@
Dict,
List,
Sequence,
TYPE_CHECKING,
)

from packaging.version import Version

if TYPE_CHECKING:
from galaxy.tools.parameters.wrappers import (
DatasetCollectionWrapper,
DatasetFilenameWrapper,
)

log = logging.getLogger(__name__)

SKIP_INPUT = object()
Expand Down Expand Up @@ -40,28 +47,41 @@ def data_collection_input_to_path(v):


def data_collection_input_to_staging_path_and_source_path(
v, invalid_chars: Sequence[str] = ("/",), include_collection_name: bool = False
v: "DatasetCollectionWrapper", invalid_chars: Sequence[str] = ("/",), include_collection_name: bool = False
) -> List[Dict[str, Any]]:
staging_paths = v.get_all_staging_paths(
invalid_chars=invalid_chars, include_collection_name=include_collection_name
)
source_paths = v.all_paths
metadata_files = v.all_metadata_files
if v.element_identifiers_extensions_paths_and_metadata_files:
element_identifiers, extensions, source_paths, metadata_files = zip(
*v.element_identifiers_extensions_paths_and_metadata_files
)
else:
element_identifiers, extensions, source_paths, metadata_files = (), (), (), ()
return [
{
"element_identifier": element_identifier,
"ext": extension,
"staging_path": staging_path,
"source_path": source_path,
"metadata_files": [
{"staging_path": f"{staging_path}.{mf[0]}", "source_path": mf[1]} for mf in metadata_files
],
}
for staging_path, source_path, metadata_files in zip(staging_paths, source_paths, metadata_files)
for element_identifier, extension, staging_path, source_path, metadata_files in zip(
element_identifiers, extensions, staging_paths, source_paths, metadata_files
)
]


def data_input_to_staging_path_and_source_path(v, invalid_chars: Sequence[str] = ("/",)) -> Dict[str, Any]:
def data_input_to_staging_path_and_source_path(
v: "DatasetFilenameWrapper", invalid_chars: Sequence[str] = ("/",)
) -> Dict[str, Any]:
staging_path = v.get_staging_path(invalid_chars=invalid_chars)
# note that the element identifier should always be a list
return {
"element_identifier": [v.element_identifier],
"ext": v.file_ext,
"staging_path": staging_path,
"source_path": data_input_to_path(v),
"metadata_files": [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,23 @@ import sys
input_json_path = sys.argv[1]
as_dict = json.load(open(input_json_path, "r"))

data_input_with_staging_details = as_dict["data_input"]
assert data_input_with_staging_details['element_identifier'] == ["1.tabular"]
assert data_input_with_staging_details['ext'] == "tabular"
assert data_input_with_staging_details['staging_path'] == "1.tabular.tabular"
assert len(data_input_with_staging_details['metadata_files']) == 0

multiple_data_input_with_staging_details = as_dict["multiple_data_input"]
assert len(multiple_data_input_with_staging_details) == 2
assert multiple_data_input_with_staging_details[0]['element_identifier'] == ["simple_line.txt"]
assert multiple_data_input_with_staging_details[0]['ext'] == "txt"
assert multiple_data_input_with_staging_details[0]['staging_path'] == "simple_line.txt.txt"
assert len(multiple_data_input_with_staging_details[0]['metadata_files']) == 0

collection_input_with_staging_details = as_dict["collection_input"]
## element identifier and ext are available since 24.x
assert collection_input_with_staging_details[0]['element_identifier'] == ["list", "element1"]
assert collection_input_with_staging_details[0]['ext'] == "bam"
assert collection_input_with_staging_details[0]['staging_path'] == "list/element1.bam"
assert collection_input_with_staging_details[0]['metadata_files'][0]['staging_path'] == "list/element1.bam.bai"

Expand All @@ -22,13 +38,15 @@ with open("output", "w") as f:
<inputs>
<param name="data_input" type="data" optional="true" />
<param name="multiple_data_input" type="data" optional="true" multiple="true" />
<param name="collection_input" type="data_collection" optional="true"/>
<param name="collection_input" type="data_collection" collection_type="list:list" optional="true"/>
</inputs>
<outputs>
<data name="out_file1" from_work_dir="output" format="txt" />
</outputs>
<tests>
<test>
<param name="data_input" value="1.tabular" ftype="tabular"/>
<param name="multiple_data_input" value="simple_line.txt,simple_line_alternative.txt" ftype="txt"/>
<param name="collection_input">
<collection type="list:list">
<element name="list">
Expand All @@ -38,6 +56,11 @@ with open("output", "w") as f:
</element>
</collection>
</param>
<output name="out_file1">
<assert_contents>
<has_text text="okay"/>
</assert_contents>
</output>
</test>
</tests>
<help>
Expand Down
Loading