From 335703664005af259911ddff1e6624d50fa90b2a Mon Sep 17 00:00:00 2001 From: Matthias Bernt Date: Wed, 23 Mar 2022 12:37:47 +0100 Subject: [PATCH] check 5k prefix for class for checking potential yaml and cwl tools --- lib/galaxy/tool_util/loader_directory.py | 46 ++++++++++++++---------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/lib/galaxy/tool_util/loader_directory.py b/lib/galaxy/tool_util/loader_directory.py index c3d22c0d12c4..e9514fdd0403 100644 --- a/lib/galaxy/tool_util/loader_directory.py +++ b/lib/galaxy/tool_util/loader_directory.py @@ -188,22 +188,41 @@ def looks_like_a_data_manager_xml(path): return looks_like_xml(path=path, regex=DATA_MANAGER_REGEX) -def is_a_yaml_with_class(path, classes): - """Determine if a file is a valid YAML with a supplied ``class`` entry.""" - if not _has_extension(path, YAML_EXTENSIONS): - return False +def as_dict_if_looks_like_yaml_or_cwl_with_class(path, classes): + """ + get a dict from yaml file if it contains `class: CLASS`, where CLASS is + any string given in CLASSES. must appear in the first 5k and also load + properly in total. + """ + with open(path, encoding="utf-8") as f: + try: + start_contents = f.read(5 * 1024) + except UnicodeDecodeError: + return None + if re.search(rf"^class:\s+({'|'.join(classes)})\s*$", start_contents) is None: + return None with open(path) as f: try: as_dict = yaml.safe_load(f) except Exception: - return False + return None if not isinstance(as_dict, dict): - return False + return None file_class = as_dict.get("class", None) - return file_class in classes + if file_class not in classes: + return None + + return as_dict + + +def is_a_yaml_with_class(path, classes): + """Determine if a file is a valid YAML with a supplied ``class`` entry.""" + if not _has_extension(path, YAML_EXTENSIONS): + return False + return as_dict_if_looks_like_yaml_or_cwl_with_class(path, classes) is not None def looks_like_a_tool_yaml(path): @@ -216,17 +235,8 @@ def looks_like_a_cwl_artifact(path, classes=None): if not _has_extension(path, CWL_EXTENSIONS): return False - with open(path) as f: - try: - as_dict = yaml.safe_load(f) - except Exception: - return False - - if not isinstance(as_dict, dict): - return False - - file_class = as_dict.get("class", None) - if classes is not None and file_class not in classes: + as_dict = as_dict_if_looks_like_yaml_or_cwl_with_class(path, classes) + if as_dict is None: return False file_cwl_version = as_dict.get("cwlVersion", None)