Skip to content

Commit

Permalink
Merge pull request #13373 from bernt-matthias/fix/macro_parsing
Browse files Browse the repository at this point in the history
[22.01] Fix macro parsing
  • Loading branch information
mvdbeek authored Feb 16, 2022
2 parents 2914a71 + 7157102 commit c14eca9
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 84 deletions.
4 changes: 0 additions & 4 deletions lib/galaxy/tool_util/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,13 @@ class XMLLintMessageLine(LintMessage):
def __init__(self, level: str, message: str, node: Optional[etree.Element] = None):
super().__init__(level, message)
self.line = None
self.fname = None
if node is not None:
self.line = node.sourceline
self.fname = node.base

def __str__(self) -> str:
rval = super().__str__()
if self.line is not None:
rval += " ("
if self.fname:
rval += f"{self.fname}:"
rval += str(self.line)
rval += ")"
return rval
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,11 @@
LXML_AVAILABLE = True
try:
from lxml import etree
Element = etree._Element
except ImportError:
LXML_AVAILABLE = False
import xml.etree.ElementTree as etree # type: ignore[assignment,no-redef]
Element = etree.Element
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

Expand Down
124 changes: 52 additions & 72 deletions lib/galaxy/util/xml_macros.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import os
from copy import copy, deepcopy
from copy import deepcopy
from typing import Dict, List

from galaxy.util import parse_xml
from galaxy.util import (
Element,
parse_xml,
)

REQUIRED_PARAMETER = object()

Expand All @@ -15,36 +19,29 @@ def load_with_references(path):
tree = raw_xml_tree(path)
root = tree.getroot()

macro_paths = _import_macros(root, path)

# temporarily remove the children of the macros node
# and create a copy. this is done because this allows
# to iterate over all expand nodes of the tree
# that are not included in the macros node
macros_el = _macros_el(root)
if macros_el is not None:
macros_copy = copy(macros_el)
macros_el.clear()
else:
macros_copy = None
if macros_el is None:
return tree, []

macros: Dict[str, List[Element]] = {}
macro_paths = _import_macros(macros_el, path, macros)
macros_el.clear()

# Collect tokens
tokens = _macros_of_type(macros_copy, 'token', lambda el: el.text or '')
tokens = {}
for m in macros.get('token', []):
tokens[m.get("name")] = m.text or ""
tokens = expand_nested_tokens(tokens)

# Expand xml macros
macro_dict = _macros_of_type(macros_copy, 'xml', lambda el: XmlMacroDef(el))
macro_dict = {}
for m in macros.get('xml', []):
macro_dict[m.get("name")] = XmlMacroDef(m)
_expand_macros([root], macro_dict, tokens)

# readd the stashed children of the macros node
# TODO is this this really necesary? Since macro nodes are removed anyway just below.
if macros_copy is not None:
_xml_set_children(macros_el, list(macros_copy))

for el in root.xpath('//macro'):
if el.get('type') != 'template':
# Only keep template macros
el.getparent().remove(el)
# reinsert template macro which are used during tool execution
for m in macros.get("template", []):
macros_el.append(m)
_expand_tokens_for_el(root, tokens)
return tree, macro_paths

Expand Down Expand Up @@ -80,16 +77,15 @@ def imported_macro_paths(root):
return _imported_macro_paths_from_el(macros_el)


def _import_macros(root, path):
def _import_macros(macros_el, path, macros):
"""
root the parsed XML tree
path the path to the main xml document
"""
xml_base_dir = os.path.dirname(path)
macros_el = _macros_el(root)
if macros_el is not None:
macro_els, macro_paths = _load_macros(macros_el, xml_base_dir)
_xml_set_children(macros_el, macro_els)
macro_paths = _load_macros(macros_el, xml_base_dir, macros)
# _xml_set_children(macros_el, macro_els)
return macro_paths


Expand Down Expand Up @@ -159,22 +155,22 @@ def _expand_macros(elements, macros, tokens, visited=None):
if not macros and not tokens:
return

if visited is None:
v = []
else:
v = visited

for element in elements:
while True:
expand_el = element.find('.//expand')
if expand_el is None:
break
if visited is None:
v = []
else:
v = visited
_expand_macro(expand_el, macros, tokens, v)


def _expand_macro(expand_el, macros, tokens, visited):
macro_name = expand_el.get('macro')
assert macro_name is not None, "Attempted to expand macro with no 'macro' attribute defined."

# check for cycles in the nested macro expansion
assert macro_name not in visited, f"Cycle in nested macros: already expanded {visited} can't expand '{macro_name}' again"
visited.append(macro_name)
Expand Down Expand Up @@ -219,55 +215,48 @@ def _expand_yield_statements(macro_def, expand_el):
_xml_replace(yield_el, expand_el_children)


def _load_macros(macros_el, xml_base_dir):
macros = []
def _load_macros(macros_el, xml_base_dir, macros):
# Import macros from external files.
imported_macros, macro_paths = _load_imported_macros(macros_el, xml_base_dir)
macros.extend(imported_macros)
macro_paths = _load_imported_macros(macros_el, xml_base_dir, macros)
# Load all directly defined macros.
macros.extend(_load_embedded_macros(macros_el, xml_base_dir))
return macros, macro_paths

_load_embedded_macros(macros_el, xml_base_dir, macros)
return macro_paths

def _load_embedded_macros(macros_el, xml_base_dir):
macros = []

macro_els = []
def _load_embedded_macros(macros_el, xml_base_dir, macros):
if macros_el is None:
return
# attribute typed macro
if macros_el is not None:
macro_els = macros_el.findall("macro")
for macro in macro_els:
for macro in macros_el.iterfind("macro"):
if 'type' not in macro.attrib:
macro.attrib['type'] = 'xml'
macros.append(macro)
try:
macros[macro.attrib['type']].append(macro)
except KeyError:
macros[macro.attrib['type']] = [macro]

# type shortcuts (<xml> is a shortcut for <macro type="xml",
# likewise for <template>.
typed_tag = ['template', 'xml', 'token']
for tag in typed_tag:
macro_els = []
if macros_el is not None:
macro_els = macros_el.findall(tag)
for macro_el in macro_els:
for tag in ['template', 'xml', 'token']:
for macro_el in macros_el.iterfind(tag):
macro_el.attrib['type'] = tag
macro_el.tag = 'macro'
macros.append(macro_el)

return macros
try:
macros[tag].append(macro_el)
except KeyError:
macros[tag] = [macro_el]


def _load_imported_macros(macros_el, xml_base_dir):
macros = []
def _load_imported_macros(macros_el, xml_base_dir, macros):
macro_paths = []

for tool_relative_import_path in _imported_macro_paths_from_el(macros_el):
import_path = \
os.path.join(xml_base_dir, tool_relative_import_path)
macro_paths.append(import_path)
file_macros, current_macro_paths = _load_macro_file(import_path, xml_base_dir)
macros.extend(file_macros)
current_macro_paths = _load_macro_file(import_path, xml_base_dir, macros)
macro_paths.extend(current_macro_paths)
return macros, macro_paths
return macro_paths


def _imported_macro_paths_from_el(macros_el):
Expand All @@ -281,19 +270,10 @@ def _imported_macro_paths_from_el(macros_el):
return imported_macro_paths


def _load_macro_file(path, xml_base_dir):
def _load_macro_file(path, xml_base_dir, macros):
tree = parse_xml(path, strip_whitespace=False)
for node in tree.iter():
# little hack: node.base contains that path to the file which is used in
# the linter. it is a property that apparently is determined from the
# xmltree, so if the macro is inserted into the main xml node.base will
# give the path of the main xml file.
# luckily lxml allows to set the property by adding xml:base to the node
# which will be returned if the property is read
# https://github.com/lxml/lxml/blob/5a5c7fb01d15af58def4bab2ba7b15c937042835/src/lxml/etree.pyx#L1106
node.base = node.base
root = tree.getroot()
return _load_macros(root, xml_base_dir)
return _load_macros(root, xml_base_dir, macros)


def _xml_set_children(element, new_children):
Expand Down
20 changes: 14 additions & 6 deletions test/unit/tool_util/test_tool_linters.py
Original file line number Diff line number Diff line change
Expand Up @@ -1285,7 +1285,7 @@ def test_xml_order(lint_ctx):
"""


def test_tool_and_macro_xml(lint_ctx_xpath):
def test_tool_and_macro_xml(lint_ctx_xpath, lint_ctx):
"""
test linters (all of them via lint_tool_source_with) on a tool and macro xml file
checking a list of asserts, where each assert is a 4-tuple:
Expand All @@ -1304,21 +1304,29 @@ def test_tool_and_macro_xml(lint_ctx_xpath):
tool_xml, _ = load_with_references(tool_path)

tool_source = XmlToolSource(tool_xml)
# lint once with the lint context using XMLLintMessageXPath and XMLLintMessageLine
lint_tool_source_with(lint_ctx_xpath, tool_source)
lint_tool_source_with(lint_ctx, tool_source)

asserts = (
("Select parameter [select] has multiple options with the same value", "tool.xml", 5, "/tool/inputs/param[1]"),
("Found param input with no name specified.", "tool.xml", 13, "/tool/inputs/param[2]"),
("Param input [No_type] input with no type specified.", "macros.xml", 3, "/tool/inputs/param[3]")
("Select parameter [select] has multiple options with the same value", 5, "/tool/inputs/param[1]"),
("Found param input with no name specified.", 13, "/tool/inputs/param[2]"),
("Param input [No_type] input with no type specified.", 3, "/tool/inputs/param[3]")
)
for a in asserts:
message, fname, line, xpath = a
message, line, xpath = a
found = False
for lint_message in lint_ctx_xpath.message_list:
if lint_message.message != message:
continue
found = True
assert lint_message.xpath == xpath, f"Assumed xpath {xpath} xpath {lint_message.xpath} for: {message}"
assert lint_message.xpath == xpath, f"Assumed xpath {xpath}; found xpath {lint_message.xpath} for: {message}"
assert found, f"Did not find {message}"
for lint_message in lint_ctx.message_list:
if lint_message.message != message:
continue
found = True
assert lint_message.line == line, f"Assumed line {line}; found line {lint_message.line} for: {message}"
assert found, f"Did not find {message}"


Expand Down
5 changes: 3 additions & 2 deletions test/unit/tool_util/test_tool_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -642,11 +642,12 @@ def test_loader_specify_nested_macro_by_token():
# test with re because loading from external macros
# adds a xml:base property (containing the source path)
# to the node which is printed
print(f"{xml_to_string(xml, pretty=True)}")
assert re.match(r"""<\?xml version="1\.0" \?>
<tool>
<macros/>
<A xml:base=".*/external.xml"/>
<B xml:base=".*/external.xml"/>
<A/>
<B/>
</tool>""", xml_to_string(xml, pretty=True), re.MULTILINE)


Expand Down

0 comments on commit c14eca9

Please sign in to comment.