Skip to content

Commit

Permalink
Merge pull request #38 from keith-hall/processing-instructions
Browse files Browse the repository at this point in the history
add support for processing instructions (even before the root element)
  • Loading branch information
rosshadden authored Jan 26, 2018
2 parents fb10258 + 5977af1 commit f4916d1
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 10 deletions.
40 changes: 32 additions & 8 deletions lxml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,13 @@ class LocationAwareComment(etree.CommentBase):
tag_pos = None


# Processing-instruction node class (subclass of etree.PIBase) that can carry a source location.
# tag_pos is None at class level; the parser's pi() handler assigns the location it
# was given to the created node's tag_pos, and getNodeTagRange() reads it back.
class LocationAwareProcessingInstruction(etree.PIBase):
tag_pos = None


# http://stackoverflow.com/questions/36246014/lxml-use-default-class-element-lookup-and-treebuilder-parser-target-at-the-sam
class LocationAwareXMLParser:
RE_SPLIT_XML = re.compile('<!\[CDATA\[|\]\]>|[<>]')
RE_SPLIT_XML = re.compile(r'<!\[CDATA\[|\]\]>|[<>]')

def __init__(self, position_offset = 0, **parser_options):
def getLocation(index=None):
Expand Down Expand Up @@ -126,6 +130,8 @@ def _reset(self):
self._most_recent = None
self._in_tail = None
self._all_namespaces = collections.OrderedDict()
self._addprevious = []
self._root = None

def _flush(self):
if self._text:
Expand Down Expand Up @@ -163,25 +169,43 @@ def element_end(self, tag, location=None):
# Buffer a chunk of character data by appending it to self._text.
# The location argument is accepted but not used by this method.
def text_data(self, data, location=None):
self._text.append(data)

# Handle a processing instruction: flush pending text, create the PI node via
# create_pi(), attach it with _appendNode(), and record its location on the node.
def pi(self, target, data, location=None):
self._flush()
self._appendNode(self.create_pi(target, data))
# _appendNode() stores the node it was given in self._most_recent, so this tags the PI just created
self._most_recent.tag_pos = location
# NOTE(review): presumably makes following text attach as this node's tail — confirm against _flush()
self._in_tail = True

# Handle a comment: flush pending text, create the comment node via
# create_comment(), attach it with _appendNode(), and record its location on the node.
# NOTE(review): this span is a rendered diff without +/- markers. The sequence guarded by
# `if self._most_recent is not None:` appears to be the removed version and the unguarded
# sequence after it the added one, which would also process comments seen before any other
# node — confirm against the repository.
def comment(self, text, location=None):
if self._most_recent is not None:
self._flush()
self._appendNode(self.create_comment(text))
self._most_recent.tag_pos = location
self._in_tail = True
self._flush()
self._appendNode(self.create_comment(text))
# _appendNode() stores the node it was given in self._most_recent, so this tags the comment just created
self._most_recent.tag_pos = location
self._in_tail = True

# Build the comment node for the given text; returns a LocationAwareComment.
def create_comment(self, text):
return LocationAwareComment(text)

# Build the processing-instruction node for the given target and data;
# returns a LocationAwareProcessingInstruction.
def create_pi(self, target, data):
return LocationAwareProcessingInstruction(target, data)

# Attach node to the tree being built, then record it as the most recent node.
# Placement depends on the parser state, checked in this order:
#   1. an element is open (stack non-empty): node becomes a child of the top element;
#   2. no root yet and node is an etree.ElementBase: node becomes the root, and every
#      node queued in self._addprevious is inserted before it;
#   3. a root and a most-recent node both exist: node is added as the next sibling
#      of the most recent node;
#   4. otherwise: node is queued in self._addprevious until the root appears.
# In every case the node is also appended to self._all_elements.
def _appendNode(self, node):
if self._element_stack: # if we have anything on the stack
self._element_stack[-1].append(node) # append the node as a child to the last/top element on the stack
elif self._root is None and isinstance(node, etree.ElementBase):
self._root = node
# NOTE(review): each addprevious() call should place the item directly before the root,
# which would keep the queued nodes in their original order — confirm against the lxml docs
for item in self._addprevious:
node.addprevious(item)
elif self._most_recent is not None and self._root is not None:
# after the root element
self._most_recent.addnext(node)
else:
# store this element to add before the root node when we encounter it
self._addprevious.append(node)
self._all_elements.append(node)
self._most_recent = node

# Finish parsing and hand back a 3-tuple: a node, the collected namespaces
# (self._all_namespaces) and the list of every node created (self._all_elements).
# NOTE(review): this span is a rendered diff without +/- markers. The first return
# (self._most_recent) appears to be the removed line and the second (self._root) the
# added one; read as plain Python only the first would execute — confirm against the repository.
def document_end(self):
"""Return the root node and a list of all elements (and comments) found in the document, to keep their proxy alive."""
return (self._most_recent, self._all_namespaces, self._all_elements)
return (self._root, self._all_namespaces, self._all_elements)


def lxml_etree_parse_xml_string_with_location(xml_chunks, position_offset = 0, should_stop = None):
Expand All @@ -206,7 +230,7 @@ def lxml_etree_parse_xml_string_with_location(xml_chunks, position_offset = 0, s
def getNodeTagRange(node, position_type):
"""Given a node and position type (open or close), return the node's position."""
pos = None
if isinstance(node, LocationAwareComment):
if isinstance(node, LocationAwareComment) or isinstance(node, LocationAwareProcessingInstruction):
pos = node.tag_pos
else:
pos = getattr(node, position_type + '_tag_pos')
Expand Down
4 changes: 3 additions & 1 deletion sublime_lxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ def get_nodes_from_document(nodes):
element = node.getparent() # get the parent
if element is None: # some nodes are not actually part of the original document we parsed, for example when using the substring function. so there is no way to find the original node, and therefore the location
continue
elif isinstance(node, etree.PIBase):
element = node
elif isinstance(node, etree.CommentBase):
element = node
elif isinstance(node, etree.ElementBase):
Expand Down Expand Up @@ -154,7 +156,7 @@ def ensureTagNameEndPosIsSet(node, open_pos):
if next_node is not None:
text_end_pos = getNodeTagRegion(view, next_node, 'open').begin()
yield sublime.Region(text_begin_pos, text_end_pos)
elif isinstance(node, etree.CommentBase):
elif isinstance(node, etree.CommentBase) or isinstance(node, etree.PIBase):
yield open_pos
elif attr_name is None or attribute_position_type is None or attribute_position_type in ('element', 'parent'):
# position type 'open' <|name| attr1="test"></name> "Goto name in open tag"
Expand Down
2 changes: 1 addition & 1 deletion xpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -958,7 +958,7 @@ def completions_axis_specifiers():
return [(completion + '\taxis', completion + '::') for completion in completions]

# Build completion entries for XPath node-type tests. Each entry is a tuple of
# (name + '\tnode type', name + '()').
# NOTE(review): this span is a rendered diff without +/- markers. The first `completions`
# assignment (with the trailing remark about processing instructions) appears to be the
# removed line and the second, which adds 'processing-instruction', the added one; read as
# plain Python the second assignment is the one that takes effect.
def completions_node_types():
completions = ['text', 'node', 'comment'] # 'processing-instruction' is also a valid XPath node type, but not parsed into an ElementTree, so useless to show it in suggestions/completions
completions = ['text', 'node', 'comment', 'processing-instruction']
return [(completion + '\tnode type', completion + '()') for completion in completions]

def completions_functions():
Expand Down

0 comments on commit f4916d1

Please sign in to comment.