Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor+fix corefud.Delete #124

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 26 additions & 35 deletions udapi/block/corefud/delete.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,19 +25,15 @@ def is_root_reachable_by_deps(self, node, parents_to_ignore=None):
proc_node, path = stack.pop()
# root is reachable
if proc_node == node.root:
break
return True
# path forms a cycle, the root cannot be reached through this branch
if proc_node in path:
continue
for dep in proc_node.deps:
# the root cannot be reached through ignored nodes
if dep['parent'] in parents_to_ignore:
continue
# process the parent recursively
stack.append((dep['parent'], path + [proc_node]))
else:
return False
return True
if proc_node not in path:
for dep in proc_node.deps:
# the root cannot be reached through ignored nodes
if dep['parent'] not in parents_to_ignore:
# process the parent recursively
stack.append((dep['parent'], path + [proc_node]))
return False

def _deps_ignore_nodes(self, node, parents_to_ignore):
""" Retrieve deps from the node, recursively ignoring specified parents.
Expand All @@ -46,18 +42,16 @@ def _deps_ignore_nodes(self, node, parents_to_ignore):
stack = [(node, [])]
while stack:
proc_node, skipped_nodes = stack.pop()
# if there is a cycle of skipped nodes, ground the subtree to the root
if proc_node in skipped_nodes:
newdeps.append({'parent': node.root, 'deprel': 'root'})
continue
for dep in proc_node.deps:
# keep deps with a parent that shouldn't be ignored
if not dep['parent'] in parents_to_ignore:
newdeps.append(dep)
continue
# process the ignored parent recursively
stack.append((dep['parent'], skipped_nodes + [proc_node]))
return newdeps
if proc_node not in skipped_nodes:
for dep in proc_node.deps:
if dep['parent'] in parents_to_ignore:
# process the ignored parent recursively
stack.append((dep['parent'], skipped_nodes + [proc_node]))
else:
# keep deps with a parent that shouldn't be ignored
newdeps.append(dep)
# If no newdeps were found (because of a cycle), return the root.
return newdeps if newdeps else [{'parent': node.root, 'deprel': 'root'}]

def process_document(self, doc):
# This block should work both with coreference loaded (deserialized) and not.
Expand All @@ -67,17 +61,14 @@ def process_document(self, doc):
if self.empty:
for node in root.descendants:
# process only the nodes dependent on empty nodes
if not '.' in node.raw_deps:
continue
# just remove empty parents if the root remains reachable
if self.is_root_reachable_by_deps(node, root.empty_nodes):
node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes]
# otherwise propagate to non-empty ancestors
else:
newdeps = self._deps_ignore_nodes(node, root.empty_nodes)
newdeps_sorted = sorted(set((dep['parent'].ord, dep['deprel']) for dep in newdeps))
node.raw_deps = '|'.join(f"{p}:{r}" for p, r in newdeps_sorted)

if '.' in node.raw_deps:
# just remove empty parents if the root remains reachable
if self.is_root_reachable_by_deps(node, root.empty_nodes):
node.deps = [dep for dep in node.deps if not dep['parent'] in root.empty_nodes]
# otherwise propagate to non-empty ancestors
else:
node.deps = self._deps_ignore_nodes(node, root.empty_nodes)
# This needs to be done even if '.' not in node.raw_deps.
if '.' in node.misc['Functor'].split(':')[0]:
del node.misc['Functor']
root.empty_nodes = []
Expand Down
2 changes: 1 addition & 1 deletion udapi/core/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def raw_deps(self):
#if self._raw_deps is not None:
# return self._raw_deps
if self._deps:
self._raw_deps = '|'.join(f"{dep['parent']._ord}:{dep['deprel']}" for dep in self._deps)
self._raw_deps = '|'.join(f"{p}:{r}" for p, r in sorted(set((d['parent'].ord, d['deprel']) for d in self._deps)))
return self._raw_deps

@raw_deps.setter
Expand Down
2 changes: 1 addition & 1 deletion udapi/core/tests/test_enhdeps.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_create_deps2empty(self):
e.deps.append({'parent': h, 'deprel':'dep:e2h'})
d.deps.append({'parent': e, 'deprel': 'dep:d2e'})
self.assertEqual("2:dep:e2h", e.raw_deps, )
self.assertEqual("5:conj|3.1:dep:d2e", d.raw_deps)
self.assertEqual("3.1:dep:d2e|5:conj", d.raw_deps)
self.assertEqual(self.tree.descendants_and_empty, self.nodes[:3] + [e] + self.nodes[3:])


20 changes: 20 additions & 0 deletions udapi/core/tests/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,5 +245,25 @@ def test_empty_nodes(self):
self.assertEqual(root.descendants_and_empty, [e1, e2, e3, e4, e6, e7])
self.assertEqual([n.ord for n in root.descendants_and_empty], [0.1, 0.2, 0.3, 0.4, 0.5, 0.6])

def test_enh_deps_and_reordering(self):
"""Test reordering of node ord in enhanced deps when reorderin/removing nodes."""
root = Root()
for i in range(3):
root.create_child(form=f'node{i+1}')

n1, n2, n3 = root.descendants()
n1.raw_deps = '2:nsubj|3:obj'
self.assertEqual(n1.raw_deps, '2:nsubj|3:obj')
self.assertEqual(n1.deps, [{'parent': n2, 'deprel': 'nsubj'}, {'parent': n3, 'deprel': 'obj'}])
n2.shift_after_node(n3)
self.assertEqual(n1.raw_deps, '2:obj|3:nsubj')
# TODO only node.raw_deps are currently guaranteed to return the deps sorted, not node.deps
#self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}, {'parent': n2, 'deprel': 'nsubj'}])
# TODO: after removing a node, all deps should be updated
#n2.remove()
#self.assertEqual(n1.raw_deps, '2:nsubj')
#self.assertEqual(n1.deps, [{'parent': n3, 'deprel': 'obj'}])


if __name__ == "__main__":
unittest.main()