Skip to content

Commit

Permalink
feat: add support for parsing xAIF argument graphs (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
leonkamke authored Apr 17, 2023
1 parent 840aee2 commit 813b9f9
Show file tree
Hide file tree
Showing 5 changed files with 272 additions and 0 deletions.
2 changes: 2 additions & 0 deletions arguebuf/load/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@
from ._load_path import load_folder as folder
from ._load_protobuf import load_protobuf as protobuf
from ._load_sadface import load_sadface as sadface
from ._load_xaif import load_xaif as xaif

__all__ = (
"aif",
"xaif",
"aml",
"argdown",
"brat",
Expand Down
108 changes: 108 additions & 0 deletions arguebuf/load/_load_xaif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
from __future__ import annotations

import typing as t

import pendulum

from arguebuf.model import Graph, utils
from arguebuf.model.edge import Edge, warn_missing_nodes
from arguebuf.model.node import AbstractNode, AtomNode, SchemeNode
from arguebuf.model.scheme import aif2scheme, text2scheme
from arguebuf.schemas import xaif
from arguebuf.schemas.aif import SchemeType

from ._config import Config, DefaultConfig

# Public API of this module: only the top-level loader is exported.
__all__ = ("load_xaif",)


def load_xaif(
    obj: xaif.Graph,
    name: t.Optional[str] = None,
    config: Config = DefaultConfig,
) -> Graph:
    """Build an arguebuf graph from a parsed xAIF document.

    Only the ``"AIF"`` section of ``obj`` is read: its ``"nodes"`` list is
    converted first, then its ``"edges"`` list.

    Args:
        obj: Parsed xAIF document (a mapping with an ``"AIF"`` entry).
        name: Name handed to ``config.GraphClass``.
        config: Factory configuration used to create the graph, its nodes
            and its edges.

    Returns:
        The populated graph. Nodes and edges for which the conversion
        helpers return a falsy value are left out.
    """
    graph = config.GraphClass(name)
    aif_section = obj["AIF"]

    for raw_node in aif_section["nodes"]:
        # "I" nodes become atoms; every other type goes through the
        # scheme conversion.
        if raw_node["type"] == "I":
            node = atom_from_xaif(raw_node, config)
        else:
            node = scheme_from_xaif(raw_node, config)

        if node:
            graph.add_node(node)

    # Edges are resolved against the nodes added above.
    for raw_edge in aif_section["edges"]:
        edge = edge_from_xaif(raw_edge, graph.nodes, config)
        if edge:
            graph.add_edge(edge)

    return graph


def scheme_from_xaif(
    obj: xaif.AifNode,
    config: Config,
) -> t.Optional[SchemeNode]:
    """Convert an xAIF node entry into a scheme node.

    Args:
        obj: Node entry; ``"type"``, ``"text"`` and ``"nodeID"`` are read,
            plus ``"scheme"`` when present.
        config: Provides ``SchemeNodeClass`` and ``MetadataClass``.

    Returns:
        A new scheme node, or ``None`` when the node's type is not a key
        of ``aif2scheme``.
    """
    node_type = obj["type"]
    # The node text doubles as the scheme name when no "scheme" key exists.
    scheme_name: str = obj.get("scheme", obj["text"])

    if node_type not in aif2scheme:
        return None

    scheme = aif2scheme[t.cast(SchemeType, node_type)]

    # TODO: Handle formatting like capitalization, spaces, underscores, etc.
    # TODO: Araucaria does not use spaces between scheme names
    # (idea: insert a space before every capital letter via re.sub).
    if scheme:
        # Prefer a more specific scheme of the same family if the name
        # is known; otherwise keep the default for this node type.
        specific = text2scheme[type(scheme)].get(scheme_name)
        if specific:
            scheme = specific

    # The same timestamp is passed twice to the metadata constructor.
    now = pendulum.now()

    return config.SchemeNodeClass(
        id=obj["nodeID"],
        metadata=config.MetadataClass(now, now),
        scheme=scheme,
    )


def atom_from_xaif(
    obj: xaif.AifNode,
    config: Config,
) -> AtomNode:
    """Convert an xAIF node entry into an atom node.

    Args:
        obj: Node entry; ``"nodeID"`` and ``"text"`` are read.
        config: Provides ``AtomNodeClass``, ``MetadataClass`` and the
            ``nlp`` object forwarded to ``utils.parse``.

    Returns:
        A new atom node carrying the parsed text.
    """
    # The same timestamp is passed twice to the metadata constructor.
    now = pendulum.now()

    node_id = obj["nodeID"]
    metadata = config.MetadataClass(now, now)
    parsed_text = utils.parse(obj["text"], config.nlp)

    return config.AtomNodeClass(id=node_id, metadata=metadata, text=parsed_text)


def edge_from_xaif(
    obj: xaif.AifEdge,
    nodes: t.Mapping[str, AbstractNode],
    config: Config = DefaultConfig,
) -> t.Optional[Edge]:
    """Convert an xAIF edge entry into an edge between known nodes.

    Args:
        obj: Edge entry; ``"fromID"``, ``"toID"`` and ``"edgeID"`` are read.
        nodes: Already-loaded nodes keyed by ID, used to resolve both
            endpoints.
        config: Provides ``EdgeClass``. Defaults to ``DefaultConfig`` (as
            ``load_xaif`` does) so the helper can be called without an
            explicit configuration.

    Returns:
        A new edge whose ID is the stringified ``"edgeID"``, or ``None``
        when either endpoint is not in ``nodes``; in that case
        ``warn_missing_nodes`` is called with the edge and endpoint IDs.
    """
    source_id = obj.get("fromID")
    target_id = obj.get("toID")

    if source_id in nodes and target_id in nodes:
        return config.EdgeClass(
            id=str(obj["edgeID"]),
            source=nodes[source_id],
            target=nodes[target_id],
        )

    # At least one endpoint is unknown: report it and skip this edge.
    warn_missing_nodes(str(obj["edgeID"]), source_id, target_id)
    return None
70 changes: 70 additions & 0 deletions arguebuf/schemas/xaif.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import typing as t

# Values allowed for the "type" field of an AIF node entry.
NodeType = t.Literal[
    "RA",
    "CA",
    "MA",
    "PA",
    "",
    "I",
    "TA",
    "YA",
    "L",
]


class AifNode(t.TypedDict):
    """Shape of one node entry in the AIF section of an xAIF document."""

    # Node identifier.
    nodeID: str
    # Textual content of the node.
    text: str
    # One of the literals listed in NodeType.
    type: NodeType


class AifEdge(t.TypedDict):
    """Shape of one edge entry in the AIF section of an xAIF document."""

    # Edge identifier (an integer, unlike the string node IDs).
    edgeID: int
    # ID of the node the edge starts at.
    fromID: str
    # ID of the node the edge points to.
    toID: str


class AifSchemeFulfillment(t.TypedDict):
    """Shape of one entry of the AIF ``schemefulfillments`` list."""

    # Presumably the node that fulfils the scheme; verify against the format.
    nodeID: str
    # Identifier of the scheme.
    schemeID: str


class AifLocution(t.TypedDict):
    """Shape of one entry of the AIF ``locutions`` list."""

    # NOTE(review): the fields are identical to AifSchemeFulfillment;
    # confirm against the xAIF format that this is intended.
    nodeID: str
    schemeID: str


class AifParticipant(t.TypedDict):
    """Shape of one entry of the AIF ``participants`` list."""

    # Participant identifier.
    participantID: int
    # Given name of the participant.
    firstname: str
    # Family name of the participant.
    surname: str


class Aif(t.TypedDict):
    """Shape of the ``AIF`` section of an xAIF document."""

    # Node entries of the argument graph.
    nodes: t.List[AifNode]
    # Edge entries connecting the nodes by ID.
    edges: t.List[AifEdge]
    # Scheme fulfilment records.
    schemefulfillments: t.List[AifSchemeFulfillment]
    # Locution records.
    locutions: t.List[AifLocution]
    # Participant records.
    participants: t.List[AifParticipant]


class OvaNode(t.TypedDict):
    """Shape of one node entry in the ``OVA`` section of an xAIF document."""

    # Node identifier.
    nodeID: str
    # Visibility flag.
    visible: bool
    # Integer coordinates (presumably the layout position; confirm).
    x: int
    y: int
    # Timestamp kept as a string; its format is not fixed by this schema.
    timestamp: str


class OvaEdge(t.TypedDict):
    """Shape of an OVA edge entry."""

    # ID of the node the edge starts at.
    fromID: str
    # ID of the node the edge points to.
    toID: str
    # Visibility flag.
    visible: bool


class Ova(t.TypedDict):
    """Shape of the ``OVA`` section of an xAIF document."""

    # Given name and family name stored in the section.
    firstname: str
    surname: str
    # Per-node OVA records.
    # NOTE(review): OvaEdge is declared in this module but not referenced
    # here; confirm whether an "edges" list is missing.
    nodes: t.List[OvaNode]


class Text(t.TypedDict):
    """Shape of the ``text`` section of an xAIF document."""

    # Text content.
    txt: str
    # URL string (presumably the source of the text; confirm).
    url: str


class Graph(t.TypedDict):
    """Top-level shape of an xAIF document."""

    # Argument graph data (nodes, edges and related records).
    AIF: Aif
    # OVA section.
    OVA: Ova
    # Text section.
    text: Text
text: Text
37 changes: 37 additions & 0 deletions tests/test_load_edge.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,22 @@
from arguebuf.load._load_argdown import edge_from_argdown
from arguebuf.load._load_ova import edge_from_ova
from arguebuf.load._load_sadface import edge_from_sadface
from arguebuf.load._load_xaif import edge_from_xaif

# Cases for test_xaif_edge, one tuple per case:
# (raw JSON edge, expected edge id as string, source node id, target node id).
xaif_data = [
(
"""
{
"edgeID": 204,
"fromID": "323_164813044708340528",
"toID": "324_164813044708340528"
}
""",
"204",
"323_164813044708340528",
"324_164813044708340528",
)
]

aif_data = [
(
Expand Down Expand Up @@ -133,6 +149,27 @@
]


@pytest.mark.parametrize("data,id,start,end", xaif_data)
def test_xaif_edge(data, id, start, end):
    """Check that an xAIF edge entry is converted into an edge.

    The edge must connect the two prepared atom nodes and carry the
    stringified edge ID.
    """
    # Imported locally so the fix does not depend on this module's import
    # block: edge_from_xaif is declared with a `config` parameter, and
    # calling it without one raises TypeError.
    from arguebuf.load._config import DefaultConfig

    data_json = json.loads(data)
    edge = edge_from_xaif(
        data_json,
        {
            start: ag.AtomNode(id=start, text=""),
            end: ag.AtomNode(id=end, text=""),
        },
        DefaultConfig,
    )

    assert edge
    assert edge.id == id
    assert isinstance(edge.source, ag.AtomNode)
    assert isinstance(edge.target, ag.AtomNode)
    assert edge.source.id == start
    assert edge.target.id == end
    assert edge.metadata is not None
    assert edge.userdata == {}


@pytest.mark.parametrize("data,id,start,end", argdown_json_data)
def test_argdown_json_edge(data, id, start, end):
data_json = json.loads(data)
Expand Down
55 changes: 55 additions & 0 deletions tests/test_load_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import arguebuf as ag
from arguebuf.load._config import DefaultConfig
from arguebuf.load._load_aif import atom_from_aif
from arguebuf.load._load_xaif import atom_from_xaif, scheme_from_xaif
from arguebuf.load._load_aml import atom_from_aml, scheme_from_aml
from arguebuf.load._load_ova import atom_from_ova
from arguebuf.load._load_sadface import atom_from_sadface, scheme_from_sadface
Expand All @@ -32,6 +33,21 @@
)
]

# Cases for test_xaif_node_AN, one tuple per case:
# (raw JSON node, expected node id, expected text, expected node class).
xaif_data_AtomNode = [
(
"""
{
"nodeID": "1196586_164813044708340528",
"text": "there is an even chance (49%) that Shane Jeffries is involved in corporate espionage",
"type": "I"
}
""",
"1196586_164813044708340528",
"there is an even chance (49%) that Shane Jeffries is involved in corporate espionage",
ag.AtomNode,
)
]

ova_data_AtomNode = [
(
"""
Expand Down Expand Up @@ -169,6 +185,18 @@ def test_aif_node_AN(data, id, text, type, date):
assert node.userdata == {}


@pytest.mark.parametrize("data,id,text,type", xaif_data_AtomNode)
def test_xaif_node_AN(data, id, text, type):
    """Check that an xAIF "I" node entry is converted into an atom node."""
    payload = json.loads(data)
    atom = atom_from_xaif(payload, DefaultConfig)

    # Identity and content come straight from the JSON entry.
    assert atom.id == id
    assert atom.text == text
    assert isinstance(atom, type)
    # Nothing in the entry populates the reference or user data.
    assert atom.reference is None
    assert atom.userdata == {}


@pytest.mark.parametrize(
"data,id,text,type,date",
ova_data_AtomNode,
Expand Down Expand Up @@ -214,6 +242,33 @@ def test_sadface_node_SN(data, id, type, name):
assert isinstance(node.metadata, ag.Metadata)


# Cases for test_xaif_node_SN, one tuple per case:
# (raw JSON node, expected node id, node text, expected node class).
xaif_data_SchemeNode = [
(
"""
{
"nodeID": "1201647_164813044708340528",
"text": "Default Inference",
"type": "RA"
}
""",
"1201647_164813044708340528",
"Default Inference",
ag.SchemeNode,
)
]


@pytest.mark.parametrize("data,id,text,type", xaif_data_SchemeNode)
def test_xaif_node_SN(data, id, text, type):
    """Check that an xAIF "RA" node entry is converted into a scheme node.

    ``text`` is part of the parametrized tuple but is not asserted here.
    """
    payload = json.loads(data)
    scheme_node = scheme_from_xaif(payload, DefaultConfig)

    assert scheme_node.id == id
    # The case is expected to resolve to the default support scheme.
    assert scheme_node.scheme == Support.DEFAULT
    assert isinstance(scheme_node, type)
    assert isinstance(scheme_node.metadata, ag.Metadata)


aml_data_SchemeNode = [
(
"""
Expand Down

0 comments on commit 813b9f9

Please sign in to comment.