Add the AMR graph construction and RGCN in example #578

Open · wants to merge 25 commits into develop
3 changes: 3 additions & 0 deletions .gitignore
@@ -149,3 +149,6 @@ cscope.*
# config file
/config
local_scripts/

**/amr_graph_construction/mawps/*

48 changes: 26 additions & 22 deletions examples/pytorch/rgcn/rgcn.py
@@ -3,7 +3,7 @@
import torch.nn.functional as F
from dgl.nn.pytorch import RelGraphConv

from .base import GNNBase, GNNLayerBase
from graph4nlp.pytorch.modules.graph_embedding_learning.base import GNNBase, GNNLayerBase


class RGCN(GNNBase):
@@ -18,19 +18,18 @@ class RGCN(GNNBase):
Number of RGCN layers.
input_size : int, or pair of ints
Input feature size.
hidden_size: int list of int
hidden_size: int
Hidden layer size.
If a scalar is given, the sizes of all the hidden layers are the same.
If a list of scalar is given, each element in the list is the size of each hidden layer.
Example: [100,50]
output_size : int
Output feature size.
num_rels : int
Number of relations.
num_bases : int, optional
Number of bases. Needed when ``regularizer`` is specified. Default: ``None``.
Number of bases. Needed when ``regularizer`` is specified. Default: ``-1`` [all].
use_self_loop : bool, optional
True to include self loop message. Default: ``True``.
gpu : int, optional
GPU index to use. Default: ``-1`` [cpu].
dropout : float, optional
Dropout rate. Default: ``0.0``
"""
@@ -42,12 +41,14 @@ def __init__(
hidden_size,
output_size,
num_rels,
num_bases=None,
num_bases=-1,
use_self_loop=True,
dropout=0.0,
):
super(RGCN, self).__init__()
self.num_layers = num_layers
if num_bases == -1:
num_bases = num_rels
self.num_rels = num_rels
self.num_bases = num_bases
self.use_self_loop = use_self_loop
@@ -75,8 +76,7 @@ def __init__(
)
)
# hidden layers
for l in range(1, self.num_layers - 1):
# due to multi-head, the input_size = hidden_size * num_heads
for l in range(1, self.num_layers-1):
self.RGCN_layers.append(
RGCNLayer(
hidden_size[l - 1],
@@ -93,7 +93,7 @@ def __init__(
# output projection
self.RGCN_layers.append(
RGCNLayer(
hidden_size[-1] if self.num_layers > 1 else input_size,
hidden_size,
output_size,
num_rels=self.num_rels,
regularizer="basis",
@@ -105,6 +105,9 @@ def __init__(
)
)

if self.gpu != -1:
self.to(device=self.gpu)

def forward(self, graph):
r"""Compute RGCN layer.

@@ -122,18 +125,19 @@ def forward(self, graph):
named as "node_emb".
"""

h = graph.node_features["node_feat"]
# get the node feature tensor from graph
g = graph.to_dgl() # transfer the current NLPgraph to DGL graph
edge_type = g.edata[dgl.ETYPE].long()
# output projection
if self.num_layers > 1:
for l in range(0, self.num_layers - 1):
h = self.RGCN_layers[l](g, h, edge_type)

# convert the current NLPGraph to a DGL graph
g = graph.to_dgl()
h = graph.node_features['node_feat']
edge_type = graph.edge_features['token_id'].squeeze(1)
for l in range(self.num_layers - 1):
h = self.RGCN_layers[l](g, h, edge_type)
h = self.dropout(F.relu(h))
logits = self.RGCN_layers[-1](g, h, edge_type)

# put the results into the NLPGraph
# graph.node_features['node_feat'] = h
graph.node_features["node_emb"] = logits

graph.node_features["node_emb"] = logits # put the results into the NLPGraph
return graph


@@ -176,7 +180,7 @@ def __init__(
output_size,
num_rels,
regularizer=None,
num_bases=None,
num_bases=-1,
bias=True,
activation=None,
self_loop=False,
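As a usage note (not part of the diff): a minimal sketch of driving the revised module. It assumes the input projection consumes the scalar hidden_size the same way the revised output projection does, that a GraphData instance carries the "node_feat" node feature and "token_id" edge feature the new forward() reads, and that the import path mirrors the file location; sizes and data are illustrative.

import torch
from graph4nlp.pytorch.data.data import GraphData
from examples.pytorch.rgcn.rgcn import RGCN

# Toy graph with the two features forward() expects.
graph = GraphData()
graph.add_nodes(4)
graph.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
graph.node_features["node_feat"] = torch.randn(4, 16)
# One relation id per edge, shape (num_edges, 1) before the squeeze in forward().
graph.edge_features["token_id"] = torch.randint(0, 3, (4, 1))

model = RGCN(
    num_layers=2,
    input_size=16,
    hidden_size=8,  # the revised docstring documents a single int
    output_size=4,
    num_rels=3,     # must cover every id in edge_features["token_id"]
)
graph = model(graph)
print(graph.node_features["node_emb"].shape)  # expected: torch.Size([4, 4])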
48 changes: 32 additions & 16 deletions graph4nlp/pytorch/data/data.py
@@ -8,7 +8,7 @@
"""
import os
import warnings
from collections import namedtuple
from collections import namedtuple, Counter
from typing import Any, Callable, Dict, List, Tuple, Union
import dgl
import scipy.sparse
@@ -99,6 +99,7 @@ def __init__(self, src=None, device: str = None, is_hetero: bool = False):
self.batch_size = None # Batch size
self._batch_num_nodes = None # Subgraph node number list with the length of batch size
self._batch_num_edges = None # Subgraph edge number list with the length of batch size
self.batch_graph_attributes = [] # Subgraph attribute list with the length of batch size

if src is not None:
if isinstance(src, GraphData):
@@ -176,7 +177,7 @@ def add_nodes(self, node_num: int, ntypes: List[str] = None):
)

if not self.is_hetero:
if ntypes is None:
if ntypes is not None:
raise ValueError(
"The graph is homogeneous, ntypes should be None. Got {}".format(ntypes)
)
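For reference, the corrected guard now rejects ntypes on homogeneous graphs instead of requiring it. A small behavior sketch, illustrative only:

from graph4nlp.pytorch.data.data import GraphData

g = GraphData()  # homogeneous by default
g.add_nodes(3)   # fine: ntypes omitted on a homogeneous graph
try:
    g.add_nodes(2, ntypes=["a", "b"])  # now raises, as the fixed check intends
except ValueError as e:
    print(e)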
@@ -878,15 +879,19 @@ def from_dgl(self, dgl_g: dgl.DGLGraph, is_hetero=False):
# Add nodes
self.add_nodes(dgl_g.number_of_nodes())
for k, v in dgl_g.ndata.items():
self.node_features[k] = v
self.node_features['node_'+k] = v

# node_features['node_embed'] -> tensor.size((num_of_node, emb_dim))

# Add edges
src_tensor, tgt_tensor = dgl_g.edges()
src_list = list(src_tensor.detach().cpu().numpy())
tgt_list = list(tgt_tensor.detach().cpu().numpy())
self.add_edges(src_list, tgt_list)
for k, v in dgl_g.edata.items():
self.edge_features[k] = v
self.edge_features['edge_'+k] = v
# edge_features['edge_emb'] -> tensor.size((number_of_edge, emb_dim))
# edge_features['type'] -> tensor.size((number_of_edge,))
else:
self.is_hetero = True
# For heterogeneous DGL graphs, we perform the same routines for nodes and edges.
@@ -904,33 +909,38 @@ def from_dgl(self, dgl_g: dgl.DGLGraph, is_hetero=False):
# for feature_name, feature_value in node_data.items():
# self.node_features[feature_name] = feature_value
node_data = dgl_g.ndata
ntypes = []
# ntypes = []
ntypes = [None for _ in range(dgl_g.number_of_nodes())]
processed_node_types = False
node_feat_dict = {}
for feature_name, data_dict in node_data.items():
if not processed_node_types:
for node_type, node_feature in data_dict.items():
ntypes += [node_type] * len(node_feature)
for nidx in node_feature:
ntypes[nidx] = node_type
# ntypes += [node_type] * len(node_feature)
processed_node_types = True
# for node_type, node_feature in data_dict.items():
node_feat_dict[feature_name] = torch.cat(list(data_dict.values()), dim=0)
self.add_nodes(len(ntypes), ntypes=ntypes)
for feature_name, feature_value in node_feat_dict.items():
self.node_features[feature_name] = feature_value
self.node_features['node_'+feature_name] = feature_value
# do the same thing for edges
dgl_g_etypes = dgl_g.canonical_etypes
# Add edges first
edge_feature_dict = {}
for etype in dgl_g_etypes:
num_edges = dgl_g.num_edges(etype)
src_type, r_type, dst_type = etype
srcs, dsts = dgl_g.find_edges(
torch.tensor(list(range(num_edges)), dtype=torch.long), etype
)
# srcs, dsts = dgl_g.find_edges(
# torch.tensor(list(range(num_edges)), dtype=torch.long), etype
# )
srcs, dsts = dgl_g.edges(etype=etype)
srcs, dsts = (
srcs.detach().cpu().numpy().tolist(),
dsts.detach().cpu().numpy().tolist(),
)

self.add_edges(srcs, dsts, etypes=[etype] * num_edges)
if len(dgl_g_etypes) > 1:
for feature_name, feature_dict in dgl_g.edata.items():
Expand All @@ -945,7 +955,7 @@ def from_dgl(self, dgl_g: dgl.DGLGraph, is_hetero=False):
edge_feature_dict[feature_name] = feature_value
# Add edge features then
for feat_name, feat_value in edge_feature_dict.items():
self.edge_features[feat_name] = feat_value
self.edge_features['edge_'+feat_name] = feat_value
# edge_data = dgl_g.edata
# etypes = []
# processed_edge_types = False
@@ -1330,22 +1340,23 @@ def split_features(self, input_tensor: torch.Tensor, type: str = "node") -> torch.Tensor:
return output


def from_dgl(g: dgl.DGLGraph) -> GraphData:
def from_dgl(g: dgl.DGLGraph, is_hetero=False) -> GraphData:
"""
Convert a dgl.DGLGraph to a GraphData object.

Parameters
----------
g : dgl.DGLGraph
The source graph in DGLGraph format.

is_hetero : bool, optional
Whether to treat the source graph as heterogeneous. Default: ``False``.
Returns
-------
GraphData
The converted graph in GraphData format.
"""
graph = GraphData(is_hetero=not g.is_homogeneous)
graph.from_dgl(g, is_hetero=not g.is_homogeneous)
graph = GraphData(is_hetero=is_hetero)
graph.from_dgl(g, is_hetero=is_hetero)
return graph
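A quick sketch of the updated conversion path for a homogeneous graph. Feature names here are illustrative; the point is the new 'node_'/'edge_' prefixes that GraphData.from_dgl applies when copying ndata/edata:

import dgl
import torch
from graph4nlp.pytorch.data.data import from_dgl

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.ndata["feat"] = torch.randn(3, 4)
g.edata["type"] = torch.tensor([0, 1, 0])

graph = from_dgl(g)  # is_hetero now defaults to False
node_feat = graph.node_features["node_feat"]  # copied from ndata["feat"]
edge_type = graph.edge_features["edge_type"]  # copied from edata["type"]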


@@ -1456,7 +1467,11 @@ def stack_edge_indices(gs):
big_graph._batch_num_nodes = [g.get_node_num() for g in graphs]
big_graph._batch_num_edges = [g.get_edge_num() for g in graphs]

# Step 8: merge node and edge types if the batch is heterograph
# Step 8: Insert graph attributes
for g in graphs:
big_graph.batch_graph_attributes.append(g.graph_attributes)

# Step 9: merge node and edge types if the batch is heterograph
if is_heterograph:
node_types = []
edge_types = []
@@ -1501,6 +1516,7 @@ def from_batch(batch: GraphData) -> List[GraphData]:
cum_n_edges += num_edges[i]
cum_n_nodes += num_nodes[i]
ret.append(g)
g.graph_attributes = batch.batch_graph_attributes[i]

# Add node and edge features
for k, v in batch._node_features.items():
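Finally, a round-trip sketch for the new batch_graph_attributes bookkeeping. It assumes the batching entry point is this module's to_batch (not shown in the hunks above) and that graph_attributes is a plain per-graph dict, which is how the diff uses it:

from graph4nlp.pytorch.data.data import GraphData, from_batch, to_batch

graphs = []
for i in range(3):
    g = GraphData()
    g.add_nodes(2)
    g.add_edges([0], [1])
    g.graph_attributes["graph_id"] = i  # per-graph attribute to preserve
    graphs.append(g)

batch = to_batch(graphs)     # Step 8 now records each graph's attributes
singles = from_batch(batch)  # each split graph gets its attributes back
assert [s.graph_attributes["graph_id"] for s in singles] == [0, 1, 2]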