Add the AMR graph construction and RGCN in example #578

Open · wants to merge 25 commits into develop
3 changes: 3 additions & 0 deletions .gitignore
@@ -149,3 +149,6 @@ cscope.*
# config file
/config
local_scripts/

**/amr_graph_construction/mawps/*

48 changes: 26 additions & 22 deletions examples/pytorch/rgcn/rgcn.py
@@ -3,7 +3,7 @@
import torch.nn.functional as F
from dgl.nn.pytorch import RelGraphConv

from .base import GNNBase, GNNLayerBase
from graph4nlp.pytorch.modules.graph_embedding_learning.base import GNNBase, GNNLayerBase


class RGCN(GNNBase):
@@ -18,19 +18,18 @@ class RGCN(GNNBase):
Number of RGCN layers.
input_size : int, or pair of ints
Input feature size.
hidden_size: int list of int
hidden_size: int
Hidden layer size.
If a scalar is given, the sizes of all the hidden layers are the same.
If a list of scalar is given, each element in the list is the size of each hidden layer.
Example: [100,50]
output_size : int
Output feature size.
num_rels : int
Number of relations.
num_bases : int, optional
Number of bases. Needed when ``regularizer`` is specified. Default: ``None``.
Number of bases. Needed when ``regularizer`` is specified. Default: ``-1`` [all].
use_self_loop : bool, optional
True to include self loop message. Default: ``True``.
gpu : int, optional
GPU index to use. Default: ``-1`` [cpu].
dropout : float, optional
Dropout rate. Default: ``0.0``
"""
@@ -42,12 +41,14 @@ def __init__(
hidden_size,
output_size,
num_rels,
num_bases=None,
num_bases=-1,
use_self_loop=True,
dropout=0.0,
):
super(RGCN, self).__init__()
self.num_layers = num_layers
if num_bases == -1:
num_bases = num_rels
self.num_rels = num_rels
self.num_bases = num_bases
self.use_self_loop = use_self_loop
@@ -75,8 +76,7 @@ def __init__(
)
)
# hidden layers
for l in range(1, self.num_layers - 1):
# due to multi-head, the input_size = hidden_size * num_heads
for l in range(1, self.num_layers-1):
self.RGCN_layers.append(
RGCNLayer(
hidden_size[l - 1],
@@ -93,7 +93,7 @@ def __init__(
# output projection
self.RGCN_layers.append(
RGCNLayer(
hidden_size[-1] if self.num_layers > 1 else input_size,
hidden_size,
output_size,
num_rels=self.num_rels,
regularizer="basis",
@@ -105,6 +105,9 @@ def __init__(
)
)

if self.gpu != -1:
self.to(device=self.gpu)

def forward(self, graph):
r"""Compute RGCN layer.

@@ -122,18 +125,19 @@ def forward(self, graph):
named as "node_emb".
"""

h = graph.node_features["node_feat"]
# get the node feature tensor from graph
g = graph.to_dgl() # transfer the current NLPgraph to DGL graph
edge_type = g.edata[dgl.ETYPE].long()
# output projection
if self.num_layers > 1:
for l in range(0, self.num_layers - 1):
h = self.RGCN_layers[l](g, h, edge_type)

# convert the current NLPGraph to a DGL graph
g = graph.to_dgl()
h = graph.node_features['node_feat']
edge_type = graph.edge_features['token_id'].squeeze(1)
for l in range(self.num_layers - 1):
h = self.RGCN_layers[l](g, h, edge_type)
h = self.dropout(F.relu(h))
logits = self.RGCN_layers[-1](g, h, edge_type)

# put the results into the NLPGraph
# graph.node_features['node_feat'] = h
graph.node_features["node_emb"] = logits

graph.node_features["node_emb"] = logits # put the results into the NLPGraph
return graph


@@ -176,7 +180,7 @@ def __init__(
output_size,
num_rels,
regularizer=None,
num_bases=None,
num_bases=-1,
bias=True,
activation=None,
self_loop=False,
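As a usage note (not part of the diff): a minimal sketch of driving the revised module. It assumes the input projection consumes the scalar hidden_size the same way the revised output projection does, that a GraphData instance carries the "node_feat" node feature and "token_id" edge feature the new forward() reads, and that the import path mirrors the file location; sizes and data are illustrative.

import torch
from graph4nlp.pytorch.data.data import GraphData
from examples.pytorch.rgcn.rgcn import RGCN

# Toy graph with the two features forward() expects.
graph = GraphData()
graph.add_nodes(4)
graph.add_edges([0, 1, 2, 3], [1, 2, 3, 0])
graph.node_features["node_feat"] = torch.randn(4, 16)
# One relation id per edge, shape (num_edges, 1) before the squeeze in forward().
graph.edge_features["token_id"] = torch.randint(0, 3, (4, 1))

model = RGCN(
    num_layers=2,
    input_size=16,
    hidden_size=8,  # the revised docstring documents a single int
    output_size=4,
    num_rels=3,     # must cover every id in edge_features["token_id"]
)
graph = model(graph)
print(graph.node_features["node_emb"].shape)  # expected: torch.Size([4, 4])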
48 changes: 32 additions & 16 deletions graph4nlp/pytorch/data/data.py
@@ -8,7 +8,7 @@
"""
import os
import warnings
from collections import namedtuple
from collections import namedtuple, Counter
from typing import Any, Callable, Dict, List, Tuple, Union
import dgl
import scipy.sparse
@@ -99,6 +99,7 @@ def __init__(self, src=None, device: str = None, is_hetero: bool = False):
self.batch_size = None # Batch size
self._batch_num_nodes = None # Subgraph node number list with the length of batch size
self._batch_num_edges = None # Subgraph edge number list with the length of batch size
self.batch_graph_attributes = [] # Subgraph attribute list with the length of batch size

if src is not None:
if isinstance(src, GraphData):
@@ -176,7 +177,7 @@ def add_nodes(self, node_num: int, ntypes: List[str] = None):
)

if not self.is_hetero:
if ntypes is None:
if ntypes is not None:
raise ValueError(
"The graph is homogeneous, ntypes should be None. Got {}".format(ntypes)
)
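For reference, the corrected guard now rejects ntypes on homogeneous graphs instead of requiring it. A small behavior sketch, illustrative only:

from graph4nlp.pytorch.data.data import GraphData

g = GraphData()  # homogeneous by default
g.add_nodes(3)   # fine: ntypes omitted on a homogeneous graph
try:
    g.add_nodes(2, ntypes=["a", "b"])  # now raises, as the fixed check intends
except ValueError as e:
    print(e)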
@@ -878,15 +879,19 @@ def from_dgl(self, dgl_g: dgl.DGLGraph, is_hetero=False):
# Add nodes
self.add_nodes(dgl_g.number_of_nodes())
for k, v in dgl_g.ndata.items():
self.node_features[k] = v
self.node_features['node_'+k] = v

# node_features['node_embed'] -> tensor.size((num_of_node, emb_dim))

# Add edges
src_tensor, tgt_tensor = dgl_g.edges()
src_list = list(src_tensor.detach().cpu().numpy())
tgt_list = list(tgt_tensor.detach().cpu().numpy())
self.add_edges(src_list, tgt_list)
for k, v in dgl_g.edata.items():
self.edge_features[k] = v
self.edge_features['edge_'+k] = v
# edge_features['edge_emb'] -> tensor.size((number_of_edge, emb_dim))
# edge_features['type'] -> tensor.size((number_of_edge,))
else:
self.is_hetero = True
# For heterogeneous DGL graphs, we perform the same routines for nodes and edges.
@@ -904,33 +909,38 @@ def from_dgl(self, dgl_g: dgl.DGLGraph, is_hetero=False):
# for feature_name, feature_value in node_data.items():
# self.node_features[feature_name] = feature_value
node_data = dgl_g.ndata
ntypes = []
# ntypes = []
ntypes = [None for _ in range(dgl_g.number_of_nodes())]
processed_node_types = False
node_feat_dict = {}
for feature_name, data_dict in node_data.items():
if not processed_node_types:
for node_type, node_feature in data_dict.items():
ntypes += [node_type] * len(node_feature)
for nidx in node_feature:
ntypes[nidx] = node_type
# ntypes += [node_type] * len(node_feature)
processed_node_types = True
# for node_type, node_feature in data_dict.items():
node_feat_dict[feature_name] = torch.cat(list(data_dict.values()), dim=0)
self.add_nodes(len(ntypes), ntypes=ntypes)
for feature_name, feature_value in node_feat_dict.items():
self.node_features[feature_name] = feature_value
self.node_features['node_'+feature_name] = feature_value
# do the same thing for edges
dgl_g_etypes = dgl_g.canonical_etypes
# Add edges first
edge_feature_dict = {}
for etype in dgl_g_etypes:
num_edges = dgl_g.num_edges(etype)
src_type, r_type, dst_type = etype
srcs, dsts = dgl_g.find_edges(
torch.tensor(list(range(num_edges)), dtype=torch.long), etype
)
# srcs, dsts = dgl_g.find_edges(
# torch.tensor(list(range(num_edges)), dtype=torch.long), etype
# )
srcs, dsts = dgl_g.edges(etype=etype)
srcs, dsts = (
srcs.detach().cpu().numpy().tolist(),
dsts.detach().cpu().numpy().tolist(),
)

self.add_edges(srcs, dsts, etypes=[etype] * num_edges)
if len(dgl_g_etypes) > 1:
for feature_name, feature_dict in dgl_g.edata.items():
Expand All @@ -945,7 +955,7 @@ def from_dgl(self, dgl_g: dgl.DGLGraph, is_hetero=False):
edge_feature_dict[feature_name] = feature_value
# Add edge features then
for feat_name, feat_value in edge_feature_dict.items():
self.edge_features[feat_name] = feat_value
self.edge_features['edge_'+feat_name] = feat_value
# edge_data = dgl_g.edata
# etypes = []
# processed_edge_types = False
@@ -1330,22 +1340,23 @@ def split_features(self, input_tensor: torch.Tensor, type: str = "node") -> torch.Tensor:
return output


def from_dgl(g: dgl.DGLGraph) -> GraphData:
def from_dgl(g: dgl.DGLGraph, is_hetero=False) -> GraphData:
"""
Convert a dgl.DGLGraph to a GraphData object.

Parameters
----------
g : dgl.DGLGraph
The source graph in DGLGraph format.

is_hetero : bool, optional
Whether to treat the source graph as heterogeneous. Default: ``False``.
Returns
-------
GraphData
The converted graph in GraphData format.
"""
graph = GraphData(is_hetero=not g.is_homogeneous)
graph.from_dgl(g, is_hetero=not g.is_homogeneous)
graph = GraphData(is_hetero=is_hetero)
graph.from_dgl(g, is_hetero=is_hetero)
return graph
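A quick sketch of the updated conversion path for a homogeneous graph. Feature names here are illustrative; the point is the new 'node_'/'edge_' prefixes that GraphData.from_dgl applies when copying ndata/edata:

import dgl
import torch
from graph4nlp.pytorch.data.data import from_dgl

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.ndata["feat"] = torch.randn(3, 4)
g.edata["type"] = torch.tensor([0, 1, 0])

graph = from_dgl(g)  # is_hetero now defaults to False
node_feat = graph.node_features["node_feat"]  # copied from ndata["feat"]
edge_type = graph.edge_features["edge_type"]  # copied from edata["type"]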


@@ -1456,7 +1467,11 @@ def stack_edge_indices(gs):
big_graph._batch_num_nodes = [g.get_node_num() for g in graphs]
big_graph._batch_num_edges = [g.get_edge_num() for g in graphs]

# Step 8: merge node and edge types if the batch is heterograph
# Step 8: Insert graph attributes
for g in graphs:
big_graph.batch_graph_attributes.append(g.graph_attributes)

# Step 9: merge node and edge types if the batch is heterograph
if is_heterograph:
node_types = []
edge_types = []
@@ -1501,6 +1516,7 @@ def from_batch(batch: GraphData) -> List[GraphData]:
cum_n_edges += num_edges[i]
cum_n_nodes += num_nodes[i]
ret.append(g)
g.graph_attributes = batch.batch_graph_attributes[i]

# Add node and edge features
for k, v in batch._node_features.items():
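Finally, a round-trip sketch for the new batch_graph_attributes bookkeeping. It assumes the batching entry point is this module's to_batch (not shown in the hunks above) and that graph_attributes is a plain per-graph dict, which is how the diff uses it:

from graph4nlp.pytorch.data.data import GraphData, from_batch, to_batch

graphs = []
for i in range(3):
    g = GraphData()
    g.add_nodes(2)
    g.add_edges([0], [1])
    g.graph_attributes["graph_id"] = i  # per-graph attribute to preserve
    graphs.append(g)

batch = to_batch(graphs)     # Step 8 now records each graph's attributes
singles = from_batch(batch)  # each split graph gets its attributes back
assert [s.graph_attributes["graph_id"] for s in singles] == [0, 1, 2]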