This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Commit f3d209f

Remove gluonnlp.layers.HybridSequential (#1427)
* Remove gluonnlp.layers.HybridSequential

* Bump mxnet dependency
leezu authored Nov 5, 2020
1 parent 2032159 commit f3d209f
Showing 15 changed files with 40 additions and 94 deletions.
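
The change is mechanical: every use of the removed gluonnlp.layers.HybridSequential is replaced by MXNet's built-in Gluon container, which the bumped mxnet dependency provides. A minimal migration sketch (assuming an MXNet 2.x build that exposes mxnet.gluon.nn; the example mirrors the docstring of the removed class):

```python
# Before this commit:
#   from gluonnlp.layers import HybridSequential
#   net = HybridSequential()

# After this commit: use the Gluon built-in container directly.
from mxnet.gluon import nn

net = nn.HybridSequential()
net.add(nn.Dense(10, activation='relu'))
net.add(nn.Dense(20))
net.hybridize()
```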
6 changes: 3 additions & 3 deletions docs/examples/question_answering/squad_utils.py
@@ -11,7 +11,7 @@
 from typing import Optional, List, Tuple
 from gluonnlp.data.tokenizers import BaseTokenizerWithVocab
 from gluonnlp.utils.preprocessing import match_tokens_with_char_spans
-from gluonnlp.layers import get_activation, HybridSequential
+from gluonnlp.layers import get_activation
 from gluonnlp.op import select_vectors_by_position
 from gluonnlp.attention_cell import masked_logsoftmax, masked_softmax
 import string
@@ -183,7 +183,7 @@ def __init__(self, backbone, units=768, layer_norm_eps=1E-12, dropout_prob=0.1,
         self.start_scores = nn.Dense(1, flatten=False,
                                      weight_initializer=weight_initializer,
                                      bias_initializer=bias_initializer)
-        self.end_scores = HybridSequential()
+        self.end_scores = nn.HybridSequential()
         self.end_scores.add(nn.Dense(units, flatten=False,
                                      weight_initializer=weight_initializer,
                                      bias_initializer=bias_initializer))
@@ -192,7 +192,7 @@ def __init__(self, backbone, units=768, layer_norm_eps=1E-12, dropout_prob=0.1,
         self.end_scores.add(nn.Dense(1, flatten=False,
                                      weight_initializer=weight_initializer,
                                      bias_initializer=bias_initializer))
-        self.answerable_scores = HybridSequential()
+        self.answerable_scores = nn.HybridSequential()
         self.answerable_scores.add(nn.Dense(units, flatten=False,
                                             weight_initializer=weight_initializer,
                                             bias_initializer=bias_initializer))
6 changes: 3 additions & 3 deletions scripts/question_answering/models.py
@@ -2,7 +2,7 @@
 from mxnet import np, npx
 from mxnet.gluon import nn, HybridBlock
 from mxnet.util import use_np
-from gluonnlp.layers import get_activation, HybridSequential
+from gluonnlp.layers import get_activation
 from gluonnlp.op import select_vectors_by_position
 from gluonnlp.attention_cell import masked_logsoftmax, masked_softmax
 
@@ -162,7 +162,7 @@ def __init__(self, backbone, units=768, layer_norm_eps=1E-12, dropout_prob=0.1,
         self.start_scores = nn.Dense(1, flatten=False,
                                      weight_initializer=weight_initializer,
                                      bias_initializer=bias_initializer)
-        self.end_scores = HybridSequential()
+        self.end_scores = nn.HybridSequential()
         self.end_scores.add(nn.Dense(units, flatten=False,
                                      weight_initializer=weight_initializer,
                                      bias_initializer=bias_initializer))
@@ -171,7 +171,7 @@ def __init__(self, backbone, units=768, layer_norm_eps=1E-12, dropout_prob=0.1,
         self.end_scores.add(nn.Dense(1, flatten=False,
                                      weight_initializer=weight_initializer,
                                      bias_initializer=bias_initializer))
-        self.answerable_scores = HybridSequential()
+        self.answerable_scores = nn.HybridSequential()
         self.answerable_scores.add(nn.Dense(units, flatten=False,
                                             weight_initializer=weight_initializer,
                                             bias_initializer=bias_initializer))
57 changes: 3 additions & 54 deletions src/gluonnlp/layers.py
@@ -74,57 +74,6 @@ def get_norm_layer(normalization: str = 'layer_norm',
         raise NotImplementedError('The type of normalization must be str')
 
 
-class HybridSequential(HybridBlock):
-    """Stacks HybridBlocks sequentially.
-
-    Example::
-
-        net = nn.HybridSequential()
-        net.add(nn.Dense(10, activation='relu'))
-        net.add(nn.Dense(20))
-        net.hybridize()
-    """
-    def __init__(self):
-        super(HybridSequential, self).__init__()
-        self._layers = []
-
-    def add(self, *blocks):
-        """Adds block on top of the stack."""
-        for block in blocks:
-            self._layers.append(block)
-            self.register_child(block)
-
-    def forward(self, x, *args):
-        for block in self._children.values():
-            x = block()(x, *args)
-            args = []
-            if isinstance(x, (tuple, list)):
-                args = x[1:]
-                x = x[0]
-        if args:
-            x = tuple([x] + list(args))
-        return x
-
-    def __repr__(self):
-        s = '{name}(\n{modstr}\n)'
-        modstr = '\n'.join(['  ({key}): {block}'.format(key=key,
-                                                        block=_indent(block().__repr__(), 2))
-                            for key, block in self._children.items()])
-        return s.format(name=self.__class__.__name__, modstr=modstr)
-
-    def __getitem__(self, key):
-        layers = list(self._children.values())[key]
-        if isinstance(layers, list):
-            net = type(self)()
-            net.add(*(l() for l in layers))
-            return net
-        else:
-            return layers()
-
-    def __len__(self):
-        return len(self._children)
-
-
 @use_np
 class NoNorm(HybridBlock):
     r"""
@@ -726,7 +675,7 @@ def __init__(self, vocab_size: int,
             else:
                 self.proj_layers = None
         else:
-            self.proj_layers = HybridSequential()
+            self.proj_layers = nn.HybridSequential()
             for i, (l_idx, r_idx) in enumerate(zip([0] + cutoffs, cutoffs + [vocab_size])):
                 inner_embed_size = int(embed_size / div_val**i)
                 if inner_embed_size == 0:
@@ -887,8 +836,8 @@ def __init__(self, vocab_size: int, embed_size: int, in_units: int,
                                          use_bias=use_bias,
                                          weight_initializer=weight_initializer,
                                          bias_initializer=bias_initializer)
-        self.inter_proj_l = HybridSequential()
-        self.out_proj_l = HybridSequential()
+        self.inter_proj_l = nn.HybridSequential()
+        self.out_proj_l = nn.HybridSequential()
         if div_val == 1.0:
             if in_units != embed_size:
                 self.inter_proj_l.add(nn.Dense(in_units=in_units,
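
For reference, the removed container mirrored the Gluon built-in: add() registers child blocks, forward() applies them in order (threading tuple outputs through as extra arguments), and indexing/len walk the registered children. A small usage sketch of the built-in replacement (toy layer sizes; assumes the numpy-compatible mode used elsewhere in this repository):

```python
import mxnet as mx
from mxnet.gluon import nn

mx.npx.set_np()  # numpy-compatible mode, as in tests/test_utils_misc.py

net = nn.HybridSequential()
net.add(nn.Dense(16, activation='relu'), nn.Dense(4))  # add() accepts several blocks
net.initialize()
net.hybridize()

x = mx.np.ones((2, 8))
out = net(x)        # blocks applied in order: (2, 8) -> (2, 16) -> (2, 4)
print(out.shape)    # (2, 4)
print(len(net))     # 2 registered child blocks
```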
7 changes: 3 additions & 4 deletions src/gluonnlp/models/albert.py
@@ -46,7 +46,6 @@
 from ..layers import get_activation, PositionalEmbedding
 from ..op import select_vectors_by_position
 from ..data.tokenizers import SentencepieceTokenizer
-from ..layers import HybridSequential
 
 albert_cfg_reg = Registry('albert_cfg')
 
@@ -192,7 +191,7 @@ def __init__(self, units=512, hidden_size=2048,
         self._layout = layout
 
 
-        self.all_encoder_groups = HybridSequential()
+        self.all_encoder_groups = nn.HybridSequential()
         for group_idx in range(num_groups):
             self.all_encoder_groups.add(
                 TransformerEncoderLayer(units=units,
@@ -561,7 +560,7 @@ def __init__(self, backbone_cfg,
             weight_initializer = self.backbone_model.weight_initializer
         if bias_initializer is None:
             bias_initializer = self.backbone_model.bias_initializer
-        self.mlm_decoder = HybridSequential()
+        self.mlm_decoder = nn.HybridSequential()
         # Extra non-linear layer
         self.mlm_decoder.add(nn.Dense(units=self.backbone_model.embed_size,
                                       in_units=self.backbone_model.units,
@@ -656,7 +655,7 @@ def __init__(self, backbone_cfg,
         self.sop_classifier = nn.Dense(units=2,
                                        in_units=self.backbone_model.units,
                                        weight_initializer=weight_initializer)
-        self.mlm_decoder = HybridSequential()
+        self.mlm_decoder = nn.HybridSequential()
         # Extra non-linear layer
         self.mlm_decoder.add(nn.Dense(units=self.backbone_model.embed_size,
                                       in_units=self.backbone_model.units,
9 changes: 4 additions & 5 deletions src/gluonnlp/models/bert.py
@@ -42,8 +42,7 @@
 from ..utils.registry import Registry
 from ..initializer import TruncNorm
 from ..attention_cell import MultiHeadAttentionCell, gen_self_attn_mask
-from ..layers import get_activation, PositionalEmbedding, PositionwiseFFN, InitializerType, \
-    HybridSequential
+from ..layers import get_activation, PositionalEmbedding, PositionwiseFFN, InitializerType
 from ..op import select_vectors_by_position
 from ..data.tokenizers import HuggingFaceWordPieceTokenizer
 
@@ -238,7 +237,7 @@ def __init__(self, units: int = 512,
         self._output_all_encodings = output_all_encodings
         self._layout = layout
 
-        self.all_layers = HybridSequential()
+        self.all_layers = nn.HybridSequential()
         for layer_idx in range(num_layers):
             self.all_layers.add(
                 TransformerEncoderLayer(units=units,
@@ -590,7 +589,7 @@ def __init__(self, backbone_cfg,
             weight_initializer = self.backbone_model.weight_initializer
         if bias_initializer is None:
             bias_initializer = self.backbone_model.bias_initializer
-        self.mlm_decoder = HybridSequential()
+        self.mlm_decoder = nn.HybridSequential()
         # Extra non-linear layer
         self.mlm_decoder.add(nn.Dense(units=self.backbone_model.units,
                                       in_units=self.backbone_model.units,
@@ -686,7 +685,7 @@ def __init__(self, backbone_cfg,
         self.nsp_classifier = nn.Dense(units=2,
                                        in_units=self.backbone_model.units,
                                        weight_initializer=weight_initializer)
-        self.mlm_decoder = HybridSequential()
+        self.mlm_decoder = nn.HybridSequential()
         # Extra non-linear layer
         self.mlm_decoder.add(nn.Dense(units=self.backbone_model.units,
                                       in_units=self.backbone_model.units,
8 changes: 4 additions & 4 deletions src/gluonnlp/models/electra.py
@@ -37,7 +37,7 @@
 from ..registry import BACKBONE_REGISTRY
 from ..op import gumbel_softmax, select_vectors_by_position, add_vectors_by_position, update_vectors_by_position
 from ..base import get_model_zoo_home_dir, get_repo_model_zoo_url, get_model_zoo_checksum_dir
-from ..layers import PositionalEmbedding, get_activation, HybridSequential
+from ..layers import PositionalEmbedding, get_activation
 from .transformer import TransformerEncoderLayer
 from ..initializer import TruncNorm
 from ..utils.config import CfgNode as CN
@@ -227,7 +227,7 @@ def __init__(self, units=512,
         self._output_attention = output_attention
         self._output_all_encodings = output_all_encodings
 
-        self.all_encoder_layers = HybridSequential()
+        self.all_encoder_layers = nn.HybridSequential()
         for layer_idx in range(num_layers):
             self.all_encoder_layers.add(
                 TransformerEncoderLayer(units=units,
@@ -632,7 +632,7 @@ def __init__(self, backbone_cfg,
             weight_initializer = self.backbone_model.weight_initializer
         if bias_initializer is None:
             bias_initializer = self.backbone_model.bias_initializer
-        self.rtd_encoder = HybridSequential()
+        self.rtd_encoder = nn.HybridSequential()
         # Extra non-linear layer
         self.rtd_encoder.add(nn.Dense(units=self.backbone_model.units,
                                       in_units=self.backbone_model.units,
@@ -714,7 +714,7 @@ def __init__(self, backbone_cfg,
             weight_initializer = self.backbone_model.weight_initializer
         if bias_initializer is None:
             bias_initializer = self.backbone_model.bias_initializer
-        self.mlm_decoder = HybridSequential()
+        self.mlm_decoder = nn.HybridSequential()
         # Extra non-linear layer
         self.mlm_decoder.add(nn.Dense(units=self.backbone_model.embed_size,
                                       in_units=self.backbone_model.units,
4 changes: 2 additions & 2 deletions src/gluonnlp/models/gpt2.py
@@ -39,7 +39,7 @@
 from ..utils.registry import Registry
 from ..initializer import TruncNorm
 from ..attention_cell import MultiHeadAttentionCell
-from ..layers import get_activation, PositionalEmbedding, HybridSequential
+from ..layers import get_activation, PositionalEmbedding
 from ..data.tokenizers import HuggingFaceByteBPETokenizer
 
 
@@ -443,7 +443,7 @@ def __init__(self,
             dtype=self._dtype,
             method=pos_embed_type
         )
-        self._layers = HybridSequential()
+        self._layers = nn.HybridSequential()
         for layer_idx in range(self._num_layers):
             self._layers.add(
                 GPT2Layer(
10 changes: 5 additions & 5 deletions src/gluonnlp/models/mobilebert.py
@@ -37,7 +37,7 @@
 from ..op import select_vectors_by_position
 from ..base import get_model_zoo_home_dir, get_repo_model_zoo_url, get_model_zoo_checksum_dir
 from ..layers import InitializerType, PositionwiseFFN, PositionalEmbedding, get_norm_layer, \
-    get_activation, HybridSequential
+    get_activation
 from ..initializer import TruncNorm
 from ..utils.config import CfgNode as CN
 from ..utils.misc import load_checksum_stats, download
@@ -262,7 +262,7 @@ def __init__(self,
                                            in_channels=real_units,
                                            epsilon=layer_norm_eps)
 
-        self.stacked_ffn = HybridSequential()
+        self.stacked_ffn = nn.HybridSequential()
         for ffn_idx in range(num_stacked_ffn):
             is_last_ffn = (ffn_idx == (num_stacked_ffn - 1))
             # only apply dropout on last ffn layer if use bottleneck
@@ -394,7 +394,7 @@ def __init__(self,
             'by the number of heads. Received real_units={}, num_heads={}' \
             .format(real_units, num_heads)
 
-        self.all_layers = HybridSequential()
+        self.all_layers = nn.HybridSequential()
         for layer_idx in range(num_layers):
             self.all_layers.add(
                 MobileBertEncoderLayer(use_bottleneck=use_bottleneck,
@@ -811,7 +811,7 @@ def __init__(self, backbone_cfg,
             weight_initializer = self.backbone_model.weight_initializer
         if bias_initializer is None:
             bias_initializer = self.backbone_model.bias_initializer
-        self.mlm_decoder = HybridSequential()
+        self.mlm_decoder = nn.HybridSequential()
         # Extra non-linear layer
         self.mlm_decoder.add(nn.Dense(units=self.backbone_model.units,
                                       in_units=self.backbone_model.units,
@@ -921,7 +921,7 @@ def __init__(self, backbone_cfg,
                                        in_units=self.backbone_model.units,
                                        weight_initializer=weight_initializer,
                                        dtype=self.backbone_model.dtype)
-        self.mlm_decoder = HybridSequential()
+        self.mlm_decoder = nn.HybridSequential()
         # Extra non-linear layer
         self.mlm_decoder.add(nn.Dense(units=self.backbone_model.units,
                                       in_units=self.backbone_model.units,
6 changes: 3 additions & 3 deletions src/gluonnlp/models/roberta.py
@@ -39,7 +39,7 @@
 from ..op import select_vectors_by_position
 from ..base import get_model_zoo_home_dir, get_repo_model_zoo_url, \
     get_model_zoo_checksum_dir
-from ..layers import PositionalEmbedding, get_activation, HybridSequential
+from ..layers import PositionalEmbedding, get_activation
 from ..registry import BACKBONE_REGISTRY
 from ..utils.misc import download, load_checksum_stats
 from ..utils.registry import Registry
@@ -147,7 +147,7 @@ def __init__(self,
         self._layout = layout
         self._output_all_encodings = output_all_encodings
         self._output_attention = output_attention
-        self.all_layers = HybridSequential()
+        self.all_layers = nn.HybridSequential()
         for layer_idx in range(self.num_layers):
             self.all_layers.add(
                 TransformerEncoderLayer(
@@ -461,7 +461,7 @@ def __init__(self, backbone_cfg,
         if bias_initializer is None:
             bias_initializer = self.backbone_model.bias_initializer
         self.units = self.backbone_model.units
-        self.mlm_decoder = HybridSequential()
+        self.mlm_decoder = nn.HybridSequential()
         # Extra non-linear layer
         self.mlm_decoder.add(nn.Dense(units=self.units,
                                       in_units=self.units,
6 changes: 3 additions & 3 deletions src/gluonnlp/models/transformer.py
@@ -8,7 +8,7 @@
 from typing import Optional, Tuple, List
 from ..utils.registry import Registry
 from ..attention_cell import MultiHeadAttentionCell, gen_self_attn_mask, gen_mem_attn_mask
-from ..layers import PositionalEmbedding, PositionwiseFFN, InitializerType, HybridSequential
+from ..layers import PositionalEmbedding, PositionwiseFFN, InitializerType
 from ..utils.config import CfgNode as CN
 from ..sequence_sampler import BaseStepDecoder
 __all__ = ['TransformerEncoderLayer', 'TransformerDecoderLayer',
@@ -313,7 +313,7 @@ def __init__(self, num_layers=6, recurrent=False,
             self.ln_data = nn.LayerNorm(epsilon=layer_norm_eps,
                                         in_channels=units)
         # Construct the intermediate layers
-        self.layers = HybridSequential()
+        self.layers = nn.HybridSequential()
         real_num_layers = 1 if recurrent else num_layers
         for i in range(real_num_layers):
             self.layers.add(TransformerEncoderLayer(
@@ -740,7 +740,7 @@ def __init__(self, num_layers=6, recurrent=False,
             self.ln_final = nn.LayerNorm(epsilon=layer_norm_eps,
                                          in_channels=units)
         # Construct the intermediate layers
-        self.layers = HybridSequential()
+        self.layers = nn.HybridSequential()
         real_num_layers = 1 if recurrent else num_layers
         for i in range(real_num_layers):
             self.layers.add(TransformerDecoderLayer(units=units,
4 changes: 2 additions & 2 deletions src/gluonnlp/models/transformer_xl.py
@@ -4,7 +4,7 @@
 from ..attention_cell import multi_head_dot_attn, gen_self_attn_mask, gen_mem_attn_mask,\
     RelAttentionScoreCell, MultiHeadAttentionCell
 from ..layers import get_activation, PositionalEmbedding, PositionwiseFFN,\
-    AdaptiveEmbedding, ProjectedAdaptiveLogSoftmaxWithLoss, HybridSequential
+    AdaptiveEmbedding, ProjectedAdaptiveLogSoftmaxWithLoss
 from ..utils.config import CfgNode as CN
 from ..sequence_sampler import BaseStepDecoder
 __all__ = ['TransformerXLDecoderLayer', 'TransformerXLDecoder', 'TransformerXLForLM',
@@ -180,7 +180,7 @@ def __init__(self, num_layers=3,
                                       shape=(num_heads, units // num_heads),
                                       init=bias_initializer,
                                       allow_deferred_init=True)
-        self.decoder_layers = HybridSequential()
+        self.decoder_layers = nn.HybridSequential()
         for i in range(num_layers):
             self.decoder_layers.add(
                 TransformerXLDecoderLayer(units=units,
3 changes: 1 addition & 2 deletions tests/test_utils_misc.py
@@ -12,7 +12,6 @@
 from numpy.testing import assert_allclose
 from gluonnlp.utils.misc import AverageSGDTracker, download, sha1sum, logging_config,\
     get_mxnet_visible_ctx
-from gluonnlp.layers import HybridSequential
 mx.npx.set_np()
 
 
@@ -23,7 +22,7 @@ def test_average_sgd_tracker():
     moving_avg_param = None
     net_final_moving_avg_param = None
     for use_moving_avg in [False, True]:
-        net = HybridSequential()
+        net = nn.HybridSequential()
         net.add(nn.Dense(10), nn.Dense(3))
         net.initialize(init=mx.init.One())
         net.hybridize()
(Diffs for the 3 remaining changed files were not loaded.)

0 comments on commit f3d209f
