This repository has been archived by the owner on Jan 15, 2024. It is now read-only.

Remove gluonnlp.layers.HybridSequential #1427

Merged
merged 2 commits on Nov 5, 2020
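This PR deletes GluonNLP's in-house HybridSequential wrapper and switches every call site to the container that ships with MXNet Gluon, mxnet.gluon.nn.HybridSequential. The change is mechanical; a minimal sketch of the replacement pattern (the layer sizes below are illustrative, not taken from the repository):

    from mxnet.gluon import nn

    # before: from gluonnlp.layers import HybridSequential; net = HybridSequential()
    net = nn.HybridSequential()            # built-in Gluon container
    net.add(nn.Dense(768, flatten=False))  # stack blocks exactly as before
    net.add(nn.Dense(1, flatten=False))
    net.initialize()
    net.hybridize()                        # still hybridizable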
6 changes: 3 additions & 3 deletions docs/examples/question_answering/squad_utils.py
@@ -11,7 +11,7 @@
from typing import Optional, List, Tuple
from gluonnlp.data.tokenizers import BaseTokenizerWithVocab
from gluonnlp.utils.preprocessing import match_tokens_with_char_spans
from gluonnlp.layers import get_activation, HybridSequential
from gluonnlp.layers import get_activation
from gluonnlp.op import select_vectors_by_position
from gluonnlp.attention_cell import masked_logsoftmax, masked_softmax
import string
@@ -183,7 +183,7 @@ def __init__(self, backbone, units=768, layer_norm_eps=1E-12, dropout_prob=0.1,
self.start_scores = nn.Dense(1, flatten=False,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer)
self.end_scores = HybridSequential()
self.end_scores = nn.HybridSequential()
self.end_scores.add(nn.Dense(units, flatten=False,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer))
@@ -192,7 +192,7 @@ def __init__(self, backbone, units=768, layer_norm_eps=1E-12, dropout_prob=0.1,
self.end_scores.add(nn.Dense(1, flatten=False,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer))
self.answerable_scores = HybridSequential()
self.answerable_scores = nn.HybridSequential()
self.answerable_scores.add(nn.Dense(units, flatten=False,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer))
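For context, the end_scores and answerable_scores heads above are plain two-layer stacks. A hedged sketch of what the rebuilt end_scores head computes with the built-in container (initializers, dropout, and the activation layer hidden by the collapsed diff are omitted; the shapes are assumptions):

    from mxnet import np, npx
    from mxnet.gluon import nn
    npx.set_np()

    units = 768
    end_scores = nn.HybridSequential()
    end_scores.add(nn.Dense(units, flatten=False))   # per-token projection
    end_scores.add(nn.Dense(1, flatten=False))       # scalar end-position score per token
    end_scores.initialize()
    scores = end_scores(np.ones((2, 128, units)))    # -> shape (2, 128, 1)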
6 changes: 3 additions & 3 deletions scripts/question_answering/models.py
@@ -2,7 +2,7 @@
from mxnet import np, npx
from mxnet.gluon import nn, HybridBlock
from mxnet.util import use_np
from gluonnlp.layers import get_activation, HybridSequential
from gluonnlp.layers import get_activation
from gluonnlp.op import select_vectors_by_position
from gluonnlp.attention_cell import masked_logsoftmax, masked_softmax

@@ -162,7 +162,7 @@ def __init__(self, backbone, units=768, layer_norm_eps=1E-12, dropout_prob=0.1,
self.start_scores = nn.Dense(1, flatten=False,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer)
self.end_scores = HybridSequential()
self.end_scores = nn.HybridSequential()
self.end_scores.add(nn.Dense(units, flatten=False,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer))
@@ -171,7 +171,7 @@ def __init__(self, backbone, units=768, layer_norm_eps=1E-12, dropout_prob=0.1,
self.end_scores.add(nn.Dense(1, flatten=False,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer))
self.answerable_scores = HybridSequential()
self.answerable_scores = nn.HybridSequential()
self.answerable_scores.add(nn.Dense(units, flatten=False,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer))
57 changes: 3 additions & 54 deletions src/gluonnlp/layers.py
@@ -74,57 +74,6 @@ def get_norm_layer(normalization: str = 'layer_norm',
raise NotImplementedError('The type of normalization must be str')


class HybridSequential(HybridBlock):
"""Stacks HybridBlocks sequentially.

Example::

net = nn.HybridSequential()
net.add(nn.Dense(10, activation='relu'))
net.add(nn.Dense(20))
net.hybridize()
"""
def __init__(self):
super(HybridSequential, self).__init__()
self._layers = []

def add(self, *blocks):
"""Adds block on top of the stack."""
for block in blocks:
self._layers.append(block)
self.register_child(block)

def forward(self, x, *args):
for block in self._children.values():
x = block()(x, *args)
args = []
if isinstance(x, (tuple, list)):
args = x[1:]
x = x[0]
if args:
x = tuple([x] + list(args))
return x

def __repr__(self):
s = '{name}(\n{modstr}\n)'
modstr = '\n'.join([' ({key}): {block}'.format(key=key,
block=_indent(block().__repr__(), 2))
for key, block in self._children.items()])
return s.format(name=self.__class__.__name__, modstr=modstr)

def __getitem__(self, key):
layers = list(self._children.values())[key]
if isinstance(layers, list):
net = type(self)()
net.add(*(l() for l in layers))
return net
else:
return layers()

def __len__(self):
return len(self._children)


@use_np
class NoNorm(HybridBlock):
r"""
@@ -726,7 +675,7 @@ def __init__(self, vocab_size: int,
else:
self.proj_layers = None
else:
self.proj_layers = HybridSequential()
self.proj_layers = nn.HybridSequential()
for i, (l_idx, r_idx) in enumerate(zip([0] + cutoffs, cutoffs + [vocab_size])):
inner_embed_size = int(embed_size / div_val**i)
if inner_embed_size == 0:
@@ -887,8 +836,8 @@ def __init__(self, vocab_size: int, embed_size: int, in_units: int,
use_bias=use_bias,
weight_initializer=weight_initializer,
bias_initializer=bias_initializer)
self.inter_proj_l = HybridSequential()
self.out_proj_l = HybridSequential()
self.inter_proj_l = nn.HybridSequential()
self.out_proj_l = nn.HybridSequential()
if div_val == 1.0:
if in_units != embed_size:
self.inter_proj_l.add(nn.Dense(in_units=in_units,
7 changes: 3 additions & 4 deletions src/gluonnlp/models/albert.py
@@ -46,7 +46,6 @@
from ..layers import get_activation, PositionalEmbedding
from ..op import select_vectors_by_position
from ..data.tokenizers import SentencepieceTokenizer
from ..layers import HybridSequential

albert_cfg_reg = Registry('albert_cfg')

@@ -192,7 +191,7 @@ def __init__(self, units=512, hidden_size=2048,
self._layout = layout


self.all_encoder_groups = HybridSequential()
self.all_encoder_groups = nn.HybridSequential()
for group_idx in range(num_groups):
self.all_encoder_groups.add(
TransformerEncoderLayer(units=units,
@@ -561,7 +560,7 @@ def __init__(self, backbone_cfg,
weight_initializer = self.backbone_model.weight_initializer
if bias_initializer is None:
bias_initializer = self.backbone_model.bias_initializer
self.mlm_decoder = HybridSequential()
self.mlm_decoder = nn.HybridSequential()
# Extra non-linear layer
self.mlm_decoder.add(nn.Dense(units=self.backbone_model.embed_size,
in_units=self.backbone_model.units,
@@ -656,7 +655,7 @@ def __init__(self, backbone_cfg,
self.sop_classifier = nn.Dense(units=2,
in_units=self.backbone_model.units,
weight_initializer=weight_initializer)
self.mlm_decoder = HybridSequential()
self.mlm_decoder = nn.HybridSequential()
# Extra non-linear layer
self.mlm_decoder.add(nn.Dense(units=self.backbone_model.embed_size,
in_units=self.backbone_model.units,
9 changes: 4 additions & 5 deletions src/gluonnlp/models/bert.py
@@ -42,8 +42,7 @@
from ..utils.registry import Registry
from ..initializer import TruncNorm
from ..attention_cell import MultiHeadAttentionCell, gen_self_attn_mask
from ..layers import get_activation, PositionalEmbedding, PositionwiseFFN, InitializerType, \
HybridSequential
from ..layers import get_activation, PositionalEmbedding, PositionwiseFFN, InitializerType
from ..op import select_vectors_by_position
from ..data.tokenizers import HuggingFaceWordPieceTokenizer

@@ -238,7 +237,7 @@ def __init__(self, units: int = 512,
self._output_all_encodings = output_all_encodings
self._layout = layout

self.all_layers = HybridSequential()
self.all_layers = nn.HybridSequential()
for layer_idx in range(num_layers):
self.all_layers.add(
TransformerEncoderLayer(units=units,
@@ -590,7 +589,7 @@ def __init__(self, backbone_cfg,
weight_initializer = self.backbone_model.weight_initializer
if bias_initializer is None:
bias_initializer = self.backbone_model.bias_initializer
self.mlm_decoder = HybridSequential()
self.mlm_decoder = nn.HybridSequential()
# Extra non-linear layer
self.mlm_decoder.add(nn.Dense(units=self.backbone_model.units,
in_units=self.backbone_model.units,
@@ -686,7 +685,7 @@ def __init__(self, backbone_cfg,
self.nsp_classifier = nn.Dense(units=2,
in_units=self.backbone_model.units,
weight_initializer=weight_initializer)
self.mlm_decoder = HybridSequential()
self.mlm_decoder = nn.HybridSequential()
# Extra non-linear layer
self.mlm_decoder.add(nn.Dense(units=self.backbone_model.units,
in_units=self.backbone_model.units,
8 changes: 4 additions & 4 deletions src/gluonnlp/models/electra.py
@@ -37,7 +37,7 @@
from ..registry import BACKBONE_REGISTRY
from ..op import gumbel_softmax, select_vectors_by_position, add_vectors_by_position, update_vectors_by_position
from ..base import get_model_zoo_home_dir, get_repo_model_zoo_url, get_model_zoo_checksum_dir
from ..layers import PositionalEmbedding, get_activation, HybridSequential
from ..layers import PositionalEmbedding, get_activation
from .transformer import TransformerEncoderLayer
from ..initializer import TruncNorm
from ..utils.config import CfgNode as CN
@@ -227,7 +227,7 @@ def __init__(self, units=512,
self._output_attention = output_attention
self._output_all_encodings = output_all_encodings

self.all_encoder_layers = HybridSequential()
self.all_encoder_layers = nn.HybridSequential()
for layer_idx in range(num_layers):
self.all_encoder_layers.add(
TransformerEncoderLayer(units=units,
@@ -632,7 +632,7 @@ def __init__(self, backbone_cfg,
weight_initializer = self.backbone_model.weight_initializer
if bias_initializer is None:
bias_initializer = self.backbone_model.bias_initializer
self.rtd_encoder = HybridSequential()
self.rtd_encoder = nn.HybridSequential()
# Extra non-linear layer
self.rtd_encoder.add(nn.Dense(units=self.backbone_model.units,
in_units=self.backbone_model.units,
@@ -714,7 +714,7 @@ def __init__(self, backbone_cfg,
weight_initializer = self.backbone_model.weight_initializer
if bias_initializer is None:
bias_initializer = self.backbone_model.bias_initializer
self.mlm_decoder = HybridSequential()
self.mlm_decoder = nn.HybridSequential()
# Extra non-linear layer
self.mlm_decoder.add(nn.Dense(units=self.backbone_model.embed_size,
in_units=self.backbone_model.units,
4 changes: 2 additions & 2 deletions src/gluonnlp/models/gpt2.py
@@ -39,7 +39,7 @@
from ..utils.registry import Registry
from ..initializer import TruncNorm
from ..attention_cell import MultiHeadAttentionCell
from ..layers import get_activation, PositionalEmbedding, HybridSequential
from ..layers import get_activation, PositionalEmbedding
from ..data.tokenizers import HuggingFaceByteBPETokenizer


@@ -443,7 +443,7 @@ def __init__(self,
dtype=self._dtype,
method=pos_embed_type
)
self._layers = HybridSequential()
self._layers = nn.HybridSequential()
for layer_idx in range(self._num_layers):
self._layers.add(
GPT2Layer(
10 changes: 5 additions & 5 deletions src/gluonnlp/models/mobilebert.py
@@ -37,7 +37,7 @@
from ..op import select_vectors_by_position
from ..base import get_model_zoo_home_dir, get_repo_model_zoo_url, get_model_zoo_checksum_dir
from ..layers import InitializerType, PositionwiseFFN, PositionalEmbedding, get_norm_layer, \
get_activation, HybridSequential
get_activation
from ..initializer import TruncNorm
from ..utils.config import CfgNode as CN
from ..utils.misc import load_checksum_stats, download
@@ -262,7 +262,7 @@ def __init__(self,
in_channels=real_units,
epsilon=layer_norm_eps)

self.stacked_ffn = HybridSequential()
self.stacked_ffn = nn.HybridSequential()
for ffn_idx in range(num_stacked_ffn):
is_last_ffn = (ffn_idx == (num_stacked_ffn - 1))
# only apply dropout on last ffn layer if use bottleneck
@@ -394,7 +394,7 @@ def __init__(self,
'by the number of heads. Received real_units={}, num_heads={}' \
.format(real_units, num_heads)

self.all_layers = HybridSequential()
self.all_layers = nn.HybridSequential()
for layer_idx in range(num_layers):
self.all_layers.add(
MobileBertEncoderLayer(use_bottleneck=use_bottleneck,
@@ -811,7 +811,7 @@ def __init__(self, backbone_cfg,
weight_initializer = self.backbone_model.weight_initializer
if bias_initializer is None:
bias_initializer = self.backbone_model.bias_initializer
self.mlm_decoder = HybridSequential()
self.mlm_decoder = nn.HybridSequential()
# Extra non-linear layer
self.mlm_decoder.add(nn.Dense(units=self.backbone_model.units,
in_units=self.backbone_model.units,
@@ -921,7 +921,7 @@ def __init__(self, backbone_cfg,
in_units=self.backbone_model.units,
weight_initializer=weight_initializer,
dtype=self.backbone_model.dtype)
self.mlm_decoder = HybridSequential()
self.mlm_decoder = nn.HybridSequential()
# Extra non-linear layer
self.mlm_decoder.add(nn.Dense(units=self.backbone_model.units,
in_units=self.backbone_model.units,
6 changes: 3 additions & 3 deletions src/gluonnlp/models/roberta.py
@@ -39,7 +39,7 @@
from ..op import select_vectors_by_position
from ..base import get_model_zoo_home_dir, get_repo_model_zoo_url, \
get_model_zoo_checksum_dir
from ..layers import PositionalEmbedding, get_activation, HybridSequential
from ..layers import PositionalEmbedding, get_activation
from ..registry import BACKBONE_REGISTRY
from ..utils.misc import download, load_checksum_stats
from ..utils.registry import Registry
@@ -147,7 +147,7 @@ def __init__(self,
self._layout = layout
self._output_all_encodings = output_all_encodings
self._output_attention = output_attention
self.all_layers = HybridSequential()
self.all_layers = nn.HybridSequential()
for layer_idx in range(self.num_layers):
self.all_layers.add(
TransformerEncoderLayer(
@@ -461,7 +461,7 @@ def __init__(self, backbone_cfg,
if bias_initializer is None:
bias_initializer = self.backbone_model.bias_initializer
self.units = self.backbone_model.units
self.mlm_decoder = HybridSequential()
self.mlm_decoder = nn.HybridSequential()
# Extra non-linear layer
self.mlm_decoder.add(nn.Dense(units=self.units,
in_units=self.units,
6 changes: 3 additions & 3 deletions src/gluonnlp/models/transformer.py
@@ -8,7 +8,7 @@
from typing import Optional, Tuple, List
from ..utils.registry import Registry
from ..attention_cell import MultiHeadAttentionCell, gen_self_attn_mask, gen_mem_attn_mask
from ..layers import PositionalEmbedding, PositionwiseFFN, InitializerType, HybridSequential
from ..layers import PositionalEmbedding, PositionwiseFFN, InitializerType
from ..utils.config import CfgNode as CN
from ..sequence_sampler import BaseStepDecoder
__all__ = ['TransformerEncoderLayer', 'TransformerDecoderLayer',
@@ -313,7 +313,7 @@ def __init__(self, num_layers=6, recurrent=False,
self.ln_data = nn.LayerNorm(epsilon=layer_norm_eps,
in_channels=units)
# Construct the intermediate layers
self.layers = HybridSequential()
self.layers = nn.HybridSequential()
real_num_layers = 1 if recurrent else num_layers
for i in range(real_num_layers):
self.layers.add(TransformerEncoderLayer(
@@ -740,7 +740,7 @@ def __init__(self, num_layers=6, recurrent=False,
self.ln_final = nn.LayerNorm(epsilon=layer_norm_eps,
in_channels=units)
# Construct the intermediate layers
self.layers = HybridSequential()
self.layers = nn.HybridSequential()
real_num_layers = 1 if recurrent else num_layers
for i in range(real_num_layers):
self.layers.add(TransformerDecoderLayer(units=units,
4 changes: 2 additions & 2 deletions src/gluonnlp/models/transformer_xl.py
@@ -4,7 +4,7 @@
from ..attention_cell import multi_head_dot_attn, gen_self_attn_mask, gen_mem_attn_mask,\
RelAttentionScoreCell, MultiHeadAttentionCell
from ..layers import get_activation, PositionalEmbedding, PositionwiseFFN,\
AdaptiveEmbedding, ProjectedAdaptiveLogSoftmaxWithLoss, HybridSequential
AdaptiveEmbedding, ProjectedAdaptiveLogSoftmaxWithLoss
from ..utils.config import CfgNode as CN
from ..sequence_sampler import BaseStepDecoder
__all__ = ['TransformerXLDecoderLayer', 'TransformerXLDecoder', 'TransformerXLForLM',
@@ -180,7 +180,7 @@ def __init__(self, num_layers=3,
shape=(num_heads, units // num_heads),
init=bias_initializer,
allow_deferred_init=True)
self.decoder_layers = HybridSequential()
self.decoder_layers = nn.HybridSequential()
for i in range(num_layers):
self.decoder_layers.add(
TransformerXLDecoderLayer(units=units,
3 changes: 1 addition & 2 deletions tests/test_utils_misc.py
@@ -12,7 +12,6 @@
from numpy.testing import assert_allclose
from gluonnlp.utils.misc import AverageSGDTracker, download, sha1sum, logging_config,\
get_mxnet_visible_ctx
from gluonnlp.layers import HybridSequential
mx.npx.set_np()


@@ -23,7 +22,7 @@ def test_average_sgd_tracker():
moving_avg_param = None
net_final_moving_avg_param = None
for use_moving_avg in [False, True]:
net = HybridSequential()
net = nn.HybridSequential()
net.add(nn.Dense(10), nn.Dense(3))
net.initialize(init=mx.init.One())
net.hybridize()