
Try to use the new None feature
Remove the temporary empty-list hacks that worked around HybridBlock's
lack of support for None inputs, and remove leftover debug prints.
sxjscience committed Oct 13, 2019
1 parent ec7c653 commit 8c92e03
Showing 12 changed files with 7 additions and 71 deletions.
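
Note: the "new None feature" here is HybridBlock accepting None-valued inputs in hybrid_forward, available in the nightly builds this commit pins (mxnet>=1.6.0b20191006). That is what lets the empty-list placeholders below be deleted. A minimal sketch of the pattern, with a hypothetical block name:

import mxnet as mx
from mxnet.gluon import HybridBlock, nn

class ToyBlock(HybridBlock):
    """Hypothetical block with an optional input that may be None."""
    def __init__(self, **kwargs):
        super(ToyBlock, self).__init__(**kwargs)
        with self.name_scope():
            self.proj = nn.Dense(4, flatten=False)

    def hybrid_forward(self, F, x, mask=None):  # pylint: disable=arguments-differ
        out = self.proj(x)
        if mask is not None:  # no `mask = [] if mask is None else mask` sentinel needed
            out = F.broadcast_mul(out, F.expand_dims(mask, axis=-1))
        return out

net = ToyBlock()
net.initialize()
net.hybridize()
y = net(mx.nd.random.normal(shape=(2, 3, 5)))  # mask=None flows through unchanged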
2 changes: 1 addition & 1 deletion env/cpu/py3-master.yml
@@ -19,7 +19,7 @@ dependencies:
   - scipy=1.3.1
   - pip:
     - pylint-quotes==0.2.1
-    - mxnet-mkl>=1.6.0b20190729
+    - mxnet-mkl>=1.6.0b20191006
     - sacremoses
     - sentencepiece<0.2
     - sphinx-autodoc-typehints==1.7.0
2 changes: 1 addition & 1 deletion env/cpu/py3.yml
@@ -30,7 +30,7 @@ dependencies:
   - scipy=1.3.1
   - pip:
     - pylint-quotes==0.2.1
-    - mxnet-mkl>=1.5.0
+    - mxnet-mkl>=1.6.0b20191006
     - sacremoses
     - sentencepiece<0.2
     - https://github.com/szha/mx-theme/tarball/master
2 changes: 1 addition & 1 deletion env/docker/py3.yml
@@ -30,7 +30,7 @@ dependencies:
   - scikit-learn=0.21.3
   - pip:
     - pylint-quotes<0.2
-    - mxnet-cu101mkl>=1.5.0
+    - mxnet-cu101mkl>=1.6.0b20191006
     - sacremoses
     - sentencepiece<0.2
     - https://github.com/szha/mx-theme/tarball/master
2 changes: 1 addition & 1 deletion env/gpu/py3-master.yml
@@ -29,7 +29,7 @@ dependencies:
   - scipy=1.3.1
   - pip:
     - pylint-quotes<0.2
-    - mxnet-cu101mkl>=1.6.0b20190730
+    - mxnet-cu101mkl>=1.6.0b20191006
     - sacremoses
     - sentencepiece<0.2
     - https://github.com/szha/mx-theme/tarball/master
2 changes: 1 addition & 1 deletion env/gpu/py3.yml
@@ -29,7 +29,7 @@ dependencies:
   - scipy=1.3.1
   - pip:
     - pylint-quotes==0.2.1
-    - mxnet-cu101mkl>=1.5.0
+    - mxnet-cu101mkl>=1.6.0b20191006
     - sacremoses
     - sentencepiece<0.2
     - https://github.com/szha/mx-theme/tarball/master
8 changes: 1 addition & 7 deletions src/gluonnlp/model/attention_cell.py
@@ -151,15 +151,9 @@ def __call__(self, query, key, value=None, mask=None):  # pylint: disable=arguments-differ
         """
         return super(AttentionCell, self).__call__(query, key, value, mask)
 
-    def forward(self, query, key, value=None, mask=None):  # pylint: disable=arguments-differ
+    def hybrid_forward(self, F, query, key, value=None, mask=None):  # pylint: disable=arguments-differ
         if value is None:
             value = key
-        if mask is None:
-            return super(AttentionCell, self).forward(query, key, value)
-        else:
-            return super(AttentionCell, self).forward(query, key, value, mask)
-
-    def hybrid_forward(self, F, query, key, value, mask=None):  # pylint: disable=arguments-differ
         att_weights = self._compute_weight(F, query, key, mask)
         context_vec = self._read_by_weight(F, att_weights, value)
         return context_vec, att_weights
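
Note: with forward folded into hybrid_forward, value (and mask) may be omitted even on a hybridized cell. A minimal usage sketch, assuming the nightly MXNet pinned above and using DotProductAttentionCell purely for illustration:

import mxnet as mx
from gluonnlp.model.attention_cell import DotProductAttentionCell

cell = DotProductAttentionCell()
cell.initialize()
cell.hybridize()  # value=None / mask=None are now handled inside hybrid_forward
query = mx.nd.random.normal(shape=(2, 5, 8))  # (batch_size, query_length, units)
key = mx.nd.random.normal(shape=(2, 7, 8))    # (batch_size, key_length, units)
context_vec, att_weights = cell(query, key)   # value defaults to key
print(context_vec.shape, att_weights.shape)   # (2, 5, 8) (2, 5, 7)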
21 changes: 0 additions & 21 deletions src/gluonnlp/model/bert.py
@@ -421,9 +421,6 @@ def __call__(self, inputs, token_types, valid_length=None, masked_positions=None):
         This is used in training or fine-tuning a BERT model.
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        valid_length = [] if valid_length is None else valid_length
-        masked_positions = [] if masked_positions is None else masked_positions
         return super(BERTModel, self).__call__(inputs, token_types,
                                                valid_length, masked_positions)
 
@@ -433,10 +430,6 @@ def hybrid_forward(self, F, inputs, token_types, valid_length=None, masked_positions=None):
         This is used in training or fine-tuning a BERT model.
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None
-        if isinstance(masked_positions, list) and len(masked_positions) == 0:
-            masked_positions = None
-
         outputs = []
         seq_out, attention_out = self._encode_sequence(inputs, token_types, valid_length)
         outputs.append(seq_out)
 
@@ -597,9 +590,6 @@ def __call__(self, inputs, valid_length=None, masked_positions=None):
         This is used in training or fine-tuning a BERT model.
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        valid_length = [] if valid_length is None else valid_length
-        masked_positions = [] if masked_positions is None else masked_positions
         return super(RoBERTaModel, self).__call__(inputs, [], valid_length=valid_length,
                                                   masked_positions=masked_positions)
 
@@ -654,8 +644,6 @@ def __call__(self, inputs, token_types, valid_length=None):
         outputs : NDArray or Symbol
             Shape (batch_size, num_classes)
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        valid_length = [] if valid_length is None else valid_length
         return super(BERTClassifier, self).__call__(inputs, token_types, valid_length)
 
     def hybrid_forward(self, F, inputs, token_types, valid_length=None):
 
@@ -677,9 +665,6 @@ def hybrid_forward(self, F, inputs, token_types, valid_length=None):
         outputs : NDArray
             Shape (batch_size, num_classes)
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None
-        if isinstance(valid_length, list) and len(valid_length) == 0:
-            valid_length = None
         _, pooler_out = self.bert(inputs, token_types, valid_length)
         return self.classifier(pooler_out)
 
@@ -742,8 +727,6 @@ def __call__(self, inputs, valid_length=None):
         outputs : NDArray or Symbol
             Shape (batch_size, num_classes)
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        valid_length = [] if valid_length is None else valid_length
         return super(RoBERTaClassifier, self).__call__(inputs, valid_length)
 
     def hybrid_forward(self, F, inputs, valid_length=None):
 
@@ -762,10 +745,6 @@ def hybrid_forward(self, F, inputs, valid_length=None):
         outputs : NDArray or Symbol
             Shape (batch_size, num_classes)
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None
-        if isinstance(valid_length, list) and len(valid_length) == 0:
-            valid_length = None
-
         seq_out = self.roberta(inputs, valid_length)
         assert not isinstance(seq_out, (tuple, list)), 'Expected one output from RoBERTaModel'
         outputs = seq_out.slice(begin=(0, 0, 0), end=(None, 1, None))
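
Note: the classifiers can now be called without valid_length after hybridization. A minimal sketch, assuming the usual get_model entry point (model and dataset names are the standard presets, shown only for illustration):

import mxnet as mx
import gluonnlp as nlp

bert, vocab = nlp.model.get_model('bert_12_768_12',
                                  dataset_name='book_corpus_wiki_en_uncased',
                                  pretrained=True, use_pooler=True,
                                  use_decoder=False, use_classifier=False)
model = nlp.model.BERTClassifier(bert, num_classes=2)
model.classifier.initialize()
model.hybridize()
inputs = mx.nd.ones(shape=(2, 8))        # (batch_size, seq_length) token ids
token_types = mx.nd.zeros(shape=(2, 8))  # segment ids
logits = model(inputs, token_types)      # valid_length=None, no [] placeholder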
8 changes: 0 additions & 8 deletions src/gluonnlp/model/language_model.py
@@ -91,10 +91,6 @@ def hybrid_forward(self, F, inputs, begin_state=None):
             output recurrent state tensor with length equals to num_layers.
             the state with shape `(1, batch_size, num_hidden)`
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        if isinstance(begin_state, list) and len(begin_state) == 0:
-            begin_state = None
-
         encoded = self.embedding(inputs)
         if not begin_state:
             if F == nd:
 
@@ -165,10 +161,6 @@ def hybrid_forward(self, F, inputs, begin_state=None):  # pylint: disable=arguments-differ
             output recurrent state tensor with length equals to num_layers-1.
             the state with shape `(num_layers, batch_size, num_hidden)`
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        if isinstance(begin_state, list) and len(begin_state) == 0:
-            begin_state = None
-
         encoded = self.embedding(inputs)
         if not begin_state:
             if F == nd:
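
Note: the same simplification applies to the evaluation-time language models; begin_state=None now reaches hybrid_forward directly. A sketch using the standard_lstm_lm_200 preset (illustrative only):

import mxnet as mx
import gluonnlp as nlp

model, vocab = nlp.model.get_model('standard_lstm_lm_200',
                                   dataset_name='wikitext-2', pretrained=False)
model.initialize()
model.hybridize()
inputs = mx.nd.ones(shape=(35, 2))  # (sequence_length, batch_size) token ids
output, state = model(inputs)       # begin_state defaults to None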
2 changes: 1 addition & 1 deletion src/gluonnlp/model/sequence_sampler.py
@@ -786,7 +786,7 @@ def __call__(self, inputs, states):
         # Valid length is initialized to be 1
         beam_alive_mask = mx.nd.ones(shape=(batch_size, beam_size), ctx=ctx, dtype=np.int32)
         valid_length = mx.nd.ones(shape=(batch_size, beam_size), ctx=ctx, dtype=np.int32)
-        scores = 0.
+        scores = mx.nd.zeros(shape=(batch_size, beam_size), ctx=ctx)
         samples = step_input.reshape((batch_size, beam_size, 1)).astype(np.int32)
         for _ in range(self._max_length):
             outputs, new_states = self._decoder(step_input, states)
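
Note: initializing scores as an explicit NDArray, instead of the Python float 0., gives it a definite shape, dtype, and context from the first beam-search step rather than relying on implicit scalar broadcasting. A minimal illustration with hypothetical shapes:

import mxnet as mx

batch_size, beam_size = 2, 4
candidate_scores = mx.nd.random.normal(shape=(batch_size, beam_size))
scores = 0.                                          # old: becomes an NDArray only after the first add
scores = mx.nd.zeros(shape=(batch_size, beam_size))  # new: explicit from step 0
scores = scores + candidate_scores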
6 changes: 0 additions & 6 deletions src/gluonnlp/model/train/cache.py
@@ -122,8 +122,6 @@ def __call__(self, inputs, target, next_word_history, cache_history, begin_state=None):
             The hidden states to be kept in the memory for look up
             (size is equal to the window size)
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        begin_state = [] if begin_state is None else begin_state
         return super(CacheCell, self).__call__(inputs, target, next_word_history,
                                                cache_history, begin_state)
 
@@ -158,10 +156,6 @@ def hybrid_forward(self, F, inputs, target, next_word_history, cache_history, begin_state=None):
             The hidden states to be kept in the memory for look up
             (size is equal to the window size)
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        if isinstance(begin_state, list) and len(begin_state) == 0:
-            begin_state = None
-
         output, hidden, encoder_hs, _ = super(self.lm_model.__class__, self.lm_model).\
             hybrid_forward(F, inputs, begin_state)
         encoder_h = encoder_hs[-1].reshape(-3, -2)
14 changes: 0 additions & 14 deletions src/gluonnlp/model/train/language_model.py
@@ -144,8 +144,6 @@ def __call__(self, inputs, begin_state=None):
             to num_layers. The shape of every encoder's dropped output
             `(sequence_length, batch_size, num_hidden)`
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        begin_state = [] if begin_state is None else begin_state
         return super(AWDRNN, self).__call__(inputs, begin_state)
 
     def hybrid_forward(self, F, inputs, begin_state=None):  # pylint: disable=arguments-differ
 
@@ -176,10 +174,6 @@ def hybrid_forward(self, F, inputs, begin_state=None):  # pylint: disable=arguments-differ
             to num_layers. The shape of every encoder's dropped output
             `(sequence_length, batch_size, num_hidden)`
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        if isinstance(begin_state, list) and len(begin_state) == 0:
-            begin_state = None
-
         encoded = self.embedding(inputs)
         if not begin_state:
             if F == nd:
 
@@ -302,9 +296,6 @@ def __call__(self, inputs, begin_state=None):  # pylint: disable=arguments-differ
             The list of last output with dropout of the model's encoder.
             the shape of last encoder's dropped output `(sequence_length, batch_size, num_hidden)`
         """
-        print('called train.__call__')
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        begin_state = [] if begin_state is None else begin_state
         return super(StandardRNN, self).__call__(inputs, begin_state)
 
     def hybrid_forward(self, F, inputs, begin_state=None):  # pylint: disable=arguments-differ
 
@@ -335,11 +326,6 @@ def hybrid_forward(self, F, inputs, begin_state=None):  # pylint: disable=arguments-differ
             The list of last output with dropout of the model's encoder.
             the shape of last encoder's dropped output `(sequence_length, batch_size, num_hidden)`
         """
-        print('called train.hybrid_forward')
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        if isinstance(begin_state, list) and len(begin_state) == 0:
-            begin_state = None
-
         encoded = self.embedding(inputs)
         if not begin_state:
             if F == nd:
9 changes: 0 additions & 9 deletions src/gluonnlp/model/transformer.py
@@ -437,9 +437,6 @@ def __call__(self, inputs, states=None, valid_length=None):
             - outputs of the transformer encoder. Shape (batch_size, length, C_out)
             - additional_outputs of all the transformer encoder
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        valid_length = [] if valid_length is None else valid_length
-        states = [] if states is None else states
         return super(BaseTransformerEncoder, self).__call__(inputs, states, valid_length)
 
     def _arange_like(self, F, inputs, axis):
 
@@ -492,12 +489,6 @@ def hybrid_forward(self, F, inputs, states=None, valid_length=None, position_weight=None):
             (batch_size, num_heads, length, length)
         """
-        # XXX Temporary hack for hybridization as hybridblock does not support None inputs
-        if isinstance(valid_length, list) and len(valid_length) == 0:
-            valid_length = None
-        if isinstance(states, list) and len(states) == 0:
-            states = None
-
         steps = self._arange_like(F, inputs, axis=1)
         if valid_length is not None:
             ones = F.ones_like(steps)
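
Note: a hybridized transformer encoder likewise accepts states=None and valid_length=None directly. A minimal sketch, assuming TransformerEncoder's default constructor arguments (small sizes chosen only for illustration):

import mxnet as mx
from gluonnlp.model.transformer import TransformerEncoder

encoder = TransformerEncoder(num_layers=2, units=16, hidden_size=32,
                             num_heads=4, max_length=20)
encoder.initialize()
encoder.hybridize()
inputs = mx.nd.random.normal(shape=(2, 10, 16))  # (batch_size, length, units)
outputs, additional_outputs = encoder(inputs)    # states=None, valid_length=None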
