From cc93471349dd8807953b14f79decfb9b1fcba7ce Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Fri, 15 Jul 2022 19:23:54 +0100
Subject: [PATCH 01/22] Add serving_output and serving methods to some vision
 models

---
 .../models/convnext/modeling_tf_convnext.py   | 15 +++++-
 .../data2vec/modeling_tf_data2vec_vision.py   | 31 +++++++++++-
 .../models/regnet/modeling_tf_regnet.py       | 17 ++++++-
 .../models/resnet/modeling_tf_resnet.py       | 36 +++++++++++++
 .../models/swin/modeling_tf_swin.py           | 50 +++++++++++++++++++
 .../models/vit_mae/modeling_tf_vit_mae.py     | 24 ++++++++-
 tests/test_modeling_tf_common.py              | 13 +++++
 7 files changed, 180 insertions(+), 6 deletions(-)

diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py
index 58f4c3bba984..8865650067ad 100644
--- a/src/transformers/models/convnext/modeling_tf_convnext.py
+++ b/src/transformers/models/convnext/modeling_tf_convnext.py
@@ -383,7 +383,8 @@ def serving(self, inputs):
             inputs (`Dict[str, tf.Tensor]`):
                 The input of the saved model as a dictionary of tensors.
         """
-        return self.call(inputs)
+        output = self.call(inputs)
+        return self.serving_output(output)
 
 
 CONVNEXT_START_DOCSTRING = r"""
@@ -492,6 +493,13 @@ def call(
             hidden_states=outputs.hidden_states,
         )
 
+    def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+
+        TFBaseModelOutputWithPooling(
+            last_hidden_state=output.last_hidden_state, pooler_output=output.pooler_output, hidden_states=hs
+        )
+
 
 @add_start_docstrings(
     """
@@ -584,3 +592,8 @@ def call(
             logits=logits,
             hidden_states=outputs.hidden_states,
         )
+
+    def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+
+        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs)
diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
index aeb5143426d5..83ab706c9c47 100644
--- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
+++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
@@ -801,8 +801,8 @@ def serving(self, inputs):
             inputs (`Dict[str, tf.Tensor]`):
                 The input of the saved model as a dictionary of tensors.
         """
-
-        return self.call(inputs)
+        output = self.call(inputs)
+        return self.serving_output(output)
 
 
 DATA2VEC_VISION_START_DOCSTRING = r"""
@@ -910,6 +910,17 @@ def call(
 
         return outputs
 
+    def serving_output(self, output: TFData2VecVisionModelOutputWithPooling) -> TFData2VecVisionModelOutputWithPooling:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+
+        return TFData2VecVisionModelOutputWithPooling(
+            last_hidden_state=output.last_hidden_state,
+            pooler_output=output.pooler_output,
+            hidden_states=hs,
+            attentions=attns,
+        )
+
 
 @add_start_docstrings(
     """
@@ -983,6 +994,12 @@ def call(
             attentions=outputs.attentions,
         )
 
+    def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+
+        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+
 
 class TFData2VecVisionConvModule(tf.keras.layers.Layer):
     """
@@ -1443,3 +1460,13 @@ def reshape_features(x):
             hidden_states=outputs.hidden_states if output_hidden_states else None,
             attentions=outputs.attentions,
         )
+
+    def serving_output(self, output: TFSemanticSegmenterOutput) -> TFSemanticSegmenterOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+
+        return TFSemanticSegmenterOutput(
+            logits=output.logits,
+            hidden_states=hs,
+            attentions=attns,
+        )
diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py
index b6a8187744a7..646a26a6e311 100644
--- a/src/transformers/models/regnet/modeling_tf_regnet.py
+++ b/src/transformers/models/regnet/modeling_tf_regnet.py
@@ -371,7 +371,8 @@ def serving(self, inputs):
             inputs (`Dict[str, tf.Tensor]`):
                 The input of the saved model as a dictionary of tensors.
         """
-        return self.call(inputs)
+        output = self.call(inputs)
+        return self.serving_output(output)
 
 
 REGNET_START_DOCSTRING = r"""
@@ -444,6 +445,15 @@ def call(
             hidden_states=outputs.hidden_states,
         )
 
+    def serving_output(
+        self, output: TFBaseModelOutputWithPoolingAndNoAttention
+    ) -> TFBaseModelOutputWithPoolingAndNoAttention:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+
+        return TFBaseModelOutputWithPoolingAndNoAttention(
+            last_hidden_state=output.last_hidden_state, pooler_output=output.pooler_output, hidden_states=hs
+        )
+
 
 @add_start_docstrings(
     """
@@ -506,3 +516,8 @@ def call(
             return ((loss,) + output) if loss is not None else output
 
         return TFSequenceClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
+
+    def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+
+        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs)
diff --git a/src/transformers/models/resnet/modeling_tf_resnet.py b/src/transformers/models/resnet/modeling_tf_resnet.py
index c7c6c95fb818..b9c839cc1392 100644
--- a/src/transformers/models/resnet/modeling_tf_resnet.py
+++ b/src/transformers/models/resnet/modeling_tf_resnet.py
@@ -288,6 +288,24 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
         VISION_DUMMY_INPUTS = tf.random.uniform(shape=(3, self.config.num_channels, 224, 224), dtype=tf.float32)
         return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)}
 
+    @tf.function(
+        input_signature=[
+            {
+                "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"),
+            }
+        ]
+    )
+    def serving(self, inputs):
+        """
+        Method used for serving the model.
+
+        Args:
+            inputs (`Dict[str, tf.Tensor]`):
+                The input of the saved model as a dictionary of tensors.
+        """
+        output = self.call(inputs)
+        return self.serving_output(output)
+
 
 RESNET_START_DOCSTRING = r"""
     This model is a TensorFlow
@@ -413,6 +431,17 @@ def call(
         )
         return resnet_outputs
 
+    def serving_output(
+        self, output: TFBaseModelOutputWithPoolingAndNoAttention
+    ) -> TFBaseModelOutputWithPoolingAndNoAttention:
+        # In TF transformer models, the tuple of hidden states is normally converted to a single tensor using:
+        # hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        # We don't return the hidden states here as they all have different dimensions, so they cannot be
+        # converted to a single tensor like this.
+        return TFBaseModelOutputWithPoolingAndNoAttention(
+            last_hidden_state=output.last_hidden_state, pooler_output=output.pooler_output
+        )
+
 
 @add_start_docstrings(
     """
@@ -477,3 +506,10 @@ def call(
             return (loss,) + output if loss is not None else output
 
         return TFImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
+
+    def serving_output(self, output: TFImageClassifierOutputWithNoAttention) -> TFImageClassifierOutputWithNoAttention:
+        # In TF transformer models, the tuple of hidden states is normally converted to a single tensor using:
+        # hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        # We don't return the hidden states here as they all have different dimensions, so they cannot be
+        # converted to a single tensor like this.
+        return TFImageClassifierOutputWithNoAttention(logits=output.logits)
diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py
index 308ad2176f88..8cadf604baef 100644
--- a/src/transformers/models/swin/modeling_tf_swin.py
+++ b/src/transformers/models/swin/modeling_tf_swin.py
@@ -961,6 +961,25 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
         )
         return {"pixel_values": tf.constant(VISION_DUMMY_INPUTS)}
 
+    @tf.function(
+        input_signature=[
+            {
+                "pixel_values": tf.TensorSpec((None, None, None, None), tf.float32, name="pixel_values"),
+            }
+        ]
+    )
+    def serving(self, inputs):
+        """
+        Method used for serving the model.
+
+        Args:
+            inputs (`Dict[str, tf.Tensor]`):
+                The input of the saved model as a dictionary of tensors.
+        """
+        output = self.call(inputs)
+
+        return self.serving_output(output)
+
 
 SWIN_START_DOCSTRING = r"""
     This model is a Tensorflow
@@ -1223,6 +1242,19 @@ def call(
 
         return swin_outputs
 
+    def serving_output(self, output: TFSwinModelOutput) -> TFSwinModelOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        r_hs = tf.convert_to_tensor(output.reshaped_hidden_states) if self.config.output_hidden_states else None
+        # Read the same field names that TFSwinModelOutput is constructed with (last_hidden_state / pooler_output).
+        return TFSwinModelOutput(
+            last_hidden_state=output.last_hidden_state,
+            pooler_output=output.pooler_output,
+            hidden_states=hs,
+            attentions=attns,
+            reshaped_hidden_states=r_hs,
+        )
+
 
 class TFSwinPixelShuffle(tf.keras.layers.Layer):
     """TF layer implementation of torch.nn.PixelShuffle"""
@@ -1377,6 +1409,15 @@ def call(
             reshaped_hidden_states=outputs.reshaped_hidden_states,
         )
 
+    def serving_output(self, output: TFSwinMaskedImageModelingOutput) -> TFSwinMaskedImageModelingOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        r_hs = tf.convert_to_tensor(output.reshaped_hidden_states) if self.config.output_hidden_states else None
+
+        return TFSwinMaskedImageModelingOutput(
+            logits=output.logits, hidden_states=hs, attentions=attns, reshaped_hidden_states=r_hs
+        )
+
 
 @add_start_docstrings(
     """
@@ -1452,3 +1493,12 @@ def call(
             attentions=outputs.attentions,
             reshaped_hidden_states=outputs.reshaped_hidden_states,
         )
+
+    def serving_output(self, output: TFSwinImageClassifierOutput) -> TFSwinImageClassifierOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        r_hs = tf.convert_to_tensor(output.reshaped_hidden_states) if self.config.output_hidden_states else None
+
+        return TFSwinImageClassifierOutput(
+            logits=output.logits, hidden_states=hs, attentions=attns, reshaped_hidden_states=r_hs
+        )
diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py
index d5fbecabd62d..0f64a3b6f8e5 100644
--- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py
+++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py
@@ -722,8 +722,8 @@ def serving(self, inputs):
             inputs (`Dict[str, tf.Tensor]`):
                 The input of the saved model as a dictionary of tensors.
         """
-
-        return self.call(inputs)
+        output = self.call(inputs)
+        return self.serving_output(output)
 
 
 VIT_MAE_START_DOCSTRING = r"""
@@ -842,6 +842,18 @@ def call(
 
         return outputs
 
+    def serving_output(self, output: TFViTMAEModelOutput) -> TFViTMAEModelOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+
+        return TFViTMAEModelOutput(
+            last_hidden_state=output.last_hidden_state,
+            mask=output.mask,
+            ids_restore=output.ids_restore,
+            hidden_states=hs,
+            attentions=attns,
+        )
+
 
 class TFViTMAEDecoder(tf.keras.layers.Layer):
     def __init__(self, config, num_patches, **kwargs):
@@ -1143,3 +1155,11 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
+
+    def serving_output(self, output: TFViTMAEForPreTrainingOutput) -> TFViTMAEForPreTrainingOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+
+        return TFViTMAEForPreTrainingOutput(
+            logits=output.logits, mask=output.mask, ids_restore=output.ids_restore, hidden_states=hs, attentions=attns
+        )
diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index d27ecaccb014..41b3c5ace21c 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -205,6 +205,19 @@ def test_save_load_config(self):
 
             self.assert_outputs_same(after_outputs, outputs)
 
+    def test_prepare_serving_output(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        config.output_hidden_states = True
+        config.output_attentions = self.has_attentions
+
+        for model_class in self.all_model_classes:
+            model = model_class(config)
+            outputs = model(self._prepare_for_class(inputs_dict, model_class))
+            serving_outputs = model.serving_output(outputs)
+            # isinstance() only accepts types in its tuple, so "or None" must be spelled type(None).
+            for k, v in serving_outputs.items():
+                self.assertIsInstance(v, (tf.Tensor, type(None)), msg=f"{k} is not a Tensor or None")
+
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
 

From 0a8ed80c6179e19be4cd58a27ff0710b647ba7be Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Fri, 15 Jul 2022 19:30:04 +0100
Subject: [PATCH 02/22] Add serving outputs for DeiT

---
 .../models/deit/modeling_tf_deit.py           | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py
index ae7191dac53d..e261cdc4226e 100644
--- a/src/transformers/models/deit/modeling_tf_deit.py
+++ b/src/transformers/models/deit/modeling_tf_deit.py
@@ -864,6 +864,16 @@ def call(
             attentions=outputs.attentions,
         )
 
+    def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+
+        return TFMaskedLMOutput(
+            logits=output.logits,
+            hidden_states=hs,
+            attentions=attns
+        )
+
 
 @add_start_docstrings(
     """
@@ -961,6 +971,16 @@ def call(
             attentions=outputs.attentions,
         )
 
+    def serving_output(self, output: TFImageClassifierOutput) -> TFImageClassifierOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+
+        return TFImageClassifierOutput(
+            logits=output.logits,
+            hidden_states=hs,
+            attentions=attns
+        )
+
 
 @add_start_docstrings(
     """
@@ -1041,3 +1061,15 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
+
+    def serving_output(self, output: TFDeiTForImageClassificationWithTeacherOutput) -> TFDeiTForImageClassificationWithTeacherOutput:
+        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+
+        return TFDeiTForImageClassificationWithTeacherOutput(
+            logits=output.logits,
+            cls_logits=output.cls_logits,
+            distillation_logits=output.distillation_logits,
+            hidden_states=hs,
+            attentions=attns
+        )

From 2e948cab12d7ec483df81bbb2fcc660ae23c5b69 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Tue, 19 Jul 2022 20:55:03 +0100
Subject: [PATCH 03/22] Don't convert hidden states - differing shapes

---
 src/transformers/models/regnet/modeling_tf_regnet.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py
index 646a26a6e311..11176b42ca9d 100644
--- a/src/transformers/models/regnet/modeling_tf_regnet.py
+++ b/src/transformers/models/regnet/modeling_tf_regnet.py
@@ -448,10 +448,10 @@ def call(
     def serving_output(
         self, output: TFBaseModelOutputWithPoolingAndNoAttention
     ) -> TFBaseModelOutputWithPoolingAndNoAttention:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-
         return TFBaseModelOutputWithPoolingAndNoAttention(
-            last_hidden_state=output.last_hidden_state, pooler_output=output.pooler_output, hidden_states=hs
+            last_hidden_state=output.last_hidden_state,
+            pooler_output=output.pooler_output,
+            hidden_states=output.hidden_states
         )
 
 
@@ -518,6 +518,4 @@ def call(
         return TFSequenceClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
 
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-
-        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs)
+        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=output.hidden_states)

From bf684e751176acbf813c2e9aeed9f50dd123204e Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Tue, 19 Jul 2022 20:57:39 +0100
Subject: [PATCH 04/22] Make saveable

---
 .../models/convnext/modeling_tf_convnext.py   | 12 ++--
 .../models/deit/modeling_tf_deit.py           |  2 +-
 .../models/funnel/modeling_tf_funnel.py       | 29 ++------
 .../models/hubert/modeling_tf_hubert.py       | 14 +++-
 .../modeling_tf_speech_to_text.py             | 67 ++++++++-----------
 .../models/tapas/modeling_tf_tapas.py         | 14 ++++
 .../models/wav2vec2/modeling_tf_wav2vec2.py   | 14 +++-
 7 files changed, 74 insertions(+), 78 deletions(-)

diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py
index 8865650067ad..88134fb2129b 100644
--- a/src/transformers/models/convnext/modeling_tf_convnext.py
+++ b/src/transformers/models/convnext/modeling_tf_convnext.py
@@ -494,10 +494,10 @@ def call(
         )
 
     def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-
-        TFBaseModelOutputWithPooling(
-            last_hidden_state=output.last_hidden_state, pooler_output=output.pooler_output, hidden_states=hs
+        return TFBaseModelOutputWithPooling(
+            last_hidden_state=output.last_hidden_state,
+            pooler_output=output.pooler_output,
+            hidden_states=output.hidden_states
         )
 
 
@@ -594,6 +594,4 @@ def call(
         )
 
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-
-        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs)
+        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=output.hidden_states)
diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py
index e261cdc4226e..c7c0c5547c1c 100644
--- a/src/transformers/models/deit/modeling_tf_deit.py
+++ b/src/transformers/models/deit/modeling_tf_deit.py
@@ -193,7 +193,7 @@ def call(self, pixel_values: tf.Tensor) -> tf.Tensor:
             raise ValueError(
                 "Make sure that the channel dimension of the pixel values match with the one set in the configuration."
             )
-        if height != self.image_size[0] or width != self.image_size[1]:
+        if tf.executing_eagerly() and (height != self.image_size[0] or width != self.image_size[1]):
             raise ValueError(
                 f"Input image size ({height}*{width}) doesn't match model ({self.image_size[0]}*{self.image_size[1]})."
             )
diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py
index 92a4453d1cbe..e45c214440c2 100644
--- a/src/transformers/models/funnel/modeling_tf_funnel.py
+++ b/src/transformers/models/funnel/modeling_tf_funnel.py
@@ -1175,12 +1175,8 @@ def call(
             training=training,
         )
 
-    # Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output
     def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns)
+        return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=output.hidden_states, attentions=output.attentions)
 
 
 @add_start_docstrings(
@@ -1249,10 +1245,7 @@ def call(
         )
 
     def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFFunnelForPreTrainingOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFFunnelForPreTrainingOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions)
 
 
 @add_start_docstrings("""Funnel Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING)
@@ -1322,12 +1315,8 @@ def call(
             attentions=outputs.attentions,
         )
 
-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output
     def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFMaskedLMOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions)
 
 
 @add_start_docstrings(
@@ -1592,12 +1581,8 @@ def call(
             attentions=outputs.attentions,
         )
 
-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output
     def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFTokenClassifierOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions)
 
 
 @add_start_docstrings(
@@ -1683,11 +1668,7 @@ def call(
             attentions=outputs.attentions,
         )
 
-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output
     def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
         return TFQuestionAnsweringModelOutput(
-            start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns
+            start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=output.hidden_states, attentions=output.attentions
         )
diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index 3bc8fdc3c4f4..7cef21261ad2 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -227,7 +227,7 @@ def _compute_mask_indices(
             f" `sequence_length`: {sequence_length}`"
         )
     # compute number of masked spans in batch
-    num_masked_spans = int(mask_prob * sequence_length / mask_length + tf.random.uniform((1,)))
+    num_masked_spans = int(mask_prob * sequence_length / mask_length + np.random.uniform())
     num_masked_spans = max(num_masked_spans, min_masks)
 
     # make sure num masked indices <= sequence_length
@@ -256,7 +256,7 @@ def _compute_mask_indices(
 
     # scatter indices to mask
     spec_aug_mask = _scatter_values_on_batch_indices(
-        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, spec_aug_mask.shape
+        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, shape_list(spec_aug_mask)
     )
 
     return spec_aug_mask
@@ -1319,7 +1319,15 @@ def __init__(self, config, *inputs, **kwargs):
             "to train/fine-tine this model, you need a GPU or a TPU"
         )
 
-    @tf.function
+    @tf.function(
+        input_signature=[
+            {
+                "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
+            }
+        ]
+    )
     def serving(self, inputs):
         output = self.call(input_values=inputs, training=False)
 
diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
index 18d2593ca9d6..92bfa04f7b81 100755
--- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -143,7 +143,7 @@ def __init__(self, config: Speech2TextConfig, **kwargs):
         ]
 
     def call(self, input_features: tf.Tensor) -> tf.Tensor:
-        hidden_states = tf.identity(input_features)  # TF Conv1D assumes Batch x Time x Channels, same as the input
+        hidden_states = tf.cast(input_features, tf.float32)  # TF Conv1D assumes Batch x Time x Channels, same as the input
         for i, conv in enumerate(self.conv_layers):
             # equivalent to `padding=k // 2` on PT's `nn.Conv1d`
             pad_len = self.kernel_sizes[i] // 2
@@ -187,23 +187,20 @@ def _get_embedding(num_embeddings: int, embedding_dim: int, padding_idx: Optiona
             # zero pad
             emb = tf.concat([emb, tf.zeros(num_embeddings, 1)], axis=1)
         if padding_idx is not None:
-            emb = tf.concat([emb[:padding_idx, :], tf.zeros((1, emb.shape[1])), emb[padding_idx + 1 :, :]], axis=0)
+            emb = tf.concat([emb[:padding_idx, :], tf.zeros((1, shape_list(emb)[1])), emb[padding_idx + 1 :, :]], axis=0)
         return emb
 
-    def _resize_embeddings(self):
-        """Recreates (and effectivelly resizes) the sinusoidal embeddings"""
-        self.embeddings = self.add_weight(
-            name="weights",  # name also used in PT
-            shape=self.embedding_weights.shape,
-        )
-        self.embeddings.assign(self.embedding_weights)
-
     def build(self, input_shape: tf.TensorShape):
         """
         Build shared token embedding layer Shared weights logic adapted from
         https://github.com/tensorflow/models/blob/a009f4fb9d2fc4949e32192a944688925ef78659/official/transformer/v2/embedding_layer.py#L24
         """
-        self._resize_embeddings()
+        self.embeddings = self.add_weight(
+            name="weights",  # name also used in PT
+            shape=shape_list(self.embedding_weights),
+            trainable=False,
+        )
+        self.embeddings.assign(self.embedding_weights)
         super().build(input_shape)
 
     def call(self, input_ids: tf.Tensor, past_key_values_length: int = 0) -> tf.Tensor:
@@ -215,7 +212,7 @@ def call(self, input_ids: tf.Tensor, past_key_values_length: int = 0) -> tf.Tens
         max_pos = self.padding_idx + 1 + seq_len
         if max_pos > shape_list(self.embeddings)[0]:
             self.embedding_weights = self._get_embedding(max_pos + self.offset, self.embedding_dim, self.padding_idx)
-            self._resize_embeddings()
+            self.embeddings.assign(self.embedding_weights)
         return tf.reshape(tf.gather(self.embeddings, tf.reshape(position_ids, (-1,)), axis=0), (bsz, seq_len, -1))
 
     @staticmethod
@@ -608,7 +605,7 @@ def _get_feat_extract_output_lengths(self, input_lengths: tf.Tensor):
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "input_features": tf.TensorSpec((None, None, None), tf.float32, name="input_features"),
                 "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
                 "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
                 "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
@@ -779,7 +776,7 @@ def _get_feat_extract_output_lengths(self, input_lengths: tf.Tensor):
     def _get_feature_vector_attention_mask(self, feature_vector_length, attention_mask):
         # generate creates 3D attention mask, because of the shape of input_features
         # convert it to 2D if thats the case
-        if len(attention_mask.shape) > 2:
+        if len(shape_list(attention_mask)) > 2:
             attention_mask = attention_mask[:, :, -1]
 
         subsampled_lengths = self._get_feat_extract_output_lengths(tf.math.reduce_sum(attention_mask, -1))
@@ -791,7 +788,6 @@ def _get_feature_vector_attention_mask(self, feature_vector_length, attention_ma
             ),
             axis=-1,
         )
-
         attention_mask = tf.scatter_nd(indices=indices, updates=tf.ones(bsz), shape=[bsz, feature_vector_length])
         attention_mask = tf.cast(tf.reverse(tf.math.cumsum(tf.reverse(attention_mask, [-1]), -1), [-1]), tf.int64)
         return attention_mask
@@ -845,10 +841,10 @@ def call(
 
         # subsample attention mask if necessary
         if attention_mask is not None:
-            attention_mask = self._get_feature_vector_attention_mask(inputs_embeds.shape[1], attention_mask)
+            attention_mask = self._get_feature_vector_attention_mask(shape_list(inputs_embeds)[1], attention_mask)
             padding_mask = tf.cast(tf.math.not_equal(attention_mask, 1), tf.int64)
         else:
-            padding_mask = tf.zeros(inputs_embeds.shape[:-1], dtype=tf.int64)
+            padding_mask = tf.zeros(shape_list(inputs_embeds)[:-1], dtype=tf.int64)
 
         embed_pos = self.embed_positions(padding_mask)
 
@@ -942,22 +938,6 @@ def get_embed_tokens(self):
     def set_embed_tokens(self, embed_tokens):
         self.embed_tokens = embed_tokens
 
-    def _prepare_decoder_attention_mask(self, attention_mask, input_shape, inputs_embeds, past_key_values_length):
-        # create causal mask
-        # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
-        combined_attention_mask = None
-        if input_shape[-1] > 1:
-            combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length=past_key_values_length)
-
-        if attention_mask is not None:
-            # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
-            expanded_attn_mask = _expand_mask(attention_mask, tgt_len=input_shape[-1])
-            combined_attention_mask = (
-                expanded_attn_mask if combined_attention_mask is None else expanded_attn_mask + combined_attention_mask
-            )
-
-        return combined_attention_mask
-
     @unpack_inputs
     def call(
         self,
@@ -1053,9 +1033,16 @@ def call(
         else:
             inputs_embeds = inputs_embeds
 
-        attention_mask = self._prepare_decoder_attention_mask(
-            attention_mask, input_shape, inputs_embeds, past_key_values_length
-        )
+        # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
+        if input_shape[-1] > 1:
+            combined_attention_mask = _make_causal_mask(input_shape, past_key_values_length=past_key_values_length)
+        else:
+            combined_attention_mask = _expand_mask(
+                tf.ones((input_shape[0], input_shape[1] + past_key_values_length)), tgt_len=input_shape[-1]
+            )
+
+        if attention_mask is not None:
+            combined_attention_mask = combined_attention_mask + _expand_mask(attention_mask, tgt_len=input_shape[-1])
 
         # expand encoder attention mask
         if encoder_hidden_states is not None and encoder_attention_mask is not None:
@@ -1100,7 +1087,7 @@ def call(
 
             hidden_states, layer_self_attn, layer_cross_attn, present_key_value = decoder_layer(
                 hidden_states,
-                attention_mask=attention_mask,
+                attention_mask=combined_attention_mask,
                 encoder_hidden_states=encoder_hidden_states,
                 encoder_attention_mask=encoder_attention_mask,
                 layer_head_mask=head_mask[idx] if head_mask is not None else None,
@@ -1203,7 +1190,7 @@ def call(
         # downsample encoder attention mask
         if attention_mask is not None:
             encoder_attention_mask = self.encoder._get_feature_vector_attention_mask(
-                encoder_outputs[0].shape[1], attention_mask
+                shape_list(encoder_outputs[0])[1], attention_mask
             )
         else:
             encoder_attention_mask = None
@@ -1465,8 +1452,8 @@ def serving_output(self, output):
         enc_hs = tf.convert_to_tensor(output.encoder_hidden_states) if self.config.output_hidden_states else None
         enc_attns = tf.convert_to_tensor(output.encoder_attentions) if self.config.output_attentions else None
 
-        return TFSeq2SeqModelOutput(
-            last_hidden_state=output.last_hidden_state,
+        return TFSeq2SeqLMOutput(
+            logits=output.logits,
             past_key_values=pkv,
             decoder_hidden_states=dec_hs,
             decoder_attentions=dec_attns,
diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py
index 1875cc800907..2a9e8490382d 100644
--- a/src/transformers/models/tapas/modeling_tf_tapas.py
+++ b/src/transformers/models/tapas/modeling_tf_tapas.py
@@ -862,6 +862,20 @@ class TFTapasPreTrainedModel(TFPreTrainedModel):
     config_class = TapasConfig
     base_model_prefix = "tapas"
 
+    @tf.function(
+        input_signature=[
+            {
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
+            }
+        ]
+    )
+    def serving(self, inputs):
+        output = self.call(inputs)
+
+        return self.serving_output(output)
+
 
 TAPAS_START_DOCSTRING = r"""
 
diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index 999aae995ae9..0ca685f082d3 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -268,7 +268,7 @@ def _compute_mask_indices(
             f" `sequence_length`: {sequence_length}`"
         )
     # compute number of masked spans in batch
-    num_masked_spans = int(mask_prob * sequence_length / mask_length + tf.random.uniform((1,)))
+    num_masked_spans = int(mask_prob * sequence_length / mask_length + np.random.uniform((1,)))
     num_masked_spans = max(num_masked_spans, min_masks)
 
     # make sure num masked indices <= sequence_length
@@ -297,7 +297,7 @@ def _compute_mask_indices(
 
     # scatter indices to mask
     spec_aug_mask = _scatter_values_on_batch_indices(
-        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, spec_aug_mask.shape
+        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, shape_list(spec_aug_mask)
     )
 
     return spec_aug_mask
@@ -1352,7 +1352,15 @@ def __init__(self, config, *inputs, **kwargs):
             "to train/fine-tine this model, you need a GPU or a TPU"
         )
 
-    @tf.function
+    @tf.function(
+        input_signature=[
+            {
+                "input_values": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
+            }
+        ]
+    )
     def serving(self, inputs):
         output = self.call(input_values=inputs, training=False)
 

From c2303823b24bbd40540de673e5ed6d180caaadef Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Tue, 19 Jul 2022 20:58:49 +0100
Subject: [PATCH 05/22] Fix up

---
 .../models/convnext/modeling_tf_convnext.py   |  2 +-
 .../models/deit/modeling_tf_deit.py           | 18 ++++++------------
 .../models/funnel/modeling_tf_funnel.py       | 19 +++++++++++++++----
 .../models/regnet/modeling_tf_regnet.py       |  2 +-
 .../modeling_tf_speech_to_text.py             |  8 ++++++--
 5 files changed, 29 insertions(+), 20 deletions(-)

diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py
index 88134fb2129b..a6c8fd4977e2 100644
--- a/src/transformers/models/convnext/modeling_tf_convnext.py
+++ b/src/transformers/models/convnext/modeling_tf_convnext.py
@@ -497,7 +497,7 @@ def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOut
         return TFBaseModelOutputWithPooling(
             last_hidden_state=output.last_hidden_state,
             pooler_output=output.pooler_output,
-            hidden_states=output.hidden_states
+            hidden_states=output.hidden_states,
         )
 
 
diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py
index c7c0c5547c1c..c96147795596 100644
--- a/src/transformers/models/deit/modeling_tf_deit.py
+++ b/src/transformers/models/deit/modeling_tf_deit.py
@@ -868,11 +868,7 @@ def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
         hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
         attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
-        return TFMaskedLMOutput(
-            logits=output.logits,
-            hidden_states=hs,
-            attentions=attns
-        )
+        return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
 
 
 @add_start_docstrings(
@@ -975,11 +971,7 @@ def serving_output(self, output: TFImageClassifierOutput) -> TFImageClassifierOu
         hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
         attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
-        return TFImageClassifierOutput(
-            logits=output.logits,
-            hidden_states=hs,
-            attentions=attns
-        )
+        return TFImageClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
 
 
 @add_start_docstrings(
@@ -1062,7 +1054,9 @@ def call(
             attentions=outputs.attentions,
         )
 
-    def serving_output(self, output: TFDeiTForImageClassificationWithTeacherOutput) -> TFDeiTForImageClassificationWithTeacherOutput:
+    def serving_output(
+        self, output: TFDeiTForImageClassificationWithTeacherOutput
+    ) -> TFDeiTForImageClassificationWithTeacherOutput:
         hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
         attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
@@ -1071,5 +1065,5 @@ def serving_output(self, output: TFDeiTForImageClassificationWithTeacherOutput)
             cls_logits=output.cls_logits,
             distillation_logits=output.distillation_logits,
             hidden_states=hs,
-            attentions=attns
+            attentions=attns,
         )
diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py
index e45c214440c2..04a985a06f33 100644
--- a/src/transformers/models/funnel/modeling_tf_funnel.py
+++ b/src/transformers/models/funnel/modeling_tf_funnel.py
@@ -1176,7 +1176,11 @@ def call(
         )
 
     def serving_output(self, output):
-        return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=output.hidden_states, attentions=output.attentions)
+        return TFBaseModelOutput(
+            last_hidden_state=output.last_hidden_state,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
+        )
 
 
 @add_start_docstrings(
@@ -1245,7 +1249,9 @@ def call(
         )
 
     def serving_output(self, output):
-        return TFFunnelForPreTrainingOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions)
+        return TFFunnelForPreTrainingOutput(
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
+        )
 
 
 @add_start_docstrings("""Funnel Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING)
@@ -1582,7 +1588,9 @@ def call(
         )
 
     def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput:
-        return TFTokenClassifierOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions)
+        return TFTokenClassifierOutput(
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
+        )
 
 
 @add_start_docstrings(
@@ -1670,5 +1678,8 @@ def call(
 
     def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
         return TFQuestionAnsweringModelOutput(
-            start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=output.hidden_states, attentions=output.attentions
+            start_logits=output.start_logits,
+            end_logits=output.end_logits,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
         )
diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py
index 11176b42ca9d..07117dccc261 100644
--- a/src/transformers/models/regnet/modeling_tf_regnet.py
+++ b/src/transformers/models/regnet/modeling_tf_regnet.py
@@ -451,7 +451,7 @@ def serving_output(
         return TFBaseModelOutputWithPoolingAndNoAttention(
             last_hidden_state=output.last_hidden_state,
             pooler_output=output.pooler_output,
-            hidden_states=output.hidden_states
+            hidden_states=output.hidden_states,
         )
 
 
diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
index 92bfa04f7b81..efd46f9195c0 100755
--- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -143,7 +143,9 @@ def __init__(self, config: Speech2TextConfig, **kwargs):
         ]
 
     def call(self, input_features: tf.Tensor) -> tf.Tensor:
-        hidden_states = tf.cast(input_features, tf.float32)  # TF Conv1D assumes Batch x Time x Channels, same as the input
+        hidden_states = tf.cast(
+            input_features, tf.float32
+        )  # TF Conv1D assumes Batch x Time x Channels, same as the input
         for i, conv in enumerate(self.conv_layers):
             # equivalent to `padding=k // 2` on PT's `nn.Conv1d`
             pad_len = self.kernel_sizes[i] // 2
@@ -187,7 +189,9 @@ def _get_embedding(num_embeddings: int, embedding_dim: int, padding_idx: Optiona
             # zero pad
             emb = tf.concat([emb, tf.zeros(num_embeddings, 1)], axis=1)
         if padding_idx is not None:
-            emb = tf.concat([emb[:padding_idx, :], tf.zeros((1, shape_list(emb)[1])), emb[padding_idx + 1 :, :]], axis=0)
+            emb = tf.concat(
+                [emb[:padding_idx, :], tf.zeros((1, shape_list(emb)[1])), emb[padding_idx + 1 :, :]], axis=0
+            )
         return emb
 
     def build(self, input_shape: tf.TensorShape):

From ac7019b98bf2337354c0ab6ac50441cf838c9821 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 12:11:34 +0100
Subject: [PATCH 06/22] Make swin saveable

---
 .../models/swin/modeling_tf_swin.py           | 124 ++++++++----------
 1 file changed, 52 insertions(+), 72 deletions(-)

diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py
index 8cadf604baef..66506764d9ef 100644
--- a/src/transformers/models/swin/modeling_tf_swin.py
+++ b/src/transformers/models/swin/modeling_tf_swin.py
@@ -227,7 +227,7 @@ def window_reverse(windows: tf.Tensor, window_size: int, height: int, width: int
     Merges windows to produce higher resolution features.
     """
     x = shape_list(windows)[0]
-    y = tf.cast(height * width / window_size / window_size, tf.int32)
+    y = tf.cast(height * width / (window_size * window_size), tf.int32)
     batch_size = int(x / y)
     windows = tf.reshape(
         windows, (batch_size, height // window_size, width // window_size, window_size, window_size, -1)
@@ -541,7 +541,7 @@ def call(
         attention_scores = attention_scores + tf.expand_dims(relative_position_bias, 0)
 
         if attention_mask is not None:
-            # Apply the attention mask is (precomputed for all layers in SwinModel forward() function)
+            # Apply the attention mask is (precomputed for all layers in SwinModel call() function)
             mask_shape = shape_list(attention_mask)[0]
             attention_scores = tf.reshape(
                 attention_scores, (batch_size // mask_shape, mask_shape, self.num_attention_heads, dim, dim)
@@ -647,10 +647,10 @@ def __init__(
     ) -> None:
         super().__init__(**kwargs)
         self.chunk_size_feed_forward = config.chunk_size_feed_forward
-        self.shift_size = shift_size
-        self.window_size = config.window_size
+        min_res = tf.reduce_min(input_resolution)
+        self.window_size = min_res if min_res <= config.window_size else config.window_size
+        self.shift_size = 0 if min_res <= self.window_size else shift_size
         self.input_resolution = input_resolution
-        self.set_shift_and_window_size(input_resolution)
 
         self.layernorm_before = tf.keras.layers.LayerNormalization(
             epsilon=config.layer_norm_eps, name="layernorm_before"
@@ -659,7 +659,7 @@ def __init__(
         self.drop_path = (
             TFSwinDropPath(config.drop_path_rate, name="drop_path")
             if config.drop_path_rate > 0.0
-            else tf.identity(name="drop_path")
+            else tf.keras.layers.Activation("linear", name="drop_path")
         )
         self.layernorm_after = tf.keras.layers.LayerNormalization(
             epsilon=config.layer_norm_eps, name="layernorm_after"
@@ -667,56 +667,38 @@ def __init__(
         self.intermediate = TFSwinIntermediate(config, dim, name="intermediate")
         self.swin_output = TFSwinOutput(config, dim, name="output")
 
-    def set_shift_and_window_size(self, input_resolution: Tuple[int, int]) -> None:
-        if min(input_resolution) <= self.window_size:
-            # if window size is larger than input resolution, we don't partition windows
-            self.shift_size = 0
-            self.window_size = min(input_resolution)
-
-    def get_attn_mask(self, height: int, width: int) -> Optional[tf.Tensor]:
-        if self.shift_size > 0:
-            # calculate attention mask for SW-MSA
-            img_mask = tf.zeros((height, width))
-            height_slices = (
-                (0, -self.window_size),
-                (-self.window_size, -self.shift_size),
-                (-self.shift_size, -1),
-            )
-            width_slices = (
-                (0, -self.window_size),
-                (-self.window_size, -self.shift_size),
-                (-self.shift_size, -1),
-            )
+    def get_attn_mask(self, height: int, width: int, window_size: int, shift_size: int) -> Optional[tf.Tensor]:
+        img_mask = tf.zeros((height, width))
+        height_slices = ((0, -window_size), (-window_size, -shift_size), (-shift_size, -1))
+        width_slices = ((0, -window_size), (-window_size, -shift_size), (-shift_size, -1))
 
+        # calculate attention mask for SW-MSA
+        if shift_size > 0:
             count = 0
             for height_slice in height_slices:
                 for width_slice in width_slices:
-                    indices = [
-                        [i, j]
-                        for i in range(height_slice[0] % height, height_slice[1] % height + 1)
-                        for j in range(width_slice[0] % width, width_slice[1] % width + 1)
-                    ]
-                    if indices:
+                    height_inds = tf.range(height_slice[0] % height, height_slice[1] % height + 1)
+                    width_inds = tf.range(width_slice[0] % width, width_slice[1] % width + 1)
+                    indices = tf.reshape(tf.stack(tf.meshgrid(height_inds, width_inds), axis=-1), (-1, 2))
+                    if len(indices) >= 1:
                         updates = tf.ones((len(indices),), dtype=img_mask.dtype) * count
                         img_mask = tf.tensor_scatter_nd_update(img_mask, indices, updates)
                     count += 1
 
-            img_mask = tf.expand_dims(img_mask, -1)
-            img_mask = tf.expand_dims(img_mask, 0)
+        img_mask = tf.expand_dims(img_mask, -1)
+        img_mask = tf.expand_dims(img_mask, 0)
 
-            mask_windows = window_partition(img_mask, self.window_size)
-            mask_windows = tf.reshape(mask_windows, (-1, self.window_size * self.window_size))
-            attn_mask = tf.expand_dims(mask_windows, 1) - tf.expand_dims(mask_windows, 2)
-            attn_mask = tf.where(attn_mask != 0, float(-100.0), attn_mask)
-            attn_mask = tf.where(attn_mask == 0, float(0.0), attn_mask)
-        else:
-            attn_mask = None
+        mask_windows = window_partition(img_mask, self.window_size)
+        mask_windows = tf.reshape(mask_windows, (-1, self.window_size * self.window_size))
+        attn_mask = tf.expand_dims(mask_windows, 1) - tf.expand_dims(mask_windows, 2)
+        attn_mask = tf.where(attn_mask != 0, float(-100.0), attn_mask)
+        attn_mask = tf.where(attn_mask == 0, float(0.0), attn_mask)
         return attn_mask
 
     def maybe_pad(self, hidden_states: tf.Tensor, height: int, width: int) -> Tuple[tf.Tensor, tf.Tensor]:
         pad_right = (self.window_size - width % self.window_size) % self.window_size
         pad_bottom = (self.window_size - height % self.window_size) % self.window_size
-        pad_values = tf.constant([[0, 0], [0, pad_bottom], [0, pad_right], [0, 0]])
+        pad_values = [[0, 0], [0, pad_bottom], [0, pad_right], [0, 0]]
         hidden_states = tf.pad(hidden_states, pad_values)
         pad_values = tf.reshape(pad_values, (-1,))
         return hidden_states, pad_values
@@ -729,7 +711,11 @@ def call(
         output_attentions: bool = False,
         training: bool = False,
     ) -> tf.Tensor:
-        self.set_shift_and_window_size(input_dimensions)
+        # if window size is larger than input resolution, we don't partition windows
+        min_res = tf.reduce_min(input_dimensions)
+        shift_size = 0 if min_res <= self.window_size else self.shift_size
+        window_size = min_res if min_res <= self.window_size else self.window_size
+
         height, width = input_dimensions
         batch_size, _, channels = shape_list(hidden_states)
         shortcut = hidden_states
@@ -741,15 +727,15 @@ def call(
 
         _, height_pad, width_pad, _ = shape_list(hidden_states)
         # cyclic shift
-        if self.shift_size > 0:
-            shifted_hidden_states = tf.roll(hidden_states, shift=(-self.shift_size, -self.shift_size), axis=(1, 2))
+        if shift_size > 0:
+            shifted_hidden_states = tf.roll(hidden_states, shift=(-shift_size, -shift_size), axis=(1, 2))
         else:
             shifted_hidden_states = hidden_states
 
         # partition windows
-        hidden_states_windows = window_partition(shifted_hidden_states, self.window_size)
-        hidden_states_windows = tf.reshape(hidden_states_windows, (-1, self.window_size * self.window_size, channels))
-        attn_mask = self.get_attn_mask(height_pad, width_pad)
+        hidden_states_windows = window_partition(shifted_hidden_states, window_size)
+        hidden_states_windows = tf.reshape(hidden_states_windows, (-1, window_size * window_size, channels))
+        attn_mask = self.get_attn_mask(height=height_pad, width=width_pad, window_size=window_size, shift_size=shift_size)
 
         attention_outputs = self.attention(
             hidden_states_windows, attn_mask, head_mask, output_attentions=output_attentions, training=training
@@ -757,12 +743,12 @@ def call(
 
         attention_output = attention_outputs[0]
 
-        attention_windows = tf.reshape(attention_output, (-1, self.window_size, self.window_size, channels))
-        shifted_windows = window_reverse(attention_windows, self.window_size, height_pad, width_pad)
+        attention_windows = tf.reshape(attention_output, (-1, window_size, window_size, channels))
+        shifted_windows = window_reverse(attention_windows, window_size, height_pad, width_pad)
 
         # reverse cyclic shift
-        if self.shift_size > 0:
-            attention_windows = tf.roll(shifted_windows, shift=(self.shift_size, self.shift_size), axis=(1, 2))
+        if shift_size > 0:
+            attention_windows = tf.roll(shifted_windows, shift=(shift_size, shift_size), axis=(1, 2))
         else:
             attention_windows = shifted_windows
 
@@ -1243,16 +1229,12 @@ def call(
         return swin_outputs
 
     def serving_output(self, output: TFSwinModelOutput) -> TFSwinModelOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-        r_hs = tf.convert_to_tensor(output.reshaped_hidden_states) if self.config.output_hidden_states else None
-
         return TFSwinModelOutput(
-            last_hidden_state=output.sequence_output,
-            pooler_output=output.pooled_output,
-            hidden_states=hs,
-            attentions=attns,
-            reshaped_hidden_states=r_hs,
+            last_hidden_state=output.last_hidden_state,
+            pooler_output=output.pooler_output,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
+            reshaped_hidden_states=output.reshaped_hidden_states,
         )
 
 
@@ -1410,12 +1392,11 @@ def call(
         )
 
     def serving_output(self, output: TFSwinMaskedImageModelingOutput) -> TFSwinMaskedImageModelingOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-        r_hs = tf.convert_to_tensor(output.reshaped_hidden_states) if self.config.output_hidden_states else None
-
         return TFSwinMaskedImageModelingOutput(
-            logits=output.logits, hidden_states=hs, attentions=attns, reshaped_hidden_states=r_hs
+            logits=output.logits,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
+            reshaped_hidden_states=output.reshaped_hidden_states,
         )
 
 
@@ -1437,7 +1418,7 @@ def __init__(self, config: SwinConfig):
         self.classifier = (
             tf.keras.layers.Dense(config.num_labels, name="classifier")
             if config.num_labels > 0
-            else tf.identity(name="classifier")
+            else tf.keras.layers.Activation("linear", name="classifier")
         )
 
     @add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING)
@@ -1495,10 +1476,9 @@ def call(
         )
 
     def serving_output(self, output: TFSwinImageClassifierOutput) -> TFSwinImageClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-        r_hs = tf.convert_to_tensor(output.reshaped_hidden_states) if self.config.output_hidden_states else None
-
         return TFSwinImageClassifierOutput(
-            logits=output.logits, hidden_states=hs, attentions=attns, reshaped_hidden_states=r_hs
+            logits=output.logits,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
+            reshaped_hidden_states=output.reshaped_hidden_states,
         )

From 505cb774b1b7eb5c9a6c8e2bc63f12061824b8bd Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 14:30:17 +0100
Subject: [PATCH 07/22] Add in tests

---
 tests/models/bart/test_modeling_tf_bart.py    |  2 +-
 .../blenderbot/test_modeling_tf_blenderbot.py |  2 +-
 .../test_modeling_tf_blenderbot_small.py      |  2 +-
 .../models/funnel/test_modeling_tf_funnel.py  |  4 +--
 tests/models/led/test_modeling_tf_led.py      |  2 +-
 .../longformer/test_modeling_tf_longformer.py |  2 +-
 .../models/lxmert/test_modeling_tf_lxmert.py  |  2 +-
 .../models/marian/test_modeling_tf_marian.py  |  2 +-
 tests/models/mbart/test_modeling_tf_mbart.py  |  2 +-
 .../mobilebert/test_modeling_tf_mobilebert.py |  2 +-
 tests/models/opt/test_modeling_tf_opt.py      |  2 +-
 .../pegasus/test_modeling_tf_pegasus.py       |  2 +-
 tests/models/t5/test_modeling_tf_t5.py        |  2 +-
 tests/test_modeling_tf_common.py              | 31 +++++++++++++++++--
 14 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py
index 58cf515988a0..c0d9b3775585 100644
--- a/tests/models/bart/test_modeling_tf_bart.py
+++ b/tests/models/bart/test_modeling_tf_bart.py
@@ -293,8 +293,8 @@ def _get_word_embedding_weight(model, embedding_layer):
                                 models_equal = False
                     self.assertTrue(models_equal)
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
 
diff --git a/tests/models/blenderbot/test_modeling_tf_blenderbot.py b/tests/models/blenderbot/test_modeling_tf_blenderbot.py
index a8ca54558f06..9a71e5f7b1c1 100644
--- a/tests/models/blenderbot/test_modeling_tf_blenderbot.py
+++ b/tests/models/blenderbot/test_modeling_tf_blenderbot.py
@@ -213,8 +213,8 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     def test_resize_token_embeddings(self):
diff --git a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
index f8543aad59d8..3e61b54e6d21 100644
--- a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
+++ b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
@@ -278,8 +278,8 @@ def _get_word_embedding_weight(model, embedding_layer):
                                 models_equal = False
                     self.assertTrue(models_equal)
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
 
diff --git a/tests/models/funnel/test_modeling_tf_funnel.py b/tests/models/funnel/test_modeling_tf_funnel.py
index 422985f7a6fb..52647e7493fa 100644
--- a/tests/models/funnel/test_modeling_tf_funnel.py
+++ b/tests/models/funnel/test_modeling_tf_funnel.py
@@ -371,8 +371,8 @@ def test_for_question_answering(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     def test_compile_tf_model(self):
@@ -407,6 +407,6 @@ def test_for_multiple_choice(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
diff --git a/tests/models/led/test_modeling_tf_led.py b/tests/models/led/test_modeling_tf_led.py
index 8075d071e662..1b2b69934796 100644
--- a/tests/models/led/test_modeling_tf_led.py
+++ b/tests/models/led/test_modeling_tf_led.py
@@ -365,8 +365,8 @@ def test_xla_mode(self):
         # TODO JP: Make LED XLA compliant
         pass
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     def test_generate_with_headmasking(self):
diff --git a/tests/models/longformer/test_modeling_tf_longformer.py b/tests/models/longformer/test_modeling_tf_longformer.py
index 12c19e566e95..cde0d273d2a6 100644
--- a/tests/models/longformer/test_modeling_tf_longformer.py
+++ b/tests/models/longformer/test_modeling_tf_longformer.py
@@ -326,8 +326,8 @@ def test_for_multiple_choice(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     def test_xla_mode(self):
diff --git a/tests/models/lxmert/test_modeling_tf_lxmert.py b/tests/models/lxmert/test_modeling_tf_lxmert.py
index 7594f889189c..8ec5b661d7a3 100644
--- a/tests/models/lxmert/test_modeling_tf_lxmert.py
+++ b/tests/models/lxmert/test_modeling_tf_lxmert.py
@@ -600,8 +600,8 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     @slow
diff --git a/tests/models/marian/test_modeling_tf_marian.py b/tests/models/marian/test_modeling_tf_marian.py
index e62d7f0d35cc..8ace8feaba3d 100644
--- a/tests/models/marian/test_modeling_tf_marian.py
+++ b/tests/models/marian/test_modeling_tf_marian.py
@@ -246,8 +246,8 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     def test_resize_token_embeddings(self):
diff --git a/tests/models/mbart/test_modeling_tf_mbart.py b/tests/models/mbart/test_modeling_tf_mbart.py
index 559a44e5db6a..5a3e8238c60b 100644
--- a/tests/models/mbart/test_modeling_tf_mbart.py
+++ b/tests/models/mbart/test_modeling_tf_mbart.py
@@ -281,8 +281,8 @@ def _get_word_embedding_weight(model, embedding_layer):
                                 models_equal = False
                     self.assertTrue(models_equal)
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
 
diff --git a/tests/models/mobilebert/test_modeling_tf_mobilebert.py b/tests/models/mobilebert/test_modeling_tf_mobilebert.py
index 9db55cec2d58..17b74e811d43 100644
--- a/tests/models/mobilebert/test_modeling_tf_mobilebert.py
+++ b/tests/models/mobilebert/test_modeling_tf_mobilebert.py
@@ -306,8 +306,8 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     @slow
diff --git a/tests/models/opt/test_modeling_tf_opt.py b/tests/models/opt/test_modeling_tf_opt.py
index 287b3ce31941..19f86b82e8ae 100644
--- a/tests/models/opt/test_modeling_tf_opt.py
+++ b/tests/models/opt/test_modeling_tf_opt.py
@@ -227,8 +227,8 @@ def _get_word_embedding_weight(model, embedding_layer):
                             models_equal = False
                     self.assertTrue(models_equal)
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
 
diff --git a/tests/models/pegasus/test_modeling_tf_pegasus.py b/tests/models/pegasus/test_modeling_tf_pegasus.py
index 14fcce39a649..39dcb0a1be2a 100644
--- a/tests/models/pegasus/test_modeling_tf_pegasus.py
+++ b/tests/models/pegasus/test_modeling_tf_pegasus.py
@@ -244,8 +244,8 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     def test_resize_token_embeddings(self):
diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py
index 452bd913c499..a4b85abc2d9a 100644
--- a/tests/models/t5/test_modeling_tf_t5.py
+++ b/tests/models/t5/test_modeling_tf_t5.py
@@ -305,8 +305,8 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
+    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
     def test_saved_model_creation(self):
-        # This test is too long (>30sec) and makes fail the CI
         pass
 
     @slow
diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index 41b3c5ace21c..0eb022641f88 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -205,6 +205,26 @@ def test_save_load_config(self):
 
             self.assert_outputs_same(after_outputs, outputs)
 
+    def test_saved_model_creation(self):
+        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
+        config.output_hidden_states = False
+        config.output_attentions = False
+
+        if hasattr(config, "use_cache"):
+            config.use_cache = False
+
+        model_class = self.all_model_classes[0]
+
+        class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
+        model = model_class(config)
+
+        model(class_inputs_dict)
+
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            model.save_pretrained(tmpdirname, saved_model=True)
+            saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
+            self.assertTrue(os.path.exists(saved_model_dir))
+
     def test_prepare_serving_output(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         config.output_hidden_states = True
@@ -212,11 +232,18 @@ def test_prepare_serving_output(self):
 
         for model_class in self.all_model_classes:
             model = model_class(config)
-            outputs = model(self._prepare_for_class(inputs_dict, model_class))
+            inputs = self._prepare_for_class(inputs_dict, model_class)
+            outputs = model(inputs)
             serving_outputs = model.serving_output(outputs)
 
             for k, v in serving_outputs.items():
-                self.assertIsInstance(v, (tf.Tensor, None), msg=f"{k} is not a Tensor or None")
+                # Check that we have one of three possible outputs: None, tuple of tensors or a tensor
+                if isinstance(v, tuple):
+                    self.assertTrue(all(isinstance(elem, tf.Tensor) for elem in v))
+                elif v is not None:
+                    self.assertIsInstance(v, tf.Tensor)
+                else:
+                    self.assertIsNone(v)
 
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()

From a86b3695d4c91b1d74460b555e205a8a4b3c3691 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 14:52:13 +0100
Subject: [PATCH 08/22] Fix funnel tests (can't convert to tensor)

---
 .../models/funnel/modeling_tf_funnel.py       | 26 ++++++++-----------
 .../models/swin/modeling_tf_swin.py           |  4 ++-
 tests/models/clip/test_modeling_tf_clip.py    |  5 ++++
 3 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py
index 04a985a06f33..2db2cb6c233c 100644
--- a/src/transformers/models/funnel/modeling_tf_funnel.py
+++ b/src/transformers/models/funnel/modeling_tf_funnel.py
@@ -1127,12 +1127,12 @@ def call(
             training=training,
         )
 
-    # Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output
     def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns)
+        return TFBaseModelOutput(
+            last_hidden_state=output.last_hidden_state,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
+        )
 
 
 @add_start_docstrings(
@@ -1393,12 +1393,10 @@ def call(
             attentions=outputs.attentions,
         )
 
-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFSequenceClassifierOutput(
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
+        )
 
 
 @add_start_docstrings(
@@ -1509,12 +1507,10 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput:
 
         return self.serving_output(output=output)
 
-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output
     def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFMultipleChoiceModelOutput(
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
+        )
 
 
 @add_start_docstrings(
diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py
index 66506764d9ef..06f46caeede1 100644
--- a/src/transformers/models/swin/modeling_tf_swin.py
+++ b/src/transformers/models/swin/modeling_tf_swin.py
@@ -735,7 +735,9 @@ def call(
         # partition windows
         hidden_states_windows = window_partition(shifted_hidden_states, window_size)
         hidden_states_windows = tf.reshape(hidden_states_windows, (-1, window_size * window_size, channels))
-        attn_mask = self.get_attn_mask(height=height_pad, width=width_pad, window_size=window_size, shift_size=shift_size)
+        attn_mask = self.get_attn_mask(
+            height=height_pad, width=width_pad, window_size=window_size, shift_size=shift_size
+        )
 
         attention_outputs = self.attention(
             hidden_states_windows, attn_mask, head_mask, output_attentions=output_attentions, training=training
diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py
index 797d5b73b349..aa22c1cc0b9d 100644
--- a/tests/models/clip/test_modeling_tf_clip.py
+++ b/tests/models/clip/test_modeling_tf_clip.py
@@ -611,6 +611,11 @@ def test_model_from_pretrained(self):
     def test_saved_model_creation_extended(self):
         pass
 
+    @unittest.skip(reason="`saved_model` doesn't work with nested outputs so no preparation happens.")
+    @slow
+    def test_prepare_serving_output(self):
+        pass
+
 
 # We will verify our results on an image of cute cats
 def prepare_img():

From cf747bfe6e0425f53b9906f9e130784197300ca2 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 15:45:18 +0100
Subject: [PATCH 09/22] Fix np.random.uniform call (pass shape via size=)

---
 src/transformers/models/hubert/modeling_tf_hubert.py     | 2 +-
 src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index 7cef21261ad2..1d1857aded62 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -227,7 +227,7 @@ def _compute_mask_indices(
             f" `sequence_length`: {sequence_length}`"
         )
     # compute number of masked spans in batch
-    num_masked_spans = int(mask_prob * sequence_length / mask_length + np.random.uniform((1,)))
+    num_masked_spans = int(mask_prob * sequence_length / mask_length + np.random.uniform(size=(1,)))
     num_masked_spans = max(num_masked_spans, min_masks)
 
     # make sure num masked indices <= sequence_length
diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index 0ca685f082d3..b524d87028d7 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -268,7 +268,7 @@ def _compute_mask_indices(
             f" `sequence_length`: {sequence_length}`"
         )
     # compute number of masked spans in batch
-    num_masked_spans = int(mask_prob * sequence_length / mask_length + np.random.uniform((1,)))
+    num_masked_spans = int(mask_prob * sequence_length / mask_length + np.random.uniform(size=(1,)))
     num_masked_spans = max(num_masked_spans, min_masks)
 
     # make sure num masked indices <= sequence_length

From 0ff7d8c850fbb97381cd40805adc6e1106555b20 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 16:09:45 +0100
Subject: [PATCH 10/22] Tidy up serving methods and comment unconverted outputs

---
 .../models/convnext/modeling_tf_convnext.py     |  2 ++
 .../models/funnel/modeling_tf_funnel.py         | 16 ++++++++++++++++
 .../models/regnet/modeling_tf_regnet.py         |  2 ++
 .../models/resnet/modeling_tf_resnet.py         | 17 ++---------------
 .../models/swin/modeling_tf_swin.py             | 11 +++--------
 .../models/tapas/modeling_tf_tapas.py           |  1 -
 6 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/src/transformers/models/convnext/modeling_tf_convnext.py b/src/transformers/models/convnext/modeling_tf_convnext.py
index a6c8fd4977e2..405aeff6e0bd 100644
--- a/src/transformers/models/convnext/modeling_tf_convnext.py
+++ b/src/transformers/models/convnext/modeling_tf_convnext.py
@@ -494,6 +494,7 @@ def call(
         )
 
     def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling:
+        # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFBaseModelOutputWithPooling(
             last_hidden_state=output.last_hidden_state,
             pooler_output=output.pooler_output,
@@ -594,4 +595,5 @@ def call(
         )
 
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
+        # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFSequenceClassifierOutput(logits=output.logits, hidden_states=output.hidden_states)
diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py
index 2db2cb6c233c..c9165e58945d 100644
--- a/src/transformers/models/funnel/modeling_tf_funnel.py
+++ b/src/transformers/models/funnel/modeling_tf_funnel.py
@@ -1128,6 +1128,8 @@ def call(
         )
 
     def serving_output(self, output):
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
         return TFBaseModelOutput(
             last_hidden_state=output.last_hidden_state,
             hidden_states=output.hidden_states,
@@ -1176,6 +1178,8 @@ def call(
         )
 
     def serving_output(self, output):
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
         return TFBaseModelOutput(
             last_hidden_state=output.last_hidden_state,
             hidden_states=output.hidden_states,
@@ -1249,6 +1253,8 @@ def call(
         )
 
     def serving_output(self, output):
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
         return TFFunnelForPreTrainingOutput(
             logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
         )
@@ -1322,6 +1328,8 @@ def call(
         )
 
     def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
         return TFMaskedLMOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions)
 
 
@@ -1394,6 +1402,8 @@ def call(
         )
 
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
         return TFSequenceClassifierOutput(
             logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
         )
@@ -1508,6 +1518,8 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput:
         return self.serving_output(output=output)
 
     def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput:
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
         return TFMultipleChoiceModelOutput(
             logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
         )
@@ -1584,6 +1596,8 @@ def call(
         )
 
     def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput:
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
         return TFTokenClassifierOutput(
             logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
         )
@@ -1673,6 +1687,8 @@ def call(
         )
 
     def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
         return TFQuestionAnsweringModelOutput(
             start_logits=output.start_logits,
             end_logits=output.end_logits,
diff --git a/src/transformers/models/regnet/modeling_tf_regnet.py b/src/transformers/models/regnet/modeling_tf_regnet.py
index 07117dccc261..3ee09a529534 100644
--- a/src/transformers/models/regnet/modeling_tf_regnet.py
+++ b/src/transformers/models/regnet/modeling_tf_regnet.py
@@ -448,6 +448,7 @@ def call(
     def serving_output(
         self, output: TFBaseModelOutputWithPoolingAndNoAttention
     ) -> TFBaseModelOutputWithPoolingAndNoAttention:
+        # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFBaseModelOutputWithPoolingAndNoAttention(
             last_hidden_state=output.last_hidden_state,
             pooler_output=output.pooler_output,
@@ -518,4 +519,5 @@ def call(
         return TFSequenceClassifierOutput(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
 
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
+        # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFSequenceClassifierOutput(logits=output.logits, hidden_states=output.hidden_states)
diff --git a/src/transformers/models/resnet/modeling_tf_resnet.py b/src/transformers/models/resnet/modeling_tf_resnet.py
index b9c839cc1392..31a41b86fd55 100644
--- a/src/transformers/models/resnet/modeling_tf_resnet.py
+++ b/src/transformers/models/resnet/modeling_tf_resnet.py
@@ -296,13 +296,6 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
         ]
     )
     def serving(self, inputs):
-        """
-        Method used for serving the model.
-
-        Args:
-            inputs (`Dict[str, tf.Tensor]`):
-                The input of the saved model as a dictionary of tensors.
-        """
         output = self.call(inputs)
         return self.serving_output(output)
 
@@ -434,10 +427,7 @@ def call(
     def serving_output(
         self, output: TFBaseModelOutputWithPoolingAndNoAttention
     ) -> TFBaseModelOutputWithPoolingAndNoAttention:
-        # In TF transformer models, the tuple of hidden states are normally transformed to a single tensor using:
-        # hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        # We don't return the hidden states here as they all have different dimensions so can be concatenated like
-        # this.
+        # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFBaseModelOutputWithPoolingAndNoAttention(
             last_hidden_state=output.last_hidden_state, pooler_output=output.pooler_output
         )
@@ -508,8 +498,5 @@ def call(
         return TFImageClassifierOutputWithNoAttention(loss=loss, logits=logits, hidden_states=outputs.hidden_states)
 
     def serving_output(self, output: TFImageClassifierOutputWithNoAttention) -> TFImageClassifierOutputWithNoAttention:
-        # In TF transformer models, the tuple of hidden states are normally transformed to a single tensor using:
-        # hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        # We don't return the hidden states here as they all have different dimensions so can be concatenated like
-        # this.
+        # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFImageClassifierOutputWithNoAttention(logits=output.logits)
diff --git a/src/transformers/models/swin/modeling_tf_swin.py b/src/transformers/models/swin/modeling_tf_swin.py
index 06f46caeede1..dfedd2d885d7 100644
--- a/src/transformers/models/swin/modeling_tf_swin.py
+++ b/src/transformers/models/swin/modeling_tf_swin.py
@@ -957,15 +957,7 @@ def dummy_inputs(self) -> Dict[str, tf.Tensor]:
         ]
     )
     def serving(self, inputs):
-        """
-        Method used for serving the model.
-
-        Args:
-            inputs (`Dict[str, tf.Tensor]`):
-                The input of the saved model as a dictionary of tensors.
-        """
         output = self.call(inputs)
-
         return self.serving_output(output)
 
 
@@ -1231,6 +1223,7 @@ def call(
         return swin_outputs
 
     def serving_output(self, output: TFSwinModelOutput) -> TFSwinModelOutput:
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFSwinModelOutput(
             last_hidden_state=output.last_hidden_state,
             pooler_output=output.pooler_output,
@@ -1394,6 +1387,7 @@ def call(
         )
 
     def serving_output(self, output: TFSwinMaskedImageModelingOutput) -> TFSwinMaskedImageModelingOutput:
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFSwinMaskedImageModelingOutput(
             logits=output.logits,
             hidden_states=output.hidden_states,
@@ -1478,6 +1472,7 @@ def call(
         )
 
     def serving_output(self, output: TFSwinImageClassifierOutput) -> TFSwinImageClassifierOutput:
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFSwinImageClassifierOutput(
             logits=output.logits,
             hidden_states=output.hidden_states,
diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py
index 2a9e8490382d..cbde84e5b348 100644
--- a/src/transformers/models/tapas/modeling_tf_tapas.py
+++ b/src/transformers/models/tapas/modeling_tf_tapas.py
@@ -873,7 +873,6 @@ class TFTapasPreTrainedModel(TFPreTrainedModel):
     )
     def serving(self, inputs):
         output = self.call(inputs)
-
         return self.serving_output(output)
 
 
From 88e788805749e38ec35a15597976b9d86532b7f7 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 16:28:18 +0100
Subject: [PATCH 11/22] Add in hidden states - resnet

---
 src/transformers/models/resnet/modeling_tf_resnet.py | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/transformers/models/resnet/modeling_tf_resnet.py b/src/transformers/models/resnet/modeling_tf_resnet.py
index 31a41b86fd55..bed053ae404f 100644
--- a/src/transformers/models/resnet/modeling_tf_resnet.py
+++ b/src/transformers/models/resnet/modeling_tf_resnet.py
@@ -263,10 +263,7 @@ def call(
         if not return_dict:
             return tuple(v for v in [hidden_state, hidden_states] if v is not None)
 
-        return TFBaseModelOutputWithNoAttention(
-            last_hidden_state=hidden_state,
-            hidden_states=hidden_states,
-        )
+        return TFBaseModelOutputWithNoAttention(last_hidden_state=hidden_state, hidden_states=hidden_states)
 
 
 class TFResNetPreTrainedModel(TFPreTrainedModel):
@@ -429,7 +426,9 @@ def serving_output(
     ) -> TFBaseModelOutputWithPoolingAndNoAttention:
         # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFBaseModelOutputWithPoolingAndNoAttention(
-            last_hidden_state=output.last_hidden_state, pooler_output=output.pooler_output
+            last_hidden_state=output.last_hidden_state,
+            pooler_output=output.pooler_output,
+            hidden_states=output.hidden_states,
         )
 
 
@@ -499,4 +498,4 @@ def call(
 
     def serving_output(self, output: TFImageClassifierOutputWithNoAttention) -> TFImageClassifierOutputWithNoAttention:
         # hidden_states not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
-        return TFImageClassifierOutputWithNoAttention(logits=output.logits)
+        return TFImageClassifierOutputWithNoAttention(logits=output.logits, hidden_states=output.hidden_states)

From 899c76cf641f4509283c91e45b155253248ca9d9 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 17:27:22 +0100
Subject: [PATCH 12/22] Replace numpy with TF ops in wav2vec2 _compute_mask_indices

---
 .../models/wav2vec2/modeling_tf_wav2vec2.py          | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index b524d87028d7..b04a59e9035e 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -268,12 +268,16 @@ def _compute_mask_indices(
             f" `sequence_length`: {sequence_length}`"
         )
     # compute number of masked spans in batch
-    num_masked_spans = int(mask_prob * sequence_length / mask_length + np.random.uniform(size=(1,)))
-    num_masked_spans = max(num_masked_spans, min_masks)
+    num_masked_spans = mask_prob * sequence_length / mask_length + tf.random.uniform((1,))
+    num_masked_spans = tf.maximum(num_masked_spans, min_masks)
+    num_masked_spans = tf.cast(num_masked_spans, tf.int32)
 
     # make sure num masked indices <= sequence_length
-    if num_masked_spans * mask_length > sequence_length:
-        num_masked_spans = sequence_length // mask_length
+    num_masked_spans = tf.cond(
+        num_masked_spans * mask_length > sequence_length,
+        true_fn=lambda: sequence_length // mask_length,
+        false_fn=lambda: num_masked_spans,
+    )
 
     # SpecAugment mask to fill
     spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32)

From 57cf29ab992cacda923b3722889d48326b680b68 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 18:31:27 +0100
Subject: [PATCH 13/22] Fix failing tests - tensor shape and skipping tests

---
 .../models/hubert/modeling_tf_hubert.py             | 13 +++++++++----
 .../models/wav2vec2/modeling_tf_wav2vec2.py         |  1 +
 tests/models/clip/test_modeling_tf_clip.py          |  5 +++++
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index 1d1857aded62..cd4d037a35e6 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -227,12 +227,17 @@ def _compute_mask_indices(
             f" `sequence_length`: {sequence_length}`"
         )
     # compute number of masked spans in batch
-    num_masked_spans = int(mask_prob * sequence_length / mask_length + np.random.uniform(size=(1,)))
-    num_masked_spans = max(num_masked_spans, min_masks)
+    num_masked_spans = mask_prob * sequence_length / mask_length + tf.random.uniform((1,))
+    num_masked_spans = tf.maximum(num_masked_spans, min_masks)
+    num_masked_spans = tf.cast(num_masked_spans, tf.int32)
 
     # make sure num masked indices <= sequence_length
-    if num_masked_spans * mask_length > sequence_length:
-        num_masked_spans = sequence_length // mask_length
+    num_masked_spans = tf.cond(
+        num_masked_spans * mask_length > sequence_length,
+        true_fn=lambda: sequence_length // mask_length,
+        false_fn=lambda: num_masked_spans,
+    )
+    num_masked_spans = tf.squeeze(num_masked_spans)
 
     # SpecAugment mask to fill
     spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32)
diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index b04a59e9035e..9348fbfa5706 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -278,6 +278,7 @@ def _compute_mask_indices(
         true_fn=lambda: sequence_length // mask_length,
         false_fn=lambda: num_masked_spans,
     )
+    num_masked_spans = tf.squeeze(num_masked_spans)
 
     # SpecAugment mask to fill
     spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32)
diff --git a/tests/models/clip/test_modeling_tf_clip.py b/tests/models/clip/test_modeling_tf_clip.py
index aa22c1cc0b9d..05b4c7920ebd 100644
--- a/tests/models/clip/test_modeling_tf_clip.py
+++ b/tests/models/clip/test_modeling_tf_clip.py
@@ -606,6 +606,11 @@ def test_model_from_pretrained(self):
             model = TFCLIPModel.from_pretrained(model_name)
             self.assertIsNotNone(model)
 
+    @unittest.skip(reason="Currently `saved_model` doesn't work with nested outputs.")
+    @slow
+    def test_saved_model_creation(self):
+        pass
+
     @unittest.skip(reason="Currently `saved_model` doesn't work with nested outputs.")
     @slow
     def test_saved_model_creation_extended(self):

From 1a378c36b50a5680b5db83a22fa22c414872c2b3 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Wed, 20 Jul 2022 19:35:21 +0100
Subject: [PATCH 14/22] Remove duplicated function

---
 tests/test_modeling_tf_common.py     |  1 +
 tests/utils/test_modeling_tf_core.py | 21 ---------------------
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index 0eb022641f88..380cc4114c19 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -205,6 +205,7 @@ def test_save_load_config(self):
 
             self.assert_outputs_same(after_outputs, outputs)
 
+    @slow
     def test_saved_model_creation(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         config.output_hidden_states = False
diff --git a/tests/utils/test_modeling_tf_core.py b/tests/utils/test_modeling_tf_core.py
index d1683d69cf7a..1923903b0337 100644
--- a/tests/utils/test_modeling_tf_core.py
+++ b/tests/utils/test_modeling_tf_core.py
@@ -201,27 +201,6 @@ def test_xla_fit(self):
                 val_loss = history.history["val_loss"][0]
                 self.assertTrue(not isnan(val_loss))
 
-    @slow
-    def test_saved_model_creation(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.output_hidden_states = False
-        config.output_attentions = False
-
-        if hasattr(config, "use_cache"):
-            config.use_cache = False
-
-        model_class = self.all_model_classes[0]
-
-        class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-        model = model_class(config)
-
-        model(class_inputs_dict)
-
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            model.save_pretrained(tmpdirname, saved_model=True)
-            saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
-            self.assertTrue(os.path.exists(saved_model_dir))
-
     @slow
     def test_saved_model_creation_extended(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

From 5178ab806f5a439dc807cbd6db54b3ff939789d3 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Thu, 21 Jul 2022 11:06:51 +0100
Subject: [PATCH 15/22] PR comments - formatting and var names

---
 .../data2vec/modeling_tf_data2vec_vision.py   | 24 +++++++--------
 .../models/deit/modeling_tf_deit.py           | 28 +++++++++---------
 .../models/hubert/modeling_tf_hubert.py       | 15 +++++-----
 .../modeling_tf_speech_to_text.py             |  5 ++--
 .../models/tapas/modeling_tf_tapas.py         | 29 ++++++++++---------
 .../models/vit_mae/modeling_tf_vit_mae.py     | 18 +++++++-----
 .../models/wav2vec2/modeling_tf_wav2vec2.py   | 14 ++++-----
 7 files changed, 68 insertions(+), 65 deletions(-)

diff --git a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
index 83ab706c9c47..e09cbfb9c42a 100644
--- a/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
+++ b/src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
@@ -911,14 +911,14 @@ def call(
         return outputs
 
     def serving_output(self, output: TFData2VecVisionModelOutputWithPooling) -> TFData2VecVisionModelOutputWithPooling:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
         return TFData2VecVisionModelOutputWithPooling(
             last_hidden_state=output.last_hidden_state,
             pooler_output=output.pooler_output,
-            hidden_states=hs,
-            attentions=attns,
+            hidden_states=hidden_states,
+            attentions=attentions,
         )
 
 
@@ -995,10 +995,10 @@ def call(
         )
 
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
-        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
 
 
 class TFData2VecVisionConvModule(tf.keras.layers.Layer):
@@ -1462,11 +1462,7 @@ def reshape_features(x):
         )
 
     def serving_output(self, output: TFSemanticSegmenterOutput) -> TFSemanticSegmenterOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
-        return TFSemanticSegmenterOutput(
-            logits=output.logits,
-            hidden_states=hs,
-            attentions=attns,
-        )
+        return TFSemanticSegmenterOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
diff --git a/src/transformers/models/deit/modeling_tf_deit.py b/src/transformers/models/deit/modeling_tf_deit.py
index c96147795596..7cf041eef445 100644
--- a/src/transformers/models/deit/modeling_tf_deit.py
+++ b/src/transformers/models/deit/modeling_tf_deit.py
@@ -680,14 +680,14 @@ def call(
         return outputs
 
     def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
         return TFBaseModelOutputWithPooling(
             last_hidden_state=output.last_hidden_state,
             pooler_output=output.pooler_output,
-            hidden_states=hs,
-            attentions=attns,
+            hidden_states=hidden_states,
+            attentions=attentions,
         )
 
 
@@ -865,10 +865,10 @@ def call(
         )
 
     def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
-        return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFMaskedLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
 
 
 @add_start_docstrings(
@@ -968,10 +968,10 @@ def call(
         )
 
     def serving_output(self, output: TFImageClassifierOutput) -> TFImageClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
-        return TFImageClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFImageClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
 
 
 @add_start_docstrings(
@@ -1057,13 +1057,13 @@ def call(
     def serving_output(
         self, output: TFDeiTForImageClassificationWithTeacherOutput
     ) -> TFDeiTForImageClassificationWithTeacherOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
         return TFDeiTForImageClassificationWithTeacherOutput(
             logits=output.logits,
             cls_logits=output.cls_logits,
             distillation_logits=output.distillation_logits,
-            hidden_states=hs,
-            attentions=attns,
+            hidden_states=hidden_states,
+            attentions=attentions,
         )
diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index cd4d037a35e6..d17e3cf9ce8d 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -1524,10 +1524,11 @@ def call(
         return outputs
 
     def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns)
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        return TFBaseModelOutput(
+            last_hidden_state=output.last_hidden_state, hidden_states=hidden_states, attentions=attentions
+        )
 
 
 @add_start_docstrings(
@@ -1698,6 +1699,6 @@ def call(
         )
 
     def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-        return TFCausalLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        return TFCausalLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
index efd46f9195c0..a343fef9b86c 100755
--- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -143,9 +143,8 @@ def __init__(self, config: Speech2TextConfig, **kwargs):
         ]
 
     def call(self, input_features: tf.Tensor) -> tf.Tensor:
-        hidden_states = tf.cast(
-            input_features, tf.float32
-        )  # TF Conv1D assumes Batch x Time x Channels, same as the input
+        # TF Conv1D assumes Batch x Time x Channels, same as the input
+        hidden_states = tf.cast(input_features, tf.float32)
         for i, conv in enumerate(self.conv_layers):
             # equivalent to `padding=k // 2` on PT's `nn.Conv1d`
             pad_len = self.kernel_sizes[i] // 2
diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py
index cbde84e5b348..8d9b7f32e53e 100644
--- a/src/transformers/models/tapas/modeling_tf_tapas.py
+++ b/src/transformers/models/tapas/modeling_tf_tapas.py
@@ -1034,14 +1034,14 @@ def call(
         return outputs
 
     def serving_output(self, output: TFBaseModelOutputWithPooling) -> TFBaseModelOutputWithPooling:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
         return TFBaseModelOutputWithPooling(
             last_hidden_state=output.last_hidden_state,
             pooler_output=output.pooler_output,
-            hidden_states=hs,
-            attentions=attns,
+            hidden_states=hidden_states,
+            attentions=attentions,
         )
 
 
@@ -1141,10 +1141,10 @@ def call(
         )
 
     def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
-        return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFMaskedLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
 
 
 class TFTapasComputeTokenLogits(tf.keras.layers.Layer):
@@ -1570,11 +1570,14 @@ def call(
         )
 
     def serving_output(self, output: TFTableQuestionAnsweringOutput) -> TFTableQuestionAnsweringOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
         return TFTableQuestionAnsweringOutput(
-            logits=output.logits, logits_aggregation=output.logits_aggregation, hidden_states=hs, attentions=attns
+            logits=output.logits,
+            logits_aggregation=output.logits_aggregation,
+            hidden_states=hidden_states,
+            attentions=attentions,
         )
 
 
@@ -1680,10 +1683,10 @@ def call(
         )
 
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
-        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
 
 
 """ TAPAS utilities."""
diff --git a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py
index 0f64a3b6f8e5..d43bfa45b1fb 100644
--- a/src/transformers/models/vit_mae/modeling_tf_vit_mae.py
+++ b/src/transformers/models/vit_mae/modeling_tf_vit_mae.py
@@ -843,15 +843,15 @@ def call(
         return outputs
 
     def serving_output(self, output: TFViTMAEModelOutput) -> TFViTMAEModelOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
         return TFViTMAEModelOutput(
             last_hidden_state=output.last_hidden_state,
             mask=output.mask,
             ids_restore=output.ids_restore,
-            hidden_states=hs,
-            attentions=attns,
+            hidden_states=hidden_states,
+            attentions=attentions,
         )
 
 
@@ -1157,9 +1157,13 @@ def call(
         )
 
     def serving_output(self, output: TFViTMAEForPreTrainingOutput) -> TFViTMAEForPreTrainingOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
         return TFViTMAEForPreTrainingOutput(
-            logits=output.logits, mask=output.mask, ids_restore=output.ids_restore, hidden_states=hs, attentions=attns
+            logits=output.logits,
+            mask=output.mask,
+            ids_restore=output.ids_restore,
+            hidden_states=hidden_states,
+            attentions=attentions,
         )
diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index 9348fbfa5706..b01356b69444 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -1557,14 +1557,14 @@ def call(
         return outputs
 
     def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
 
         return TFWav2Vec2BaseModelOutput(
             last_hidden_state=output.last_hidden_state,
             extract_features=output.extract_features,
-            hidden_states=hs,
-            attentions=attns,
+            hidden_states=hidden_states,
+            attentions=attentions,
         )
 
 
@@ -1739,6 +1739,6 @@ def call(
         )
 
     def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-        return TFCausalLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        return TFCausalLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)

From 6f0aa42bdcc08ff3fd5c9d1a00d7c9e83172bd97 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Thu, 21 Jul 2022 15:39:22 +0100
Subject: [PATCH 16/22] PR comments: add suggestions made by Joao Gante

* Use tf.shape instead of shape_list
* Use @tooslow decorator on tests
* Simplify some of the logic

---
 src/transformers/models/hubert/modeling_tf_hubert.py |  8 ++------
 .../speech_to_text/modeling_tf_speech_to_text.py     | 12 ++++++------
 .../models/wav2vec2/modeling_tf_wav2vec2.py          |  8 ++------
 tests/models/bart/test_modeling_tf_bart.py           |  2 +-
 .../models/blenderbot/test_modeling_tf_blenderbot.py |  2 +-
 .../test_modeling_tf_blenderbot_small.py             |  2 +-
 tests/models/funnel/test_modeling_tf_funnel.py       |  6 +++---
 tests/models/led/test_modeling_tf_led.py             |  4 ++--
 .../models/longformer/test_modeling_tf_longformer.py |  2 +-
 tests/models/lxmert/test_modeling_tf_lxmert.py       |  4 ++--
 tests/models/marian/test_modeling_tf_marian.py       |  2 +-
 tests/models/mbart/test_modeling_tf_mbart.py         |  4 ++--
 .../models/mobilebert/test_modeling_tf_mobilebert.py |  4 ++--
 tests/models/opt/test_modeling_tf_opt.py             |  4 ++--
 tests/models/pegasus/test_modeling_tf_pegasus.py     |  4 ++--
 tests/models/t5/test_modeling_tf_t5.py               |  4 ++--
 16 files changed, 32 insertions(+), 40 deletions(-)

diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index d17e3cf9ce8d..52b0b1471f3e 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -232,11 +232,7 @@ def _compute_mask_indices(
     num_masked_spans = tf.cast(num_masked_spans, tf.int32)
 
     # make sure num masked indices <= sequence_length
-    num_masked_spans = tf.cond(
-        num_masked_spans * mask_length > sequence_length,
-        true_fn=lambda: sequence_length // mask_length,
-        false_fn=lambda: num_masked_spans,
-    )
+    num_masked_spans = tf.math.minimum(sequence_length // mask_length, num_masked_spans)
     num_masked_spans = tf.squeeze(num_masked_spans)
 
     # SpecAugment mask to fill
@@ -261,7 +257,7 @@ def _compute_mask_indices(
 
     # scatter indices to mask
     spec_aug_mask = _scatter_values_on_batch_indices(
-        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, shape_list(spec_aug_mask)
+        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, tf.shape(spec_aug_mask)
     )
 
     return spec_aug_mask
diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
index a343fef9b86c..805d59bd94b7 100755
--- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -189,7 +189,7 @@ def _get_embedding(num_embeddings: int, embedding_dim: int, padding_idx: Optiona
             emb = tf.concat([emb, tf.zeros(num_embeddings, 1)], axis=1)
         if padding_idx is not None:
             emb = tf.concat(
-                [emb[:padding_idx, :], tf.zeros((1, shape_list(emb)[1])), emb[padding_idx + 1 :, :]], axis=0
+                [emb[:padding_idx, :], tf.zeros((1, tf.shape(emb)[1])), emb[padding_idx + 1 :, :]], axis=0
             )
         return emb
 
@@ -200,7 +200,7 @@ def build(self, input_shape: tf.TensorShape):
         """
         self.embeddings = self.add_weight(
             name="weights",  # name also used in PT
-            shape=shape_list(self.embedding_weights),
+            shape=tf.shape(self.embedding_weights),
             trainable=False,
         )
         self.embeddings.assign(self.embedding_weights)
@@ -779,7 +779,7 @@ def _get_feat_extract_output_lengths(self, input_lengths: tf.Tensor):
     def _get_feature_vector_attention_mask(self, feature_vector_length, attention_mask):
         # generate creates 3D attention mask, because of the shape of input_features
         # convert it to 2D if thats the case
-        if len(shape_list(attention_mask)) > 2:
+        if len(attention_mask.shape) > 2:
             attention_mask = attention_mask[:, :, -1]
 
         subsampled_lengths = self._get_feat_extract_output_lengths(tf.math.reduce_sum(attention_mask, -1))
@@ -844,10 +844,10 @@ def call(
 
         # subsample attention mask if necessary
         if attention_mask is not None:
-            attention_mask = self._get_feature_vector_attention_mask(shape_list(inputs_embeds)[1], attention_mask)
+            attention_mask = self._get_feature_vector_attention_mask(tf.shape(inputs_embeds)[1], attention_mask)
             padding_mask = tf.cast(tf.math.not_equal(attention_mask, 1), tf.int64)
         else:
-            padding_mask = tf.zeros(shape_list(inputs_embeds)[:-1], dtype=tf.int64)
+            padding_mask = tf.zeros(tf.shape(inputs_embeds)[:-1], dtype=tf.int64)
 
         embed_pos = self.embed_positions(padding_mask)
 
@@ -1193,7 +1193,7 @@ def call(
         # downsample encoder attention mask
         if attention_mask is not None:
             encoder_attention_mask = self.encoder._get_feature_vector_attention_mask(
-                shape_list(encoder_outputs[0])[1], attention_mask
+                tf.shape(encoder_outputs[0])[1], attention_mask
             )
         else:
             encoder_attention_mask = None
diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index b01356b69444..4b9e33d0b800 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -273,11 +273,7 @@ def _compute_mask_indices(
     num_masked_spans = tf.cast(num_masked_spans, tf.int32)
 
     # make sure num masked indices <= sequence_length
-    num_masked_spans = tf.cond(
-        num_masked_spans * mask_length > sequence_length,
-        true_fn=lambda: sequence_length // mask_length,
-        false_fn=lambda: num_masked_spans,
-    )
+    num_masked_spans = tf.math.minimum(sequence_length // mask_length, num_masked_spans)
     num_masked_spans = tf.squeeze(num_masked_spans)
 
     # SpecAugment mask to fill
@@ -302,7 +298,7 @@ def _compute_mask_indices(
 
     # scatter indices to mask
     spec_aug_mask = _scatter_values_on_batch_indices(
-        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, shape_list(spec_aug_mask)
+        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, tf.shape(spec_aug_mask)
     )
 
     return spec_aug_mask
diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py
index c0d9b3775585..95055708850d 100644
--- a/tests/models/bart/test_modeling_tf_bart.py
+++ b/tests/models/bart/test_modeling_tf_bart.py
@@ -293,7 +293,7 @@ def _get_word_embedding_weight(model, embedding_layer):
                                 models_equal = False
                     self.assertTrue(models_equal)
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/blenderbot/test_modeling_tf_blenderbot.py b/tests/models/blenderbot/test_modeling_tf_blenderbot.py
index 9a71e5f7b1c1..dfbb628c361a 100644
--- a/tests/models/blenderbot/test_modeling_tf_blenderbot.py
+++ b/tests/models/blenderbot/test_modeling_tf_blenderbot.py
@@ -213,7 +213,7 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
index 3e61b54e6d21..875ab6a41c44 100644
--- a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
+++ b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
@@ -278,7 +278,7 @@ def _get_word_embedding_weight(model, embedding_layer):
                                 models_equal = False
                     self.assertTrue(models_equal)
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/funnel/test_modeling_tf_funnel.py b/tests/models/funnel/test_modeling_tf_funnel.py
index 52647e7493fa..faeb9a799510 100644
--- a/tests/models/funnel/test_modeling_tf_funnel.py
+++ b/tests/models/funnel/test_modeling_tf_funnel.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import FunnelConfig, is_tf_available
-from transformers.testing_utils import require_tf
+from transformers.testing_utils import require_tf, tooslow
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
@@ -371,7 +371,7 @@ def test_for_question_answering(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_question_answering(*config_and_inputs)
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
@@ -407,6 +407,6 @@ def test_for_multiple_choice(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
diff --git a/tests/models/led/test_modeling_tf_led.py b/tests/models/led/test_modeling_tf_led.py
index 1b2b69934796..dfdb66606faf 100644
--- a/tests/models/led/test_modeling_tf_led.py
+++ b/tests/models/led/test_modeling_tf_led.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import LEDConfig, is_tf_available
-from transformers.testing_utils import require_tf, slow
+from transformers.testing_utils import require_tf, slow, tooslow
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor
@@ -365,7 +365,7 @@ def test_xla_mode(self):
         # TODO JP: Make LED XLA compliant
         pass
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/longformer/test_modeling_tf_longformer.py b/tests/models/longformer/test_modeling_tf_longformer.py
index cde0d273d2a6..02a35e48fe5f 100644
--- a/tests/models/longformer/test_modeling_tf_longformer.py
+++ b/tests/models/longformer/test_modeling_tf_longformer.py
@@ -326,7 +326,7 @@ def test_for_multiple_choice(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/lxmert/test_modeling_tf_lxmert.py b/tests/models/lxmert/test_modeling_tf_lxmert.py
index 8ec5b661d7a3..73eda47eb950 100644
--- a/tests/models/lxmert/test_modeling_tf_lxmert.py
+++ b/tests/models/lxmert/test_modeling_tf_lxmert.py
@@ -20,7 +20,7 @@
 import numpy as np
 
 from transformers import LxmertConfig, is_tf_available
-from transformers.testing_utils import require_tf, slow
+from transformers.testing_utils import require_tf, slow, tooslow
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
@@ -600,7 +600,7 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/marian/test_modeling_tf_marian.py b/tests/models/marian/test_modeling_tf_marian.py
index 8ace8feaba3d..eb54e0c66f00 100644
--- a/tests/models/marian/test_modeling_tf_marian.py
+++ b/tests/models/marian/test_modeling_tf_marian.py
@@ -246,7 +246,7 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/mbart/test_modeling_tf_mbart.py b/tests/models/mbart/test_modeling_tf_mbart.py
index 5a3e8238c60b..b1bdb40cf79f 100644
--- a/tests/models/mbart/test_modeling_tf_mbart.py
+++ b/tests/models/mbart/test_modeling_tf_mbart.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import AutoTokenizer, MBartConfig, is_tf_available
-from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
+from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow
 from transformers.utils import cached_property
 
 from ...test_configuration_common import ConfigTester
@@ -281,7 +281,7 @@ def _get_word_embedding_weight(model, embedding_layer):
                                 models_equal = False
                     self.assertTrue(models_equal)
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/mobilebert/test_modeling_tf_mobilebert.py b/tests/models/mobilebert/test_modeling_tf_mobilebert.py
index 17b74e811d43..1800cd3ca143 100644
--- a/tests/models/mobilebert/test_modeling_tf_mobilebert.py
+++ b/tests/models/mobilebert/test_modeling_tf_mobilebert.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import MobileBertConfig, is_tf_available
-from transformers.testing_utils import require_tf, slow
+from transformers.testing_utils import require_tf, slow, tooslow
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
@@ -306,7 +306,7 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/opt/test_modeling_tf_opt.py b/tests/models/opt/test_modeling_tf_opt.py
index 19f86b82e8ae..61d6aad53fc1 100644
--- a/tests/models/opt/test_modeling_tf_opt.py
+++ b/tests/models/opt/test_modeling_tf_opt.py
@@ -18,7 +18,7 @@
 import numpy as np
 
 from transformers import OPTConfig, is_tf_available
-from transformers.testing_utils import require_sentencepiece, require_tf, slow
+from transformers.testing_utils import require_sentencepiece, require_tf, slow, tooslow
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor
@@ -227,7 +227,7 @@ def _get_word_embedding_weight(model, embedding_layer):
                             models_equal = False
                     self.assertTrue(models_equal)
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/pegasus/test_modeling_tf_pegasus.py b/tests/models/pegasus/test_modeling_tf_pegasus.py
index 39dcb0a1be2a..c26b25fc55e0 100644
--- a/tests/models/pegasus/test_modeling_tf_pegasus.py
+++ b/tests/models/pegasus/test_modeling_tf_pegasus.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import AutoTokenizer, PegasusConfig, is_tf_available
-from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
+from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow
 from transformers.utils import cached_property
 
 from ...test_configuration_common import ConfigTester
@@ -244,7 +244,7 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 
diff --git a/tests/models/t5/test_modeling_tf_t5.py b/tests/models/t5/test_modeling_tf_t5.py
index a4b85abc2d9a..297c921a3d4f 100644
--- a/tests/models/t5/test_modeling_tf_t5.py
+++ b/tests/models/t5/test_modeling_tf_t5.py
@@ -16,7 +16,7 @@
 import unittest
 
 from transformers import T5Config, is_tf_available
-from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
+from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow
 from transformers.utils import cached_property
 
 from ...test_configuration_common import ConfigTester
@@ -305,7 +305,7 @@ def test_model_common_attributes(self):
                 name = model.get_bias()
                 assert name is None
 
-    @unittest.skip(reason="Test takes too long to run (>30sec) causing the CI to fail.")
+    @tooslow
     def test_saved_model_creation(self):
         pass
 

From e9486db0a881b976305faf0723d9a03add7c54d4 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Thu, 21 Jul 2022 16:39:09 +0100
Subject: [PATCH 17/22] Address PR comments from Yih-Dar Shieh: make tensor
 names consistent and make types float

---
 src/transformers/models/funnel/modeling_tf_funnel.py        | 6 +++---
 src/transformers/models/hubert/modeling_tf_hubert.py        | 6 +++---
 .../models/speech_to_text/modeling_tf_speech_to_text.py     | 2 +-
 src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py    | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py
index c9165e58945d..9d2b6852413b 100644
--- a/src/transformers/models/funnel/modeling_tf_funnel.py
+++ b/src/transformers/models/funnel/modeling_tf_funnel.py
@@ -1506,9 +1506,9 @@ def call(
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
+                "input_ids": tf.TensorSpec((None, None), tf.float32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.float32, name="token_type_ids"),
             }
         ]
     )
diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index 52b0b1471f3e..36f89840f3ce 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -1323,9 +1323,9 @@ def __init__(self, config, *inputs, **kwargs):
     @tf.function(
         input_signature=[
             {
-                "input_values": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
+                "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
+                "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.float32, name="token_type_ids"),
             }
         ]
     )
diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
index 805d59bd94b7..dc8bd9a34b7e 100755
--- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -608,7 +608,7 @@ def _get_feat_extract_output_lengths(self, input_lengths: tf.Tensor):
     @tf.function(
         input_signature=[
             {
-                "input_features": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
+                "input_features": tf.TensorSpec((None, None, None), tf.int32, name="input_features"),
                 "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
                 "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
                 "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),
diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index 4b9e33d0b800..60bf0199c5cc 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -1356,9 +1356,9 @@ def __init__(self, config, *inputs, **kwargs):
     @tf.function(
         input_signature=[
             {
-                "input_values": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
+                "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
+                "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.float32, name="token_type_ids"),
             }
         ]
     )

From db264ea87587cb17ac16735b0fe7d99901c54e8a Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Thu, 21 Jul 2022 17:26:53 +0100
Subject: [PATCH 18/22] Types consistent with docs; disable test on swin (slow)

---
 src/transformers/models/funnel/modeling_tf_funnel.py        | 4 ++--
 src/transformers/models/hubert/modeling_tf_hubert.py        | 4 ++--
 .../models/speech_to_text/modeling_tf_speech_to_text.py     | 4 +---
 src/transformers/models/tapas/modeling_tf_tapas.py          | 2 +-
 src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py    | 4 ++--
 tests/models/bart/test_modeling_tf_bart.py                  | 2 +-
 tests/models/blenderbot/test_modeling_tf_blenderbot.py      | 2 +-
 .../blenderbot_small/test_modeling_tf_blenderbot_small.py   | 2 +-
 tests/models/longformer/test_modeling_tf_longformer.py      | 2 +-
 tests/models/marian/test_modeling_tf_marian.py              | 2 +-
 tests/models/swin/test_modeling_tf_swin.py                  | 6 +++++-
 11 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/transformers/models/funnel/modeling_tf_funnel.py b/src/transformers/models/funnel/modeling_tf_funnel.py
index 9d2b6852413b..ba74871dd1d0 100644
--- a/src/transformers/models/funnel/modeling_tf_funnel.py
+++ b/src/transformers/models/funnel/modeling_tf_funnel.py
@@ -1506,9 +1506,9 @@ def call(
     @tf.function(
         input_signature=[
             {
-                "input_ids": tf.TensorSpec((None, None), tf.float32, name="input_ids"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
                 "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None), tf.float32, name="token_type_ids"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
             }
         ]
     )
diff --git a/src/transformers/models/hubert/modeling_tf_hubert.py b/src/transformers/models/hubert/modeling_tf_hubert.py
index 36f89840f3ce..fc6e5b13d408 100644
--- a/src/transformers/models/hubert/modeling_tf_hubert.py
+++ b/src/transformers/models/hubert/modeling_tf_hubert.py
@@ -1324,8 +1324,8 @@ def __init__(self, config, *inputs, **kwargs):
         input_signature=[
             {
                 "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
-                "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None), tf.float32, name="token_type_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
             }
         ]
     )
diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
index dc8bd9a34b7e..3f31ade5e26d 100755
--- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -188,9 +188,7 @@ def _get_embedding(num_embeddings: int, embedding_dim: int, padding_idx: Optiona
             # zero pad
             emb = tf.concat([emb, tf.zeros(num_embeddings, 1)], axis=1)
         if padding_idx is not None:
-            emb = tf.concat(
-                [emb[:padding_idx, :], tf.zeros((1, tf.shape(emb)[1])), emb[padding_idx + 1 :, :]], axis=0
-            )
+            emb = tf.concat([emb[:padding_idx, :], tf.zeros((1, tf.shape(emb)[1])), emb[padding_idx + 1 :, :]], axis=0)
         return emb
 
     def build(self, input_shape: tf.TensorShape):
diff --git a/src/transformers/models/tapas/modeling_tf_tapas.py b/src/transformers/models/tapas/modeling_tf_tapas.py
index 8d9b7f32e53e..93d98914f1f3 100644
--- a/src/transformers/models/tapas/modeling_tf_tapas.py
+++ b/src/transformers/models/tapas/modeling_tf_tapas.py
@@ -866,7 +866,7 @@ class TFTapasPreTrainedModel(TFPreTrainedModel):
         input_signature=[
             {
                 "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
                 "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
             }
         ]
diff --git a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
index 60bf0199c5cc..fed0414863a5 100644
--- a/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+++ b/src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
@@ -1357,8 +1357,8 @@ def __init__(self, config, *inputs, **kwargs):
         input_signature=[
             {
                 "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
-                "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None), tf.float32, name="token_type_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
             }
         ]
     )
diff --git a/tests/models/bart/test_modeling_tf_bart.py b/tests/models/bart/test_modeling_tf_bart.py
index 95055708850d..5e5c5ee592a1 100644
--- a/tests/models/bart/test_modeling_tf_bart.py
+++ b/tests/models/bart/test_modeling_tf_bart.py
@@ -18,7 +18,7 @@
 import numpy as np
 
 from transformers import BartConfig, BartTokenizer, is_tf_available
-from transformers.testing_utils import require_tf, slow
+from transformers.testing_utils import require_tf, slow, tooslow
 from transformers.utils import cached_property
 
 from ...test_configuration_common import ConfigTester
diff --git a/tests/models/blenderbot/test_modeling_tf_blenderbot.py b/tests/models/blenderbot/test_modeling_tf_blenderbot.py
index dfbb628c361a..7b974cbe326a 100644
--- a/tests/models/blenderbot/test_modeling_tf_blenderbot.py
+++ b/tests/models/blenderbot/test_modeling_tf_blenderbot.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import BlenderbotConfig, BlenderbotTokenizer, is_tf_available
-from transformers.testing_utils import require_tf, require_tokenizers, slow
+from transformers.testing_utils import require_tf, require_tokenizers, slow, tooslow
 from transformers.utils import cached_property
 
 from ...test_configuration_common import ConfigTester
diff --git a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
index 875ab6a41c44..0b8d6132a20a 100644
--- a/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
+++ b/tests/models/blenderbot_small/test_modeling_tf_blenderbot_small.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import BlenderbotSmallConfig, BlenderbotSmallTokenizer, is_tf_available
-from transformers.testing_utils import require_tf, require_tokenizers, slow
+from transformers.testing_utils import require_tf, require_tokenizers, slow, tooslow
 from transformers.utils import cached_property
 
 from ...test_configuration_common import ConfigTester
diff --git a/tests/models/longformer/test_modeling_tf_longformer.py b/tests/models/longformer/test_modeling_tf_longformer.py
index 02a35e48fe5f..cc62bb6caf70 100644
--- a/tests/models/longformer/test_modeling_tf_longformer.py
+++ b/tests/models/longformer/test_modeling_tf_longformer.py
@@ -17,7 +17,7 @@
 import unittest
 
 from transformers import is_tf_available
-from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
+from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_tf_common import TFModelTesterMixin, ids_tensor, random_attention_mask
diff --git a/tests/models/marian/test_modeling_tf_marian.py b/tests/models/marian/test_modeling_tf_marian.py
index eb54e0c66f00..e8d65e0ad0ea 100644
--- a/tests/models/marian/test_modeling_tf_marian.py
+++ b/tests/models/marian/test_modeling_tf_marian.py
@@ -19,7 +19,7 @@
 import warnings
 
 from transformers import AutoTokenizer, MarianConfig, MarianTokenizer, TranslationPipeline, is_tf_available
-from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow
+from transformers.testing_utils import require_sentencepiece, require_tf, require_tokenizers, slow, tooslow
 from transformers.utils import cached_property
 
 from ...test_configuration_common import ConfigTester
diff --git a/tests/models/swin/test_modeling_tf_swin.py b/tests/models/swin/test_modeling_tf_swin.py
index 94ca1ac2ba86..be5861ce48b4 100644
--- a/tests/models/swin/test_modeling_tf_swin.py
+++ b/tests/models/swin/test_modeling_tf_swin.py
@@ -21,7 +21,7 @@
 import numpy as np
 
 from transformers import SwinConfig
-from transformers.testing_utils import require_tf, require_vision, slow, to_2tuple
+from transformers.testing_utils import require_tf, require_vision, slow, to_2tuple, tooslow
 from transformers.utils import cached_property, is_tf_available, is_vision_available
 
 from ...test_configuration_common import ConfigTester
@@ -225,6 +225,10 @@ def test_for_image_classification(self):
     def test_inputs_embeds(self):
         pass
 
+    @tooslow
+    def test_saved_model_creation(self):
+        pass
+
     def test_model_common_attributes(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
 

From 006d9e55981e9e731007f798240ab4eccb84daf8 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Thu, 21 Jul 2022 18:23:58 +0100
Subject: [PATCH 19/22] CI trigger


From b6e7d06741c59f22f18e5da879ddc684aa79e2e8 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Thu, 21 Jul 2022 18:41:10 +0100
Subject: [PATCH 20/22] Change input_features to float32

---
 .../models/speech_to_text/modeling_tf_speech_to_text.py         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
index 3f31ade5e26d..2e8c4cddd20b 100755
--- a/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+++ b/src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
@@ -606,7 +606,7 @@ def _get_feat_extract_output_lengths(self, input_lengths: tf.Tensor):
     @tf.function(
         input_signature=[
             {
-                "input_features": tf.TensorSpec((None, None, None), tf.int32, name="input_features"),
+                "input_features": tf.TensorSpec((None, None, None), tf.float32, name="input_features"),
                 "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
                 "decoder_input_ids": tf.TensorSpec((None, None), tf.int32, name="decoder_input_ids"),
                 "decoder_attention_mask": tf.TensorSpec((None, None), tf.int32, name="decoder_attention_mask"),

From 02caeaeb2b63249e19030f87abea4245c3b9f7be Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Fri, 22 Jul 2022 11:59:12 +0100
Subject: [PATCH 21/22] Add serving_output for segformer

---
 .../models/segformer/modeling_tf_segformer.py | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/transformers/models/segformer/modeling_tf_segformer.py b/src/transformers/models/segformer/modeling_tf_segformer.py
index e64a10178bf8..0e7da4a60f47 100644
--- a/src/transformers/models/segformer/modeling_tf_segformer.py
+++ b/src/transformers/models/segformer/modeling_tf_segformer.py
@@ -544,7 +544,9 @@ def serving(self, inputs):
             inputs (`Dict[str, tf.Tensor]`):
                 The input of the saved model as a dictionary of tensors.
         """
-        return self.call(inputs)
+        output = self.call(inputs)
+
+        return self.serving_output(output)
 
 
 SEGFORMER_START_DOCSTRING = r"""
@@ -628,6 +630,14 @@ def call(
         )
         return outputs
 
+    def serving_output(self, output: TFBaseModelOutput) -> TFBaseModelOutput:
+        # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
+        return TFBaseModelOutput(
+            last_hidden_state=output.last_hidden_state,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions
+        )
+
 
 @add_start_docstrings(
     """
@@ -692,6 +702,14 @@ def call(
             loss=loss, logits=logits, hidden_states=outputs.hidden_states, attentions=outputs.attentions
         )
 
+    def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
+        # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
+        return TFSequenceClassifierOutput(
+            logits=output.logits,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions
+        )
+
 
 class TFSegformerMLP(tf.keras.layers.Layer):
     """
@@ -876,3 +894,11 @@ def call(
             hidden_states=outputs.hidden_states if output_hidden_states else None,
             attentions=outputs.attentions,
         )
+
+    def serving_output(self, output: TFSemanticSegmenterOutput) -> TFSemanticSegmenterOutput:
+        # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
+        return TFSemanticSegmenterOutput(
+            logits=output.logits,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions
+        )

From 2ed20252b695c0be1a017d056abca5c2864ca807 Mon Sep 17 00:00:00 2001
From: Amy Roberts <amyeroberts@users.noreply.github.com>
Date: Fri, 22 Jul 2022 12:03:29 +0100
Subject: [PATCH 22/22] Fixup

---
 .../models/segformer/modeling_tf_segformer.py          | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/transformers/models/segformer/modeling_tf_segformer.py b/src/transformers/models/segformer/modeling_tf_segformer.py
index 0e7da4a60f47..a23abbc933c9 100644
--- a/src/transformers/models/segformer/modeling_tf_segformer.py
+++ b/src/transformers/models/segformer/modeling_tf_segformer.py
@@ -635,7 +635,7 @@ def serving_output(self, output: TFBaseModelOutput) -> TFBaseModelOutput:
         return TFBaseModelOutput(
             last_hidden_state=output.last_hidden_state,
             hidden_states=output.hidden_states,
-            attentions=output.attentions
+            attentions=output.attentions,
         )
 
 
@@ -705,9 +705,7 @@ def call(
     def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
         # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFSequenceClassifierOutput(
-            logits=output.logits,
-            hidden_states=output.hidden_states,
-            attentions=output.attentions
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
         )
 
 
@@ -898,7 +896,5 @@ def call(
     def serving_output(self, output: TFSemanticSegmenterOutput) -> TFSemanticSegmenterOutput:
         # hidden_states and attention not converted to Tensor with tf.convert_to_tensor as they are all of different dimensions
         return TFSemanticSegmenterOutput(
-            logits=output.logits,
-            hidden_states=output.hidden_states,
-            attentions=output.attentions
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
         )