From 1991da07f7fe1f2dca0bb49e964aa971beca5746 Mon Sep 17 00:00:00 2001 From: N Date: Wed, 17 Nov 2021 21:24:39 +0100 Subject: [PATCH] [WIP] Ensure TF model configs can be converted to proper JSON (#14415) * test: make sure model configs are jsonifiable * fix: return python dict instead of config object * fix: accept pretrained config and use correct class * Re-enabling slow tests and applying them to core models only * Re-enabling slow tests and applying them to core models only * Add new test file to fetcher * Remove tooslow tests from test_modeling_tf_common.py * make style * Style fixes * Style fixes * Style fixes * Style fixes * Adding core tests to GPT2 and BART * Removing unused imports Co-authored-by: niklas.fruehauf Co-authored-by: matt --- src/transformers/modeling_tf_utils.py | 6 +- tests/test_modeling_tf_bart.py | 3 +- tests/test_modeling_tf_bert.py | 3 +- tests/test_modeling_tf_common.py | 223 +--------------- tests/test_modeling_tf_core.py | 357 ++++++++++++++++++++++++++ tests/test_modeling_tf_gpt2.py | 3 +- utils/tests_fetcher.py | 2 +- 7 files changed, 374 insertions(+), 223 deletions(-) create mode 100644 tests/test_modeling_tf_core.py diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index d7fcb6e56ecb..d15d9023e4bc 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -693,11 +693,13 @@ def __init__(self, config, *inputs, **kwargs): self.name_or_path = config.name_or_path def get_config(self): - return self.config + return self.config.to_dict() @classmethod def from_config(cls, config, **kwargs): - return cls._from_config(config, **kwargs) + if isinstance(config, PretrainedConfig): + return cls._from_config(config, **kwargs) + return cls._from_config(cls.config_class.from_dict(config, **kwargs)) @classmethod def _from_config(cls, config, **kwargs): diff --git a/tests/test_modeling_tf_bart.py b/tests/test_modeling_tf_bart.py index e88659b9887d..951f42d1eb81 100644 
--- a/tests/test_modeling_tf_bart.py +++ b/tests/test_modeling_tf_bart.py @@ -23,6 +23,7 @@ from .test_configuration_common import ConfigTester from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor +from .test_modeling_tf_core import TFCoreModelTesterMixin if is_tf_available(): @@ -177,7 +178,7 @@ def prepare_bart_inputs_dict( @require_tf -class TFBartModelTest(TFModelTesterMixin, unittest.TestCase): +class TFBartModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestCase): all_model_classes = (TFBartForConditionalGeneration, TFBartModel) if is_tf_available() else () all_generative_model_classes = (TFBartForConditionalGeneration,) if is_tf_available() else () is_encoder_decoder = True diff --git a/tests/test_modeling_tf_bert.py b/tests/test_modeling_tf_bert.py index 47cf3f730054..cb566f7e491c 100644 --- a/tests/test_modeling_tf_bert.py +++ b/tests/test_modeling_tf_bert.py @@ -22,6 +22,7 @@ from .test_configuration_common import ConfigTester from .test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor +from .test_modeling_tf_core import TFCoreModelTesterMixin if is_tf_available(): @@ -284,7 +285,7 @@ def prepare_config_and_inputs_for_common(self): @require_tf -class TFBertModelTest(TFModelTesterMixin, unittest.TestCase): +class TFBertModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestCase): all_model_classes = ( ( diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py index 30a7052dafff..1484d651ecf5 100644 --- a/tests/test_modeling_tf_common.py +++ b/tests/test_modeling_tf_common.py @@ -28,6 +28,7 @@ from requests.exceptions import HTTPError from transformers import is_tf_available from transformers.models.auto import get_values +from transformers.testing_utils import tooslow # noqa: F401 from transformers.testing_utils import ( PASS, USER, @@ -38,7 +39,6 @@ require_keras2onnx, require_tf, slow, - tooslow, ) from transformers.utils import logging @@ -169,42 +169,18 @@ def 
test_save_load_config(self): for model_class in self.all_model_classes: model = model_class(config) outputs = model(self._prepare_for_class(inputs_dict, model_class)) - + model_config = model.get_config() + # make sure that returned config is jsonifiable, which is required by keras + json.dumps(model_config) new_model = model_class.from_config(model.get_config()) + # make sure it also accepts a normal config + _ = model_class.from_config(model.config) _ = new_model(self._prepare_for_class(inputs_dict, model_class)) # Build model new_model.set_weights(model.get_weights()) after_outputs = new_model(self._prepare_for_class(inputs_dict, model_class)) self.assert_outputs_same(after_outputs, outputs) - @tooslow - def test_graph_mode(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - for model_class in self.all_model_classes: - inputs = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - - @tf.function - def run_in_graph_mode(): - return model(inputs) - - outputs = run_in_graph_mode() - self.assertIsNotNone(outputs) - - @tooslow - def test_xla_mode(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - for model_class in self.all_model_classes: - inputs = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - - @tf.function(experimental_compile=True) - def run_in_graph_mode(): - return model(inputs) - - outputs = run_in_graph_mode() - self.assertIsNotNone(outputs) - def test_forward_signature(self): config, _ = self.model_tester.prepare_config_and_inputs_for_common() @@ -236,75 +212,6 @@ def test_forward_signature(self): expected_arg_names = ["input_ids"] self.assertListEqual(arg_names[:1], expected_arg_names) - @tooslow - def test_saved_model_creation(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = False - config.output_attentions = False - - if hasattr(config, 
"use_cache"): - config.use_cache = False - - model_class = self.all_model_classes[0] - - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - - model(class_inputs_dict) - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=True) - saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") - self.assertTrue(os.path.exists(saved_model_dir)) - - @tooslow - def test_saved_model_creation_extended(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - config.output_hidden_states = True - config.output_attentions = True - - if hasattr(config, "use_cache"): - config.use_cache = True - - encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) - encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) - - for model_class in self.all_model_classes: - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - num_out = len(model(class_inputs_dict)) - - with tempfile.TemporaryDirectory() as tmpdirname: - model.save_pretrained(tmpdirname, saved_model=True) - saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") - model = tf.keras.models.load_model(saved_model_dir) - outputs = model(class_inputs_dict) - - if self.is_encoder_decoder: - output_hidden_states = outputs["encoder_hidden_states"] - output_attentions = outputs["encoder_attentions"] - else: - output_hidden_states = outputs["hidden_states"] - output_attentions = outputs["attentions"] - - self.assertEqual(len(outputs), num_out) - - expected_num_layers = getattr( - self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 - ) - - self.assertEqual(len(output_hidden_states), expected_num_layers) - self.assertListEqual( - list(output_hidden_states[0].shape[-2:]), - [self.model_tester.seq_length, self.model_tester.hidden_size], - ) - - 
self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers) - self.assertListEqual( - list(output_attentions[0].shape[-3:]), - [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length], - ) - def test_onnx_compliancy(self): if not self.test_onnx: return @@ -366,21 +273,6 @@ def test_onnx_runtime_optimize(self): onnxruntime.InferenceSession(onnx_model.SerializeToString()) - @tooslow - def test_mixed_precision(self): - tf.keras.mixed_precision.experimental.set_policy("mixed_float16") - - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) - model = model_class(config) - outputs = model(class_inputs_dict) - - self.assertIsNotNone(outputs) - - tf.keras.mixed_precision.experimental.set_policy("float32") - def test_keras_save_load(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() @@ -548,76 +440,6 @@ def test_pt_tf_model_equivalence(self): max_diff = np.amax(np.abs(tfo - pto)) self.assertLessEqual(max_diff, 4e-2) - @tooslow - def test_train_pipeline_custom_model(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - # head_mask and decoder_head_mask has different shapes than other input args - if "head_mask" in inputs_dict: - del inputs_dict["head_mask"] - if "decoder_head_mask" in inputs_dict: - del inputs_dict["decoder_head_mask"] - if "cross_attn_head_mask" in inputs_dict: - del inputs_dict["cross_attn_head_mask"] - tf_main_layer_classes = set( - module_member - for model_class in self.all_model_classes - for module in (import_module(model_class.__module__),) - for module_member_name in dir(module) - if module_member_name.endswith("MainLayer") - for module_member in (getattr(module, module_member_name),) - if isinstance(module_member, type) - and tf.keras.layers.Layer in module_member.__bases__ - and 
getattr(module_member, "_keras_serializable", False) - ) - - for main_layer_class in tf_main_layer_classes: - # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter - if "T5" in main_layer_class.__name__: - # Take the same values than in TFT5ModelTester for this shared layer - shared = TFSharedEmbeddings(self.model_tester.vocab_size, self.model_tester.hidden_size, name="shared") - config.use_cache = False - main_layer = main_layer_class(config, embed_tokens=shared) - else: - main_layer = main_layer_class(config) - - symbolic_inputs = { - name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items() - } - - if hasattr(self.model_tester, "num_labels"): - num_labels = self.model_tester.num_labels - else: - num_labels = 2 - - X = tf.data.Dataset.from_tensor_slices( - (inputs_dict, np.ones((self.model_tester.batch_size, self.model_tester.seq_length, num_labels, 1))) - ).batch(1) - - hidden_states = main_layer(symbolic_inputs)[0] - outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states) - model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs]) - - model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["binary_accuracy"]) - model.fit(X, epochs=1) - - with tempfile.TemporaryDirectory() as tmpdirname: - filepath = os.path.join(tmpdirname, "keras_model.h5") - model.save(filepath) - if "T5" in main_layer_class.__name__: - model = tf.keras.models.load_model( - filepath, - custom_objects={ - main_layer_class.__name__: main_layer_class, - "TFSharedEmbeddings": TFSharedEmbeddings, - }, - ) - else: - model = tf.keras.models.load_model( - filepath, custom_objects={main_layer_class.__name__: main_layer_class} - ) - assert isinstance(model, tf.keras.Model) - model(inputs_dict) - def test_compile_tf_model(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() max_input = getattr(self.model_tester, 
"max_position_embeddings", 512) @@ -989,39 +811,6 @@ def test_inputs_embeds(self): model(inputs) - @tooslow - def test_graph_mode_with_inputs_embeds(self): - config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() - - for model_class in self.all_model_classes: - model = model_class(config) - - inputs = copy.deepcopy(inputs_dict) - - if not self.is_encoder_decoder: - input_ids = inputs["input_ids"] - del inputs["input_ids"] - else: - encoder_input_ids = inputs["input_ids"] - decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids) - del inputs["input_ids"] - inputs.pop("decoder_input_ids", None) - - if not self.is_encoder_decoder: - inputs["inputs_embeds"] = model.get_input_embeddings()(input_ids) - else: - inputs["inputs_embeds"] = model.get_input_embeddings()(encoder_input_ids) - inputs["decoder_inputs_embeds"] = model.get_input_embeddings()(decoder_input_ids) - - inputs = self._prepare_for_class(inputs, model_class) - - @tf.function - def run_in_graph_mode(): - return model(inputs) - - outputs = run_in_graph_mode() - self.assertIsNotNone(outputs) - def test_numpy_arrays_inputs(self): config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() diff --git a/tests/test_modeling_tf_core.py b/tests/test_modeling_tf_core.py new file mode 100644 index 000000000000..bd4e0b0942c4 --- /dev/null +++ b/tests/test_modeling_tf_core.py @@ -0,0 +1,357 @@ +# coding=utf-8 +# Copyright 2019 HuggingFace Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +import copy +import os +import tempfile +from importlib import import_module + +from transformers import is_tf_available +from transformers.models.auto import get_values +from transformers.testing_utils import _tf_gpu_memory_limit, require_tf, slow + +from .test_modeling_tf_common import ids_tensor + + +if is_tf_available(): + import numpy as np + import tensorflow as tf + + from transformers import ( + TF_MODEL_FOR_CAUSAL_LM_MAPPING, + TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING, + TF_MODEL_FOR_MASKED_LM_MAPPING, + TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING, + TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING, + TF_MODEL_FOR_PRETRAINING_MAPPING, + TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING, + TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING, + TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING, + TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING, + TFSharedEmbeddings, + ) + + if _tf_gpu_memory_limit is not None: + gpus = tf.config.list_physical_devices("GPU") + for gpu in gpus: + # Restrict TensorFlow to only allocate x GB of memory on the GPUs + try: + tf.config.set_logical_device_configuration( + gpu, [tf.config.LogicalDeviceConfiguration(memory_limit=_tf_gpu_memory_limit)] + ) + logical_gpus = tf.config.list_logical_devices("GPU") + print("Logical GPUs", logical_gpus) + except RuntimeError as e: + # Virtual devices must be set before GPUs have been initialized + print(e) + + +@require_tf +class TFCoreModelTesterMixin: + + model_tester = None + all_model_classes = () + all_generative_model_classes = () + test_mismatched_shapes = True + test_resize_embeddings = True + test_head_masking = True + is_encoder_decoder = False + + def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict: + inputs_dict = copy.deepcopy(inputs_dict) + + if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): + inputs_dict = { + k: tf.tile(tf.expand_dims(v, 1), (1, 
self.model_tester.num_choices) + (1,) * (v.ndim - 1)) + if isinstance(v, tf.Tensor) and v.ndim > 0 + else v + for k, v in inputs_dict.items() + } + + if return_labels: + if model_class in get_values(TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING): + inputs_dict["labels"] = tf.ones(self.model_tester.batch_size, dtype=tf.int32) + elif model_class in get_values(TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING): + inputs_dict["start_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) + inputs_dict["end_positions"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) + elif model_class in [ + *get_values(TF_MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING), + *get_values(TF_MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING), + ]: + inputs_dict["labels"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) + elif model_class in get_values(TF_MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING): + inputs_dict["next_sentence_label"] = tf.zeros(self.model_tester.batch_size, dtype=tf.int32) + elif model_class in [ + *get_values(TF_MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING), + *get_values(TF_MODEL_FOR_CAUSAL_LM_MAPPING), + *get_values(TF_MODEL_FOR_MASKED_LM_MAPPING), + *get_values(TF_MODEL_FOR_PRETRAINING_MAPPING), + *get_values(TF_MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING), + ]: + inputs_dict["labels"] = tf.zeros( + (self.model_tester.batch_size, self.model_tester.seq_length), dtype=tf.int32 + ) + return inputs_dict + + @slow + def test_graph_mode(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + for model_class in self.all_model_classes: + inputs = self._prepare_for_class(inputs_dict, model_class) + model = model_class(config) + + @tf.function + def run_in_graph_mode(): + return model(inputs) + + outputs = run_in_graph_mode() + self.assertIsNotNone(outputs) + + @slow + def test_xla_mode(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + for model_class in self.all_model_classes: + inputs = 
self._prepare_for_class(inputs_dict, model_class) + model = model_class(config) + + @tf.function(experimental_compile=True) + def run_in_graph_mode(): + return model(inputs) + + outputs = run_in_graph_mode() + self.assertIsNotNone(outputs) + + @slow + def test_saved_model_creation(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.output_hidden_states = False + config.output_attentions = False + + if hasattr(config, "use_cache"): + config.use_cache = False + + model_class = self.all_model_classes[0] + + class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) + model = model_class(config) + + model(class_inputs_dict) + + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname, saved_model=True) + saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") + self.assertTrue(os.path.exists(saved_model_dir)) + + @slow + def test_saved_model_creation_extended(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + config.output_hidden_states = True + config.output_attentions = True + + if hasattr(config, "use_cache"): + config.use_cache = True + + encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length) + encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length) + + for model_class in self.all_model_classes: + class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) + model = model_class(config) + num_out = len(model(class_inputs_dict)) + + with tempfile.TemporaryDirectory() as tmpdirname: + model.save_pretrained(tmpdirname, saved_model=True) + saved_model_dir = os.path.join(tmpdirname, "saved_model", "1") + model = tf.keras.models.load_model(saved_model_dir) + outputs = model(class_inputs_dict) + + if self.is_encoder_decoder: + output_hidden_states = outputs["encoder_hidden_states"] + output_attentions = outputs["encoder_attentions"] + else: + output_hidden_states = 
outputs["hidden_states"] + output_attentions = outputs["attentions"] + + self.assertEqual(len(outputs), num_out) + + expected_num_layers = getattr( + self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1 + ) + + self.assertEqual(len(output_hidden_states), expected_num_layers) + self.assertListEqual( + list(output_hidden_states[0].shape[-2:]), + [self.model_tester.seq_length, self.model_tester.hidden_size], + ) + + self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers) + self.assertListEqual( + list(output_attentions[0].shape[-3:]), + [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length], + ) + + @slow + def test_mixed_precision(self): + tf.keras.mixed_precision.experimental.set_policy("mixed_float16") + + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + for model_class in self.all_model_classes: + class_inputs_dict = self._prepare_for_class(inputs_dict, model_class) + model = model_class(config) + outputs = model(class_inputs_dict) + + self.assertIsNotNone(outputs) + + tf.keras.mixed_precision.experimental.set_policy("float32") + + @slow + def test_train_pipeline_custom_model(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + # head_mask, decoder_head_mask and cross_attn_head_mask have different shapes than the other input args + if "head_mask" in inputs_dict: + del inputs_dict["head_mask"] + if "decoder_head_mask" in inputs_dict: + del inputs_dict["decoder_head_mask"] + if "cross_attn_head_mask" in inputs_dict: + del inputs_dict["cross_attn_head_mask"] + tf_main_layer_classes = set( + module_member + for model_class in self.all_model_classes + for module in (import_module(model_class.__module__),) + for module_member_name in dir(module) + if module_member_name.endswith("MainLayer") + for module_member in (getattr(module, module_member_name),) + if isinstance(module_member, type) + and tf.keras.layers.Layer in module_member.__bases__ +
and getattr(module_member, "_keras_serializable", False) + ) + + for main_layer_class in tf_main_layer_classes: + # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter + if "T5" in main_layer_class.__name__: + # Use the same values as in TFT5ModelTester for this shared layer + shared = TFSharedEmbeddings(self.model_tester.vocab_size, self.model_tester.hidden_size, name="shared") + config.use_cache = False + main_layer = main_layer_class(config, embed_tokens=shared) + else: + main_layer = main_layer_class(config) + + symbolic_inputs = { + name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items() + } + + if hasattr(self.model_tester, "num_labels"): + num_labels = self.model_tester.num_labels + else: + num_labels = 2 + + X = tf.data.Dataset.from_tensor_slices( + (inputs_dict, np.ones((self.model_tester.batch_size, self.model_tester.seq_length, num_labels, 1))) + ).batch(1) + + hidden_states = main_layer(symbolic_inputs)[0] + outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states) + model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs]) + + model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["binary_accuracy"]) + model.fit(X, epochs=1) + + with tempfile.TemporaryDirectory() as tmpdirname: + filepath = os.path.join(tmpdirname, "keras_model.h5") + model.save(filepath) + if "T5" in main_layer_class.__name__: + model = tf.keras.models.load_model( + filepath, + custom_objects={ + main_layer_class.__name__: main_layer_class, + "TFSharedEmbeddings": TFSharedEmbeddings, + }, + ) + else: + model = tf.keras.models.load_model( + filepath, custom_objects={main_layer_class.__name__: main_layer_class} + ) + assert isinstance(model, tf.keras.Model) + model(inputs_dict) + + @slow + def test_graph_mode_with_inputs_embeds(self): + config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common() + + for model_class
in self.all_model_classes: + model = model_class(config) + + inputs = copy.deepcopy(inputs_dict) + + if not self.is_encoder_decoder: + input_ids = inputs["input_ids"] + del inputs["input_ids"] + else: + encoder_input_ids = inputs["input_ids"] + decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids) + del inputs["input_ids"] + inputs.pop("decoder_input_ids", None) + + if not self.is_encoder_decoder: + inputs["inputs_embeds"] = model.get_input_embeddings()(input_ids) + else: + inputs["inputs_embeds"] = model.get_input_embeddings()(encoder_input_ids) + inputs["decoder_inputs_embeds"] = model.get_input_embeddings()(decoder_input_ids) + + inputs = self._prepare_for_class(inputs, model_class) + + @tf.function + def run_in_graph_mode(): + return model(inputs) + + outputs = run_in_graph_mode() + self.assertIsNotNone(outputs) + + def _generate_random_bad_tokens(self, num_bad_tokens, model): + # special tokens cannot be bad tokens + special_tokens = [] + if model.config.bos_token_id is not None: + special_tokens.append(model.config.bos_token_id) + if model.config.pad_token_id is not None: + special_tokens.append(model.config.pad_token_id) + if model.config.eos_token_id is not None: + special_tokens.append(model.config.eos_token_id) + + # create random bad tokens that are not special tokens + bad_tokens = [] + while len(bad_tokens) < num_bad_tokens: + token = tf.squeeze(ids_tensor((1, 1), self.model_tester.vocab_size), 0).numpy()[0] + if token not in special_tokens: + bad_tokens.append(token) + return bad_tokens + + def _check_generated_ids(self, output_ids): + for token_id in output_ids[0].numpy().tolist(): + self.assertGreaterEqual(token_id, 0) + self.assertLess(token_id, self.model_tester.vocab_size) + + def _check_match_tokens(self, generated_ids, bad_words_ids): + # for all bad word tokens + for bad_word_ids in bad_words_ids: + # for all slices in batch + for generated_ids_slice in generated_ids: + # for all word idx + for i in range(len(bad_word_ids), 
len(generated_ids_slice)): + # if tokens match + if generated_ids_slice[i - len(bad_word_ids) : i] == bad_word_ids: + return True + return False diff --git a/tests/test_modeling_tf_gpt2.py b/tests/test_modeling_tf_gpt2.py index f3e1a373aa80..d653329a5e82 100644 --- a/tests/test_modeling_tf_gpt2.py +++ b/tests/test_modeling_tf_gpt2.py @@ -20,6 +20,7 @@ from .test_configuration_common import ConfigTester from .test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor +from .test_modeling_tf_core import TFCoreModelTesterMixin if is_tf_available(): @@ -352,7 +353,7 @@ def prepare_config_and_inputs_for_common(self): @require_tf -class TFGPT2ModelTest(TFModelTesterMixin, unittest.TestCase): +class TFGPT2ModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestCase): all_model_classes = ( (TFGPT2Model, TFGPT2LMHeadModel, TFGPT2ForSequenceClassification, TFGPT2DoubleHeadsModel) diff --git a/utils/tests_fetcher.py b/utils/tests_fetcher.py index d80f194a089f..dbfd77b62ee5 100644 --- a/utils/tests_fetcher.py +++ b/utils/tests_fetcher.py @@ -252,7 +252,7 @@ def create_reverse_dependency_map(): "file_utils.py": ["test_file_utils.py", "test_model_output.py"], "modelcard.py": "test_model_card.py", "modeling_flax_utils.py": "test_modeling_flax_common.py", - "modeling_tf_utils.py": "test_modeling_tf_common.py", + "modeling_tf_utils.py": ["test_modeling_tf_common.py", "test_modeling_tf_core.py"], "modeling_utils.py": ["test_modeling_common.py", "test_offline.py"], "models/auto/modeling_auto.py": ["test_modeling_auto.py", "test_modeling_tf_pytorch.py", "test_modeling_bort.py"], "models/auto/modeling_flax_auto.py": "test_flax_auto.py",