[WIP] Ensure TF model configs can be converted to proper JSON (#14415)
* test: make sure model configs are jsonifiable

* fix: return python dict instead of config object

* fix: accept pretrained config and use correct class

* Re-enabling slow tests and applying them to core models only

* Re-enabling slow tests and applying them to core models only

* Add new test file to fetcher

* Remove tooslow tests from test_modeling_tf_common.py

* make style

* Style fixes

* Style fixes

* Style fixes

* Style fixes

* Adding core tests to GPT2 and BART

* Removing unused imports

Co-authored-by: niklas.fruehauf <niklas.fruehauf@sovanta.com>
Co-authored-by: matt <rocketknight1@gmail.com>
3 people authored Nov 17, 2021
1 parent 754202d commit 1991da0
Showing 7 changed files with 374 additions and 223 deletions.
6 changes: 4 additions & 2 deletions src/transformers/modeling_tf_utils.py
@@ -693,11 +693,13 @@ def __init__(self, config, *inputs, **kwargs):
         self.name_or_path = config.name_or_path
 
     def get_config(self):
-        return self.config
+        return self.config.to_dict()
 
     @classmethod
     def from_config(cls, config, **kwargs):
-        return cls._from_config(config, **kwargs)
+        if isinstance(config, PretrainedConfig):
+            return cls._from_config(config, **kwargs)
+        return cls._from_config(cls.config_class.from_dict(config, **kwargs))
 
     @classmethod
     def _from_config(cls, config, **kwargs):
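In practice, the new contract is: get_config() returns a plain Python dict that json.dumps can handle, and from_config() accepts either that dict or a PretrainedConfig instance. A minimal sketch of the round-trip (TFBertModel and BertConfig are only example classes; any TF model should behave the same):

import json

from transformers import BertConfig, TFBertModel

# get_config() now returns config.to_dict(), a plain dict, rather than a
# PretrainedConfig object, so Keras can JSON-serialize it when saving.
model = TFBertModel(BertConfig(num_hidden_layers=2))

model_config = model.get_config()
json.dumps(model_config)  # must not raise; this is what Keras requires

# from_config() dispatches on the input type, so both forms now work:
restored_from_dict = TFBertModel.from_config(model_config)
restored_from_obj = TFBertModel.from_config(model.config)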
3 changes: 2 additions & 1 deletion tests/test_modeling_tf_bart.py
@@ -23,6 +23,7 @@
 
 from .test_configuration_common import ConfigTester
 from .test_modeling_tf_common import TFModelTesterMixin, ids_tensor
+from .test_modeling_tf_core import TFCoreModelTesterMixin
 
 
 if is_tf_available():
@@ -177,7 +178,7 @@ def prepare_bart_inputs_dict(
 
 
 @require_tf
-class TFBartModelTest(TFModelTesterMixin, unittest.TestCase):
+class TFBartModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestCase):
     all_model_classes = (TFBartForConditionalGeneration, TFBartModel) if is_tf_available() else ()
     all_generative_model_classes = (TFBartForConditionalGeneration,) if is_tf_available() else ()
     is_encoder_decoder = True
3 changes: 2 additions & 1 deletion tests/test_modeling_tf_bert.py
@@ -22,6 +22,7 @@
 
 from .test_configuration_common import ConfigTester
 from .test_modeling_tf_common import TFModelTesterMixin, floats_tensor, ids_tensor
+from .test_modeling_tf_core import TFCoreModelTesterMixin
 
 
 if is_tf_available():
@@ -284,7 +285,7 @@ def prepare_config_and_inputs_for_common(self):
 
 
 @require_tf
-class TFBertModelTest(TFModelTesterMixin, unittest.TestCase):
+class TFBertModelTest(TFModelTesterMixin, TFCoreModelTesterMixin, unittest.TestCase):
 
     all_model_classes = (
         (
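As with TFBartModelTest above, the heavy checks now arrive through mixin composition: a model's test class opts in by listing TFCoreModelTesterMixin alongside TFModelTesterMixin. The mixin's real contents live in the new tests/test_modeling_tf_core.py, which this diff view does not render; the sketch below only illustrates the opt-in pattern, and CoreTesterMixin and its test method are hypothetical stand-ins.

import unittest


class CoreTesterMixin:
    # Hypothetical stand-in for TFCoreModelTesterMixin: expensive tests are
    # kept in a separate mixin so that only core model suites run them.
    def test_core_round_trip(self):
        self.assertEqual(self.build_value(), self.build_value())


class ToyCoreModelTest(CoreTesterMixin, unittest.TestCase):
    # Opts in by listing the mixin; suites that omit it keep only their
    # fast baseline tests.
    def build_value(self):
        return 42


if __name__ == "__main__":
    unittest.main()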
223 changes: 6 additions & 217 deletions tests/test_modeling_tf_common.py
@@ -28,6 +28,7 @@
 from requests.exceptions import HTTPError
 from transformers import is_tf_available
 from transformers.models.auto import get_values
+from transformers.testing_utils import tooslow  # noqa: F401
 from transformers.testing_utils import (
     PASS,
     USER,
@@ -38,7 +39,6 @@
     require_keras2onnx,
     require_tf,
     slow,
-    tooslow,
 )
 from transformers.utils import logging
 
@@ -169,42 +169,18 @@ def test_save_load_config(self):
         for model_class in self.all_model_classes:
             model = model_class(config)
             outputs = model(self._prepare_for_class(inputs_dict, model_class))
-
+            model_config = model.get_config()
+            # make sure that returned config is jsonifiable, which is required by keras
+            json.dumps(model_config)
             new_model = model_class.from_config(model.get_config())
+            # make sure it also accepts a normal config
+            _ = model_class.from_config(model.config)
             _ = new_model(self._prepare_for_class(inputs_dict, model_class))  # Build model
             new_model.set_weights(model.get_weights())
             after_outputs = new_model(self._prepare_for_class(inputs_dict, model_class))
 
             self.assert_outputs_same(after_outputs, outputs)
-
-    @tooslow
-    def test_graph_mode(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        for model_class in self.all_model_classes:
-            inputs = self._prepare_for_class(inputs_dict, model_class)
-            model = model_class(config)
-
-            @tf.function
-            def run_in_graph_mode():
-                return model(inputs)
-
-            outputs = run_in_graph_mode()
-            self.assertIsNotNone(outputs)
-
-    @tooslow
-    def test_xla_mode(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        for model_class in self.all_model_classes:
-            inputs = self._prepare_for_class(inputs_dict, model_class)
-            model = model_class(config)
-
-            @tf.function(experimental_compile=True)
-            def run_in_graph_mode():
-                return model(inputs)
-
-            outputs = run_in_graph_mode()
-            self.assertIsNotNone(outputs)
 
     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
 
@@ -236,75 +212,6 @@ def test_forward_signature(self):
         expected_arg_names = ["input_ids"]
         self.assertListEqual(arg_names[:1], expected_arg_names)
 
-    @tooslow
-    def test_saved_model_creation(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.output_hidden_states = False
-        config.output_attentions = False
-
-        if hasattr(config, "use_cache"):
-            config.use_cache = False
-
-        model_class = self.all_model_classes[0]
-
-        class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-        model = model_class(config)
-
-        model(class_inputs_dict)
-
-        with tempfile.TemporaryDirectory() as tmpdirname:
-            model.save_pretrained(tmpdirname, saved_model=True)
-            saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
-            self.assertTrue(os.path.exists(saved_model_dir))
-
-    @tooslow
-    def test_saved_model_creation_extended(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        config.output_hidden_states = True
-        config.output_attentions = True
-
-        if hasattr(config, "use_cache"):
-            config.use_cache = True
-
-        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length", self.model_tester.seq_length)
-        encoder_key_length = getattr(self.model_tester, "key_length", encoder_seq_length)
-
-        for model_class in self.all_model_classes:
-            class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-            model = model_class(config)
-            num_out = len(model(class_inputs_dict))
-
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                model.save_pretrained(tmpdirname, saved_model=True)
-                saved_model_dir = os.path.join(tmpdirname, "saved_model", "1")
-                model = tf.keras.models.load_model(saved_model_dir)
-                outputs = model(class_inputs_dict)
-
-                if self.is_encoder_decoder:
-                    output_hidden_states = outputs["encoder_hidden_states"]
-                    output_attentions = outputs["encoder_attentions"]
-                else:
-                    output_hidden_states = outputs["hidden_states"]
-                    output_attentions = outputs["attentions"]
-
-                self.assertEqual(len(outputs), num_out)
-
-                expected_num_layers = getattr(
-                    self.model_tester, "expected_num_hidden_layers", self.model_tester.num_hidden_layers + 1
-                )
-
-                self.assertEqual(len(output_hidden_states), expected_num_layers)
-                self.assertListEqual(
-                    list(output_hidden_states[0].shape[-2:]),
-                    [self.model_tester.seq_length, self.model_tester.hidden_size],
-                )
-
-                self.assertEqual(len(output_attentions), self.model_tester.num_hidden_layers)
-                self.assertListEqual(
-                    list(output_attentions[0].shape[-3:]),
-                    [self.model_tester.num_attention_heads, encoder_seq_length, encoder_key_length],
-                )
-
     def test_onnx_compliancy(self):
         if not self.test_onnx:
             return
@@ -366,21 +273,6 @@ def test_onnx_runtime_optimize(self):
 
             onnxruntime.InferenceSession(onnx_model.SerializeToString())
 
-    @tooslow
-    def test_mixed_precision(self):
-        tf.keras.mixed_precision.experimental.set_policy("mixed_float16")
-
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            class_inputs_dict = self._prepare_for_class(inputs_dict, model_class)
-            model = model_class(config)
-            outputs = model(class_inputs_dict)
-
-            self.assertIsNotNone(outputs)
-
-        tf.keras.mixed_precision.experimental.set_policy("float32")
-
     def test_keras_save_load(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
 
@@ -548,76 +440,6 @@ def test_pt_tf_model_equivalence(self):
 
         max_diff = np.amax(np.abs(tfo - pto))
         self.assertLessEqual(max_diff, 4e-2)
 
-    @tooslow
-    def test_train_pipeline_custom_model(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-        # head_mask and decoder_head_mask has different shapes than other input args
-        if "head_mask" in inputs_dict:
-            del inputs_dict["head_mask"]
-        if "decoder_head_mask" in inputs_dict:
-            del inputs_dict["decoder_head_mask"]
-        if "cross_attn_head_mask" in inputs_dict:
-            del inputs_dict["cross_attn_head_mask"]
-        tf_main_layer_classes = set(
-            module_member
-            for model_class in self.all_model_classes
-            for module in (import_module(model_class.__module__),)
-            for module_member_name in dir(module)
-            if module_member_name.endswith("MainLayer")
-            for module_member in (getattr(module, module_member_name),)
-            if isinstance(module_member, type)
-            and tf.keras.layers.Layer in module_member.__bases__
-            and getattr(module_member, "_keras_serializable", False)
-        )
-
-        for main_layer_class in tf_main_layer_classes:
-            # T5MainLayer needs an embed_tokens parameter when called without the inputs_embeds parameter
-            if "T5" in main_layer_class.__name__:
-                # Take the same values than in TFT5ModelTester for this shared layer
-                shared = TFSharedEmbeddings(self.model_tester.vocab_size, self.model_tester.hidden_size, name="shared")
-                config.use_cache = False
-                main_layer = main_layer_class(config, embed_tokens=shared)
-            else:
-                main_layer = main_layer_class(config)
-
-            symbolic_inputs = {
-                name: tf.keras.Input(tensor.shape[1:], dtype=tensor.dtype) for name, tensor in inputs_dict.items()
-            }
-
-            if hasattr(self.model_tester, "num_labels"):
-                num_labels = self.model_tester.num_labels
-            else:
-                num_labels = 2
-
-            X = tf.data.Dataset.from_tensor_slices(
-                (inputs_dict, np.ones((self.model_tester.batch_size, self.model_tester.seq_length, num_labels, 1)))
-            ).batch(1)
-
-            hidden_states = main_layer(symbolic_inputs)[0]
-            outputs = tf.keras.layers.Dense(num_labels, activation="softmax", name="outputs")(hidden_states)
-            model = tf.keras.models.Model(inputs=symbolic_inputs, outputs=[outputs])
-
-            model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["binary_accuracy"])
-            model.fit(X, epochs=1)
-
-            with tempfile.TemporaryDirectory() as tmpdirname:
-                filepath = os.path.join(tmpdirname, "keras_model.h5")
-                model.save(filepath)
-                if "T5" in main_layer_class.__name__:
-                    model = tf.keras.models.load_model(
-                        filepath,
-                        custom_objects={
-                            main_layer_class.__name__: main_layer_class,
-                            "TFSharedEmbeddings": TFSharedEmbeddings,
-                        },
-                    )
-                else:
-                    model = tf.keras.models.load_model(
-                        filepath, custom_objects={main_layer_class.__name__: main_layer_class}
-                    )
-                assert isinstance(model, tf.keras.Model)
-                model(inputs_dict)
-
     def test_compile_tf_model(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
         max_input = getattr(self.model_tester, "max_position_embeddings", 512)
@@ -989,39 +811,6 @@ def test_inputs_embeds(self):
 
             model(inputs)
 
-    @tooslow
-    def test_graph_mode_with_inputs_embeds(self):
-        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
-
-        for model_class in self.all_model_classes:
-            model = model_class(config)
-
-            inputs = copy.deepcopy(inputs_dict)
-
-            if not self.is_encoder_decoder:
-                input_ids = inputs["input_ids"]
-                del inputs["input_ids"]
-            else:
-                encoder_input_ids = inputs["input_ids"]
-                decoder_input_ids = inputs.get("decoder_input_ids", encoder_input_ids)
-                del inputs["input_ids"]
-                inputs.pop("decoder_input_ids", None)
-
-            if not self.is_encoder_decoder:
-                inputs["inputs_embeds"] = model.get_input_embeddings()(input_ids)
-            else:
-                inputs["inputs_embeds"] = model.get_input_embeddings()(encoder_input_ids)
-                inputs["decoder_inputs_embeds"] = model.get_input_embeddings()(decoder_input_ids)
-
-            inputs = self._prepare_for_class(inputs, model_class)
-
-            @tf.function
-            def run_in_graph_mode():
-                return model(inputs)
-
-            outputs = run_in_graph_mode()
-            self.assertIsNotNone(outputs)
-
     def test_numpy_arrays_inputs(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
 
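The blocks removed above are the @tooslow tests, which, per the commit messages, presumably move into the new core-model test file rather than disappearing. What the surviving test_save_load_config now guarantees is the piece Keras depends on: once get_config() returns a JSON-serializable dict, a transformers model can be embedded in a plain Keras graph and saved. A rough sketch of that end-to-end flow, with an illustrative small config and sequence length (not taken from this diff):

import tempfile

import tensorflow as tf
from transformers import BertConfig, TFBertModel

# Small illustrative config so the sketch stays light.
config = BertConfig(num_hidden_layers=2, hidden_size=32, num_attention_heads=2, intermediate_size=64)
bert = TFBertModel(config)

input_ids = tf.keras.Input(shape=(16,), dtype=tf.int32, name="input_ids")
hidden_states = bert(input_ids)[0]  # last_hidden_state
keras_model = tf.keras.Model(inputs=input_ids, outputs=hidden_states)

with tempfile.TemporaryDirectory() as tmpdir:
    # Keras saving walks each layer's get_config(), which is why the test
    # above insists the returned config survives json.dumps(); before this
    # change the PretrainedConfig return value could break this step.
    keras_model.save(f"{tmpdir}/saved_model")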