-
Notifications
You must be signed in to change notification settings - Fork 27.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update serving code to enable saved_model=True
#18153
Changes from 13 commits
cc93471
0a8ed80
2e948ca
bf684e7
c230382
ac7019b
505cb77
a86b369
cf747bf
0ff7d8c
88e7888
899c76c
57cf29a
1a378c3
5178ab8
6f0aa42
e9486db
db264ea
006d9e5
b6e7d06
a0b70e9
02caeae
2ed2025
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1127,12 +1127,14 @@ def call( | |||||||||
training=training, | ||||||||||
) | ||||||||||
|
||||||||||
# Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removing `Copied from` across models, as the tensors in the tuples for hidden_states and attentions are different sizes and so can't be converted with `tf.convert_to_tensor`. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From a quick look at the Funnel docstring, I could not find why they have different sizes. transformers/src/transformers/models/funnel/modeling_funnel.py Lines 843 to 845 in 2c5747e
Maybe the docstring is wrong, and we should update it (in another PR for sure)? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that, because it uses pooling, the sequence length is different after each block:
|
||||||||||
def serving_output(self, output): | ||||||||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None | ||||||||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None | ||||||||||
|
||||||||||
return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) | ||||||||||
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of | ||||||||||
# different dimensions | ||||||||||
return TFBaseModelOutput( | ||||||||||
last_hidden_state=output.last_hidden_state, | ||||||||||
hidden_states=output.hidden_states, | ||||||||||
attentions=output.attentions, | ||||||||||
) | ||||||||||
|
||||||||||
|
||||||||||
@add_start_docstrings( | ||||||||||
|
@@ -1175,12 +1177,14 @@ def call( | |||||||||
training=training, | ||||||||||
) | ||||||||||
|
||||||||||
# Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output | ||||||||||
def serving_output(self, output): | ||||||||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None | ||||||||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None | ||||||||||
|
||||||||||
return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns) | ||||||||||
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of | ||||||||||
# different dimensions | ||||||||||
return TFBaseModelOutput( | ||||||||||
last_hidden_state=output.last_hidden_state, | ||||||||||
hidden_states=output.hidden_states, | ||||||||||
attentions=output.attentions, | ||||||||||
) | ||||||||||
|
||||||||||
|
||||||||||
@add_start_docstrings( | ||||||||||
|
@@ -1249,10 +1253,11 @@ def call( | |||||||||
) | ||||||||||
|
||||||||||
def serving_output(self, output): | ||||||||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None | ||||||||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None | ||||||||||
|
||||||||||
return TFFunnelForPreTrainingOutput(logits=output.logits, hidden_states=hs, attentions=attns) | ||||||||||
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of | ||||||||||
# different dimensions | ||||||||||
return TFFunnelForPreTrainingOutput( | ||||||||||
logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions | ||||||||||
) | ||||||||||
|
||||||||||
|
||||||||||
@add_start_docstrings("""Funnel Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING) | ||||||||||
|
@@ -1322,12 +1327,10 @@ def call( | |||||||||
attentions=outputs.attentions, | ||||||||||
) | ||||||||||
|
||||||||||
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output | ||||||||||
def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput: | ||||||||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None | ||||||||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None | ||||||||||
|
||||||||||
return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns) | ||||||||||
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of | ||||||||||
# different dimensions | ||||||||||
return TFMaskedLMOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions) | ||||||||||
|
||||||||||
|
||||||||||
@add_start_docstrings( | ||||||||||
|
@@ -1398,12 +1401,12 @@ def call( | |||||||||
attentions=outputs.attentions, | ||||||||||
) | ||||||||||
|
||||||||||
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output | ||||||||||
def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput: | ||||||||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None | ||||||||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None | ||||||||||
|
||||||||||
return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) | ||||||||||
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of | ||||||||||
# different dimensions | ||||||||||
return TFSequenceClassifierOutput( | ||||||||||
logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions | ||||||||||
) | ||||||||||
|
||||||||||
|
||||||||||
@add_start_docstrings( | ||||||||||
|
@@ -1514,12 +1517,12 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput: | |||||||||
|
||||||||||
return self.serving_output(output=output) | ||||||||||
|
||||||||||
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output | ||||||||||
def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput: | ||||||||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None | ||||||||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None | ||||||||||
|
||||||||||
return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns) | ||||||||||
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of | ||||||||||
# different dimensions | ||||||||||
return TFMultipleChoiceModelOutput( | ||||||||||
logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions | ||||||||||
) | ||||||||||
|
||||||||||
|
||||||||||
@add_start_docstrings( | ||||||||||
|
@@ -1592,12 +1595,12 @@ def call( | |||||||||
attentions=outputs.attentions, | ||||||||||
) | ||||||||||
|
||||||||||
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output | ||||||||||
def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput: | ||||||||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None | ||||||||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None | ||||||||||
|
||||||||||
return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns) | ||||||||||
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of | ||||||||||
# different dimensions | ||||||||||
return TFTokenClassifierOutput( | ||||||||||
logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions | ||||||||||
) | ||||||||||
|
||||||||||
|
||||||||||
@add_start_docstrings( | ||||||||||
|
@@ -1683,11 +1686,12 @@ def call( | |||||||||
attentions=outputs.attentions, | ||||||||||
) | ||||||||||
|
||||||||||
# Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output | ||||||||||
def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput: | ||||||||||
hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None | ||||||||||
attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None | ||||||||||
|
||||||||||
# hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of | ||||||||||
# different dimensions | ||||||||||
return TFQuestionAnsweringModelOutput( | ||||||||||
start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns | ||||||||||
start_logits=output.start_logits, | ||||||||||
end_logits=output.end_logits, | ||||||||||
hidden_states=output.hidden_states, | ||||||||||
attentions=output.attentions, | ||||||||||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -227,12 +227,17 @@ def _compute_mask_indices( | |
f" `sequence_length`: {sequence_length}`" | ||
) | ||
# compute number of masked spans in batch | ||
num_masked_spans = int(mask_prob * sequence_length / mask_length + tf.random.uniform((1,))) | ||
num_masked_spans = max(num_masked_spans, min_masks) | ||
num_masked_spans = mask_prob * sequence_length / mask_length + tf.random.uniform((1,)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can't directly use `int` and `max` on a tensor in graph mode, hence `tf.cast` and `tf.maximum`. |
||
num_masked_spans = tf.maximum(num_masked_spans, min_masks) | ||
num_masked_spans = tf.cast(num_masked_spans, tf.int32) | ||
|
||
# make sure num masked indices <= sequence_length | ||
if num_masked_spans * mask_length > sequence_length: | ||
num_masked_spans = sequence_length // mask_length | ||
num_masked_spans = tf.cond( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In graph mode, we can't use a tensor in an if/else statement. You get the following error: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks good 👍 Perhaps for readability, if it is allowed, it can be rewritten as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good shout. Updated 👍 |
||
num_masked_spans * mask_length > sequence_length, | ||
true_fn=lambda: sequence_length // mask_length, | ||
false_fn=lambda: num_masked_spans, | ||
) | ||
num_masked_spans = tf.squeeze(num_masked_spans) | ||
|
||
# SpecAugment mask to fill | ||
spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32) | ||
|
@@ -256,7 +261,7 @@ def _compute_mask_indices( | |
|
||
# scatter indices to mask | ||
spec_aug_mask = _scatter_values_on_batch_indices( | ||
tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, spec_aug_mask.shape | ||
tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, shape_list(spec_aug_mask) | ||
amyeroberts marked this conversation as resolved.
Show resolved
Hide resolved
|
||
) | ||
|
||
return spec_aug_mask | ||
|
@@ -1319,7 +1324,15 @@ def __init__(self, config, *inputs, **kwargs): | |
"to train/fine-tine this model, you need a GPU or a TPU" | ||
) | ||
|
||
@tf.function | ||
@tf.function( | ||
input_signature=[ | ||
{ | ||
"input_values": tf.TensorSpec((None, None), tf.int32, name="input_ids"), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have some doubts here: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated the name 👍 Following your comment below, input_values are set to float and attention_mask and token_type_ids as int. |
||
"attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"), | ||
"token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"), | ||
} | ||
] | ||
) | ||
def serving(self, inputs): | ||
output = self.call(input_values=inputs, training=False) | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Adding in
self.serving(output)
return to: