Update serving code to enable saved_model=True
#18153
@@ -1127,12 +1127,14 @@ def call(
            training=training,
        )

-    # Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output
Removing copied from across models as the tensors in the tuples for `hidden_states` and `activations` are different sizes, so can't be called with `tf.convert_to_tensor`.

A quick look in the Funnel docstring, I could not find why they have different sizes (transformers/src/transformers/models/funnel/modeling_funnel.py, lines 843 to 845 in 2c5747e). Maybe the docstring is wrong, and we should update it (in another PR for sure)?

It seems because it uses pooling, the sequence length is different after each block:
    def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns)
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
+        return TFBaseModelOutput(
+            last_hidden_state=output.last_hidden_state,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
+        )


@add_start_docstrings(
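The shape mismatch discussed in the thread above can be reproduced with a couple of dummy tensors. This is an illustrative snippet, not part of the diff: Funnel pools the sequence between blocks, so the per-block hidden states have different lengths and cannot be stacked into a single tensor.

import tensorflow as tf

# Illustrative only: hidden states with a constant sequence length stack into one tensor...
same_length = (tf.zeros((1, 8, 16)), tf.zeros((1, 8, 16)))
print(tf.convert_to_tensor(same_length).shape)  # (2, 1, 8, 16)

# ...but after Funnel-style pooling the per-block sequence lengths differ, so the
# tuple is "non-rectangular" and tf.convert_to_tensor raises.
pooled = (tf.zeros((1, 8, 16)), tf.zeros((1, 4, 16)))
try:
    tf.convert_to_tensor(pooled)
except (ValueError, tf.errors.InvalidArgumentError) as err:
    print(f"cannot stack: {type(err).__name__}")

Returning the tuples unchanged, as the new serving_output does, sidesteps the problem at the cost of the outputs staying tuples rather than stacked tensors.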
@@ -1175,12 +1177,14 @@ def call(
            training=training,
        )

-    # Copied from transformers.models.distilbert.modeling_tf_distilbert.TFDistilBertModel.serving_output
    def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns)
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
+        return TFBaseModelOutput(
+            last_hidden_state=output.last_hidden_state,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
+        )


@add_start_docstrings(
@@ -1249,10 +1253,11 @@ def call(
        )

    def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFFunnelForPreTrainingOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
+        return TFFunnelForPreTrainingOutput(
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
+        )


@add_start_docstrings("""Funnel Model with a `language modeling` head on top.""", FUNNEL_START_DOCSTRING)
@@ -1322,12 +1327,10 @@ def call(
            attentions=outputs.attentions,
        )

-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMaskedLM.serving_output
    def serving_output(self, output: TFMaskedLMOutput) -> TFMaskedLMOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFMaskedLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
+        return TFMaskedLMOutput(logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions)


@add_start_docstrings(
@@ -1398,12 +1401,12 @@ def call(
            attentions=outputs.attentions,
        )

-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForSequenceClassification.serving_output
    def serving_output(self, output: TFSequenceClassifierOutput) -> TFSequenceClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFSequenceClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
+        return TFSequenceClassifierOutput(
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
+        )


@add_start_docstrings(
@@ -1503,9 +1506,9 @@ def call(
    @tf.function(
        input_signature=[
            {
-                "input_ids": tf.TensorSpec((None, None, None), tf.int32, name="input_ids"),
-                "attention_mask": tf.TensorSpec((None, None, None), tf.int32, name="attention_mask"),
-                "token_type_ids": tf.TensorSpec((None, None, None), tf.int32, name="token_type_ids"),
+                "input_ids": tf.TensorSpec((None, None), tf.int32, name="input_ids"),
+                "attention_mask": tf.TensorSpec((None, None), tf.float32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
            }
        ]
    )
@@ -1514,12 +1517,12 @@ def serving(self, inputs: Dict[str, tf.Tensor]) -> TFMultipleChoiceModelOutput:

        return self.serving_output(output=output)

-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForMultipleChoice.serving_output
    def serving_output(self, output: TFMultipleChoiceModelOutput) -> TFMultipleChoiceModelOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFMultipleChoiceModelOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
+        return TFMultipleChoiceModelOutput(
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
+        )


@add_start_docstrings(
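For reference, the serving → serving_output flow that the hunk above relies on (`serving` runs the traced forward pass and returns `self.serving_output(...)`, which decides exactly which tensors the exported graph returns) can be sketched in isolation. All names below are made up for illustration; this is not the real transformers class.

import tensorflow as tf

class ServingPatternSketch(tf.Module):
    @tf.function(input_signature=[tf.TensorSpec((None, None), tf.int32, name="input_ids")])
    def serving(self, input_ids):
        # run the forward pass under a fixed signature, then shape the outputs
        output = self.call(input_ids, training=False)
        return self.serving_output(output)

    def call(self, input_ids, training=False):
        # stand-in forward pass: one float "hidden state" per token
        return {"last_hidden_state": tf.cast(input_ids, tf.float32)[..., None]}

    def serving_output(self, output):
        # a real model decides here whether hidden_states / attentions are stacked
        # with tf.convert_to_tensor or passed through as tuples of varying shapes
        return output

print(ServingPatternSketch().serving(tf.constant([[101, 2023, 102]]))["last_hidden_state"].shape)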
@@ -1592,12 +1595,12 @@ def call(
            attentions=outputs.attentions,
        )

-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForTokenClassification.serving_output
    def serving_output(self, output: TFTokenClassifierOutput) -> TFTokenClassifierOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFTokenClassifierOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
+        return TFTokenClassifierOutput(
+            logits=output.logits, hidden_states=output.hidden_states, attentions=output.attentions
+        )


@add_start_docstrings(
@@ -1683,11 +1686,12 @@ def call(
            attentions=outputs.attentions,
        )

-    # Copied from transformers.models.bert.modeling_tf_bert.TFBertForQuestionAnswering.serving_output
    def serving_output(self, output: TFQuestionAnsweringModelOutput) -> TFQuestionAnsweringModelOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
+        # hidden_states and attentions not converted to Tensor with tf.convert_to_tensor as they are all of
+        # different dimensions
        return TFQuestionAnsweringModelOutput(
-            start_logits=output.start_logits, end_logits=output.end_logits, hidden_states=hs, attentions=attns
+            start_logits=output.start_logits,
+            end_logits=output.end_logits,
+            hidden_states=output.hidden_states,
+            attentions=output.attentions,
        )
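With the Funnel serving_output methods above no longer stacking the per-block hidden states, the export named in the PR title should go through. A usage sketch, illustrative only: it assumes the funnel-transformer/small checkpoint, the saved_model/1 layout, and the serving_default signature that save_pretrained(saved_model=True) normally produces.

import tensorflow as tf
from transformers import TFFunnelModel

# Illustrative export flow, not taken from the PR itself.
model = TFFunnelModel.from_pretrained("funnel-transformer/small")
model.save_pretrained("funnel_export", saved_model=True)  # also writes funnel_export/saved_model/1

# Reload the serving graph without the Python modelling code.
loaded = tf.saved_model.load("funnel_export/saved_model/1")
serving_fn = loaded.signatures["serving_default"]
print(serving_fn.structured_input_signature)  # the TensorSpec dict the model was exported with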
@@ -227,12 +227,13 @@ def _compute_mask_indices(
            f" `sequence_length`: {sequence_length}`"
        )
    # compute number of masked spans in batch
-    num_masked_spans = int(mask_prob * sequence_length / mask_length + tf.random.uniform((1,)))
-    num_masked_spans = max(num_masked_spans, min_masks)
+    num_masked_spans = mask_prob * sequence_length / mask_length + tf.random.uniform((1,))
we can't directly use `int()` here
+    num_masked_spans = tf.maximum(num_masked_spans, min_masks)
+    num_masked_spans = tf.cast(num_masked_spans, tf.int32)

    # make sure num masked indices <= sequence_length
-    if num_masked_spans * mask_length > sequence_length:
-        num_masked_spans = sequence_length // mask_length
+    num_masked_spans = tf.math.minimum(sequence_length // mask_length, num_masked_spans)
+    num_masked_spans = tf.squeeze(num_masked_spans)

    # SpecAugment mask to fill
    spec_aug_mask = tf.zeros((batch_size, sequence_length), dtype=tf.int32)
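A rough sketch of the constraint behind the change above (the helper name count_masked_spans is made up): whenever this code is traced inside a tf.function, sequence_length is a symbolic tensor, so Python's int() and max() cannot be applied to it, while tf.maximum, tf.cast, and tf.math.minimum keep the whole computation in the graph.

import tensorflow as tf

# Illustrative helper mirroring the tensor-friendly ops used in the diff above.
MASK_PROB, MASK_LENGTH, MIN_MASKS = 0.05, 10, 2

@tf.function(input_signature=[tf.TensorSpec((None, None), tf.float32, name="input_values")])
def count_masked_spans(input_values):
    # sequence_length is a symbolic tensor during tracing; int(sequence_length) or
    # max(...) would fail here, so everything stays as TF ops.
    sequence_length = tf.shape(input_values)[1]
    num_masked_spans = MASK_PROB * tf.cast(sequence_length, tf.float32) / MASK_LENGTH
    num_masked_spans = tf.maximum(num_masked_spans, MIN_MASKS)  # graph-safe max()
    num_masked_spans = tf.cast(num_masked_spans, tf.int32)      # graph-safe int()
    # graph-safe replacement for "if num_masked_spans * mask_length > sequence_length: ..."
    return tf.math.minimum(sequence_length // MASK_LENGTH, num_masked_spans)

print(count_masked_spans(tf.zeros((2, 400))))  # tf.Tensor(2, shape=(), dtype=int32)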
@@ -256,7 +257,7 @@ def _compute_mask_indices(

    # scatter indices to mask
    spec_aug_mask = _scatter_values_on_batch_indices(
-        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, spec_aug_mask.shape
+        tf.ones_like(spec_aug_mask_idxs), spec_aug_mask_idxs, tf.shape(spec_aug_mask)
    )

    return spec_aug_mask
@@ -1319,7 +1320,15 @@ def __init__(self, config, *inputs, **kwargs):
                "to train/fine-tine this model, you need a GPU or a TPU"
            )

-    @tf.function
+    @tf.function(
+        input_signature=[
+            {
+                "input_values": tf.TensorSpec((None, None), tf.float32, name="input_values"),
+                "attention_mask": tf.TensorSpec((None, None), tf.int32, name="attention_mask"),
+                "token_type_ids": tf.TensorSpec((None, None), tf.int32, name="token_type_ids"),
+            }
+        ]
+    )
    def serving(self, inputs):
        output = self.call(input_values=inputs, training=False)
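The bare @tf.function removed above gives TensorFlow nothing to trace until the first call; pinning an explicit input_signature lets a concrete serving function be built without example inputs, which is what a SavedModel signature needs. A toy tf.Module sketch of that difference, with made-up names unrelated to the real model classes in this file:

import tensorflow as tf

class ToyServing(tf.Module):
    @tf.function(input_signature=[tf.TensorSpec((None, None), tf.float32, name="input_values")])
    def serving(self, input_values):
        # stand-in "forward pass" over variable-length audio frames
        return {"logits": tf.reduce_mean(input_values, axis=-1, keepdims=True)}

toy = ToyServing()
# With the signature fixed, a concrete function exists before any call, so it can be exported.
tf.saved_model.save(toy, "toy_export", signatures=toy.serving)
restored = tf.saved_model.load("toy_export")
print(restored.signatures["serving_default"](input_values=tf.zeros((1, 16000))))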
@@ -1511,10 +1520,11 @@ def call(
        return outputs

    def serving_output(self, output):
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-
-        return TFBaseModelOutput(last_hidden_state=output.last_hidden_state, hidden_states=hs, attentions=attns)
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        return TFBaseModelOutput(
+            last_hidden_state=output.last_hidden_state, hidden_states=hidden_states, attentions=attentions
+        )


@add_start_docstrings(
@@ -1685,6 +1695,6 @@ def call(
        )

    def serving_output(self, output: TFCausalLMOutput) -> TFCausalLMOutput:
-        hs = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
-        attns = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
-        return TFCausalLMOutput(logits=output.logits, hidden_states=hs, attentions=attns)
+        hidden_states = tf.convert_to_tensor(output.hidden_states) if self.config.output_hidden_states else None
+        attentions = tf.convert_to_tensor(output.attentions) if self.config.output_attentions else None
+        return TFCausalLMOutput(logits=output.logits, hidden_states=hidden_states, attentions=attentions)
Adding in the `self.serving(output)` return to: