From 5abae30d469bb0e4a2119cbdf1c6fecb84171cfc Mon Sep 17 00:00:00 2001 From: LysandreJik Date: Wed, 25 Mar 2020 17:42:53 -0400 Subject: [PATCH] Force the return of token type IDs --- examples/utils_multiple_choice.py | 4 +++- src/transformers/data/processors/squad.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/utils_multiple_choice.py b/examples/utils_multiple_choice.py index 8e19c5141416..d374e3a290bb 100644 --- a/examples/utils_multiple_choice.py +++ b/examples/utils_multiple_choice.py @@ -320,7 +320,9 @@ def convert_examples_to_features( else: text_b = example.question + " " + ending - inputs = tokenizer.encode_plus(text_a, text_b, add_special_tokens=True, max_length=max_length,) + inputs = tokenizer.encode_plus( + text_a, text_b, add_special_tokens=True, max_length=max_length, return_token_type_ids=True + ) if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0: logger.info( "Attention! you are cropping tokens (swag task is ok). " diff --git a/src/transformers/data/processors/squad.py b/src/transformers/data/processors/squad.py index e6e6a589a199..0f7bc54b382a 100644 --- a/src/transformers/data/processors/squad.py +++ b/src/transformers/data/processors/squad.py @@ -139,6 +139,7 @@ def squad_convert_example_to_features(example, max_seq_length, doc_stride, max_q pad_to_max_length=True, stride=max_seq_length - doc_stride - len(truncated_query) - sequence_pair_added_tokens, truncation_strategy="only_second" if tokenizer.padding_side == "right" else "only_first", + return_token_type_ids=True, ) paragraph_len = min(