Skip to content

Commit

Permalink
Force the return of token type IDs (#3439)
Browse files: browse the repository at this point in the history
  • Loading branch information
LysandreJik authored Mar 26, 2020
1 parent 010e046 commit ffcffeb
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
4 changes: 3 additions & 1 deletion examples/utils_multiple_choice.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -320,7 +320,9 @@ def convert_examples_to_features(
else:
text_b = example.question + " " + ending

inputs = tokenizer.encode_plus(text_a, text_b, add_special_tokens=True, max_length=max_length,)
inputs = tokenizer.encode_plus(
text_a, text_b, add_special_tokens=True, max_length=max_length, return_token_type_ids=True
)
if "num_truncated_tokens" in inputs and inputs["num_truncated_tokens"] > 0:
logger.info(
"Attention! you are cropping tokens (swag task is ok). "
Expand Down
1 change: 1 addition & 0 deletions src/transformers/data/processors/squad.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -139,6 +139,7 @@ def squad_convert_example_to_features(example, max_seq_length, doc_stride, max_q
pad_to_max_length=True,
stride=max_seq_length - doc_stride - len(truncated_query) - sequence_pair_added_tokens,
truncation_strategy="only_second" if tokenizer.padding_side == "right" else "only_first",
return_token_type_ids=True,
)

paragraph_len = min(
Expand Down

0 comments on commit ffcffeb

Please sign in to comment.