
Commit 57b2df3
Fix bugs (NVIDIA#5036) (NVIDIA#5039)
Co-authored-by: Zhilin Wang <wangzhilin12061996@hotmail.com>
Signed-off-by: Hainan Xu <hainanx@nvidia.com>
2 people authored and Hainan Xu committed Nov 29, 2022
1 parent fe34bed commit 57b2df3
Showing 4 changed files with 2 additions and 4 deletions.
examples/nlp/question_answering/conf/qa_conf.yaml (1 addition, 0 deletions)
@@ -29,6 +29,7 @@ trainer:
   num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it
   enable_checkpointing: False # provided by exp_manager
   logger: False # provided by exp_manager
+  strategy: ddp

 model:
   tensor_model_parallel_size: 1
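For context, this trainer block is consumed by PyTorch Lightning, and strategy: ddp selects DistributedDataParallel. A minimal sketch of how a config like the one above can reach a Trainer; the OmegaConf loading and the accelerator/devices values here are illustrative, not NeMo's actual training script:

# Hedged sketch, not NeMo code: a trainer block like the one above, once loaded
# from YAML, can be unpacked straight into a PyTorch Lightning Trainer.
from omegaconf import OmegaConf
import pytorch_lightning as pl

cfg = OmegaConf.create(
    """
    trainer:
      accelerator: cpu        # illustrative; the example configs usually target GPUs
      devices: 2
      num_sanity_val_steps: 0
      enable_checkpointing: False
      logger: False
      strategy: ddp           # the line this commit adds; selects DistributedDataParallel
    """
)

# DictConfig behaves like a mapping, so it can be unpacked into the Trainer kwargs.
trainer = pl.Trainer(**cfg.trainer)
print(trainer.strategy)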
@@ -477,7 +477,6 @@ def generate_candidates(self, labels, template_length, input_ids, attn_masks):
         for i in range(input_ids.size(0)):
             param_dict = {
                 "input_ids": input_ids[i : i + 1, : template_length[i]],
-                "attention_masks": attn_masks[i : i + 1, : template_length[i]],
                 "max_length": template_length[i] + tokens_to_generate,
                 "pad_token_id": self.tokenizer.tokenizer.pad_token_id,
             }
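The deleted key above is the fix itself: assuming self.language_model exposes a Hugging Face-style generate() (as the GPT-based models in these examples can), recent transformers releases validate generate() keyword arguments and raise on names the underlying model never consumes, and the plural "attention_masks" is not a name it knows; the accepted kwarg is the singular attention_mask. A standalone illustration with gpt2, independent of NeMo:

# Hedged, standalone illustration (gpt2, no NeMo): newer transformers versions
# reject generate() kwargs the model never uses, which a misspelled plural key hits.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
enc = tokenizer("Fix bugs in", return_tensors="pt")

try:
    model.generate(
        input_ids=enc["input_ids"],
        attention_masks=enc["attention_mask"],  # wrong plural key, like the removed line
        max_length=12,
        pad_token_id=tokenizer.eos_token_id,
    )
except ValueError as err:
    # Roughly: "The following `model_kwargs` are not used by the model: ['attention_masks']"
    print(err)

# The singular name (or simply omitting it, as this commit does) is accepted:
out = model.generate(
    input_ids=enc["input_ids"],
    attention_mask=enc["attention_mask"],
    max_length=12,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(out[0]))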
@@ -275,7 +275,6 @@ def generate_candidates(self, labels, template_length, input_ids, attn_masks):
         for i in range(input_ids.size(0)):
             param_dict = {
                 "input_ids": input_ids[i : i + 1, : template_length[i]],
-                "attention_masks": attn_masks[i : i + 1, : template_length[i]],
                 "max_length": template_length[i] + tokens_to_generate,
                 "pad_token_id": self.tokenizer.tokenizer.pad_token_id,
             }
@@ -309,11 +309,10 @@ def _generate_candidates(self, input_ids, input_attn_mask, training_mask_end):
         for i in range(input_ids.size(0)):
             param_dict = {
                 "input_ids": input_ids[i : i + 1, : training_mask_end[i]],
-                "attention_masks": input_attn_mask[i : i + 1, : training_mask_end[i]],
                 "max_length": training_mask_end[i] + num_tokens_to_generate,
                 "pad_token_id": self.tokenizer.tokenizer.pad_token_id,
             }
-            generated_token_ids.append(self.language_model.generate(**param_dict, skip_special_tokens=True))
+            generated_token_ids.append(self.language_model.generate(**param_dict))
             max_length = max(max_length, generated_token_ids[-1].size(1))

             # pad each generated to ensure they are of same length in dim 1, therefore stack-able
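The second removal in this last hunk follows the same pattern: skip_special_tokens is a decode-time tokenizer option, not a generate() argument, so under the same Hugging Face assumption as above it would be flagged as an unused model kwarg. When special tokens do need stripping, that happens while decoding the generated ids, e.g.:

# Hedged sketch under the same assumption as above: skip_special_tokens belongs to
# the tokenizer's decode step, not to generate().
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")
enc = tokenizer("Fix bugs in", return_tensors="pt")

token_ids = model.generate(
    input_ids=enc["input_ids"],
    attention_mask=enc["attention_mask"],
    max_length=12,
    pad_token_id=tokenizer.eos_token_id,
)

# Special tokens are stripped here, on the decode side:
print(tokenizer.batch_decode(token_ids, skip_special_tokens=True))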
