
merge main
Signed-off-by: Evelina <ebakhturina@nvidia.com>
ekmb committed Jun 1, 2023
2 parents a031e9b + 2b6777f commit 9a19637
Showing 10 changed files with 57 additions and 11 deletions.
25 changes: 24 additions & 1 deletion Jenkinsfile
@@ -3407,7 +3407,30 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"'''
trainer.num_nodes=1"
}
}

stage('L2: Megatron GPT SFT Eval (inference seq len > training seq len)') {
when {
anyOf {
branch 'main'
changeRequest target: 'main'
}
}
failFast true
steps{
sh "python examples/nlp/language_modeling/tuning/megatron_gpt_peft_eval.py \
model.restore_from_path=/home/TestData/nlp/megatron_gpt_sft/megatron_gpt_rope_sft.nemo \
model.peft.restore_from_path=null \
model.data.test_ds.file_names=['/home/TestData/nlp/megatron_gpt_sft/sample.jsonl'] \
model.data.test_ds.names=['test'] \
model.data.test_ds.global_batch_size=1 \
model.data.test_ds.micro_batch_size=1 \
model.data.test_ds.tokens_to_generate=30 \
model.data.test_ds.max_seq_length=6000 \
inference.greedy=True \
inference.repetition_penalty=1.0 \
inference.outfile_path='examples/nlp/language_modeling/out.jsonl' && \
rm -rf examples/nlp/language_modeling/out.jsonl"
}
}
stage('L2: Megatron GPT Prompt Tuning TP1 PP1') {
when {
anyOf {
2 changes: 1 addition & 1 deletion examples/nlp/language_modeling/megatron_gpt_eval.py
@@ -263,7 +263,7 @@ def main(cfg) -> None:
print(response)
print("***************************")

# Second method of running text generation, call trainer.predict
# Second method of running text generation, call trainer.predict [recommended]
ds = RequestDataSet(OmegaConf.to_container(cfg.prompts))
request_dl = DataLoader(dataset=ds, batch_size=2)
config = OmegaConf.to_container(cfg.inference)
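The recommended trainer.predict path hands the DataLoader a plain list of prompt strings. A self-contained sketch of that setup (toy prompts; this mirrors the small RequestDataSet defined in megatron_gpt_eval.py, and the actual trainer/model calls are omitted):

from torch.utils.data import Dataset, DataLoader

class RequestDataSet(Dataset):
    # mirrors the tiny dataset in megatron_gpt_eval.py: it just yields prompt strings
    def __init__(self, sentences):
        self.sentences = sentences

    def __len__(self):
        return len(self.sentences)

    def __getitem__(self, idx):
        return self.sentences[idx]

prompts = ["Tell me about Megatron GPT.", "What is pipeline parallelism?"]
request_dl = DataLoader(dataset=RequestDataSet(prompts), batch_size=2)
for batch in request_dl:
    print(batch)  # a list of strings -- the same batch shape predict_step receives later in this diff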
@@ -127,4 +127,4 @@ inference:
repetition_penalty: 1.2 # The parameter for repetition penalty. 1.0 means no penalty.
min_tokens_to_generate: 0 # The minimum length of the sequence to be generated.
compute_logprob: False # a flag used to compute logprob of all the input text, a very special case of running inference, default False
outfile_path: /home/adithyare/exp/foo.txt
outfile_path: output.txt
@@ -29,7 +29,7 @@ exp_manager:
monitor: validation_${model.data.validation_ds.metric.name}
save_top_k: 2
mode: max
save_nemo_on_train_end: False # Should be false, correct prompt learning model file is saved at model.nemo_path set below,
save_nemo_on_train_end: False
filename: 'megatron_gpt_sft--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}-{consumed_samples}'
model_parallel_size: ${model.tensor_model_parallel_size}
save_best_model: True
7 changes: 7 additions & 0 deletions examples/nlp/language_modeling/tuning/megatron_gpt_sft.py
@@ -61,6 +61,8 @@ def _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False):
gpt_cfg.hidden_dropout = cfg.model.get('hidden_dropout', 0.0)
gpt_cfg.attention_dropout = cfg.model.get('attention_dropout', 0.0)
gpt_cfg.ffn_dropout = cfg.model.ffn_dropout
sft_cls = MegatronGPTSFTModel
gpt_cfg.target = f"{sft_cls.__module__}.{sft_cls.__name__}"

# This is needed when modifying a hparam file directly to load `.ckpt` files.
# This is not needed to modify the cfg in `.nemo` files.
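Setting gpt_cfg.target to the fully qualified class path lets NeMo re-import MegatronGPTSFTModel when the saved config is restored. A minimal, self-contained illustration of the f"{cls.__module__}.{cls.__name__}" pattern (the class here is hypothetical, not from the diff):

import importlib

class MyModel:
    pass

# compose the dotted path, as done for MegatronGPTSFTModel above
target = f"{MyModel.__module__}.{MyModel.__name__}"   # e.g. "__main__.MyModel"

# later, the path can be split back into module + class and re-imported
module_name, class_name = target.rsplit(".", 1)
cls = getattr(importlib.import_module(module_name), class_name)
assert cls is MyModel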
@@ -167,6 +169,10 @@ def main(cfg) -> None:

trainer._checkpoint_connector = CheckpointConnector(trainer, resume_from_checkpoint=resume_from_checkpoint)

# hydra interpolation does not work here as the interpolation key is lost when PTL saves hparams
with open_dict(cfg):
cfg.model.precision = cfg.trainer.precision

if cfg.model.restore_from_path:
save_restore_connector = NLPSaveRestoreConnector()
if os.path.isdir(cfg.model.restore_from_path):
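Because the ${trainer.precision} interpolation key is lost when PTL saves hparams, the value is copied explicitly under open_dict. A minimal OmegaConf sketch of that pattern (toy config values, not the real NeMo config):

from omegaconf import OmegaConf, open_dict

cfg = OmegaConf.create({"trainer": {"precision": "bf16"}, "model": {}})
OmegaConf.set_struct(cfg, True)  # struct mode rejects new keys, as NeMo configs do

with open_dict(cfg):
    # copy the concrete value instead of relying on a ${trainer.precision} interpolation
    cfg.model.precision = cfg.trainer.precision

print(cfg.model.precision)  # bf16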
@@ -177,6 +183,7 @@ def main(cfg) -> None:
return_config=True,
save_restore_connector=save_restore_connector,
)
gpt_cfg = _modify_config(gpt_cfg, cfg, add_cfg_to_tree=False)
model = load_from_nemo(MegatronGPTSFTModel, cfg, trainer, gpt_cfg, modify_confg_fn=_modify_config)
else:
validate_checkpoint_loading_args(cfg.model.pretrained_checkpoint)
@@ -169,7 +169,7 @@ def _process_example(self, example):
tokenized_text = pre_pad + self.tokenizer.text_to_ids(text)
context_ids = pre_pad + self.tokenizer.text_to_ids(context)
answer_ids = tokenized_text[len(context_ids) :]
total_ids = len(context_ids) + max(len(answer_ids), self.tokens_to_generate)
total_ids = len(context_ids) + len(answer_ids)
if self.add_bos:
total_ids += 1
if self.add_sep:
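With this change the example length no longer reserves tokens_to_generate slots, so truncation only applies when the real context + answer (plus any special tokens) exceed max_seq_length. A toy walk-through with made-up numbers and the usual BOS/SEP/EOS flags:

context_ids = list(range(10))   # 10 context token ids (made-up values)
answer_ids = list(range(4))     # 4 answer token ids
add_bos, add_sep, add_eos = True, False, True

total_ids = len(context_ids) + len(answer_ids)
total_ids += int(add_bos) + int(add_sep) + int(add_eos)
print(total_ids)  # 16 -- compared against max_seq_length to decide whether to truncate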
@@ -550,7 +550,13 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: Optional[int]
return compute_prob_response
else:
del inference_config['compute_logprob']
inference_config['inputs'] = (batch['contexts'].cuda(), batch['context_lengths'].cuda())

# for megatron_gpt_eval.py
if isinstance(batch, list):
inference_config['inputs'] = batch
else:
# peft_eval.py
inference_config['inputs'] = (batch['contexts'].cuda(), batch['context_lengths'].cuda())
return generate(self, **inference_config)

def write_predictions_to_file(self, outputs, output_file_path_prefix):
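The new branch lets predict_step accept both the list-of-strings batches produced by megatron_gpt_eval.py and the tokenized dict batches used by the PEFT eval path. A hedged illustration of the two shapes (tensor contents are toy values):

import torch

eval_batch = ["What is the capital of France?"]        # list of prompt strings
peft_batch = {                                          # dict of pre-tokenized tensors
    "contexts": torch.randint(0, 100, (1, 8)),
    "context_lengths": torch.tensor([8]),
}

for batch in (eval_batch, peft_batch):
    if isinstance(batch, list):
        inputs = batch
    else:
        inputs = (batch["contexts"], batch["context_lengths"])  # .cuda() omitted in this CPU-only sketch
    print(type(inputs).__name__)  # list, then tuple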
2 changes: 1 addition & 1 deletion nemo/collections/nlp/modules/common/megatron/module.py
@@ -290,7 +290,7 @@ def forward(self, *inputs, **kwargs):
if getattr(self.module, 'pre_process', True):
inputs = fp32_to_float16(inputs, self.float16_converter)
outputs = self.module(*inputs, **kwargs)
if parallel_state.is_pipeline_last_stage():
if parallel_state.is_pipeline_last_stage() and self.training:
outputs = float16_to_fp32(outputs)
return outputs

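The forward pass now casts outputs back to fp32 on the last pipeline stage only while training; during generation the cast is deferred to the text-generation utilities further down in this diff. A toy mirror of that gating (standalone function, dummy tensors):

import torch

def maybe_cast_outputs(outputs, is_pipeline_last_stage, training):
    # cast half-precision outputs to fp32 only for the training loss path
    if is_pipeline_last_stage and training:
        return outputs.float()
    return outputs

x = torch.randn(2, 4, dtype=torch.float16)
print(maybe_cast_outputs(x, True, True).dtype)   # torch.float32
print(maybe_cast_outputs(x, True, False).dtype)  # torch.float16 (inference keeps half precision here)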
@@ -181,8 +181,11 @@ def __init__(self, model):

def clip_max_len(self, maxlen: int) -> int:
""" clip the max len based on the LM model max sequence length"""
if maxlen > self.model.cfg.encoder_seq_length + 1:
maxlen = self.model.cfg.encoder_seq_length + 1

# for positional embedding types that allow length extrapolation, don't clip the max length
if self.model.cfg.get("position_embedding_type", "learned_absolute") == "learned_absolute":
if maxlen > self.model.cfg.encoder_seq_length + 1:
maxlen = self.model.cfg.encoder_seq_length + 1
return maxlen

def init_batch(self, context_tokens: torch.Tensor, context_length: int):
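Only learned absolute position embeddings are tied to the trained sequence length; schemes such as RoPE can extrapolate, which is what the new Jenkins stage with max_seq_length=6000 exercises. A standalone sketch of the clipping rule (config values are made up):

def clip_max_len(maxlen, position_embedding_type="learned_absolute", encoder_seq_length=4096):
    # clip only when the embedding type cannot extrapolate past the training length
    if position_embedding_type == "learned_absolute":
        maxlen = min(maxlen, encoder_seq_length + 1)
    return maxlen

print(clip_max_len(6000, "learned_absolute"))  # 4097 -- clipped to encoder_seq_length + 1
print(clip_max_len(6000, "rope"))              # 6000 -- left unclipped for extrapolating embeddings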
11 changes: 9 additions & 2 deletions nemo/collections/nlp/modules/common/text_generation_utils.py
@@ -606,6 +606,11 @@ def switch(val1, val2, boolean):
return (1 - boolean) * val1 + boolean * val2


def _convert_to_float(model):
# enable conversion to float when inference is done via model.generate() and PP > 1 (may result in larger memory consumption)
return model.cfg.get('pipeline_model_parallel_size', 1) > 1 and model._inference_config is None


def sample_sequence_batch(
model,
inference_strategy,
@@ -667,10 +672,12 @@ def sample_sequence_batch(
output = inference_strategy.forward_step(batch, tensor_shape)

if parallel_state.is_pipeline_last_stage():
output = output[0]['logits'].float()
output = output[0]['logits']
if _convert_to_float(model):
output = output.float()

output = tensor_parallel.gather_from_tensor_model_parallel_region(output)
assert output is not None
output = output.float()
logits = output[:, -1].view(batch_size, -1).contiguous()

# make sure it will generate at least min_length
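The helper keeps logits in half precision during generation unless the model.generate() path is used with pipeline parallelism greater than 1, where an early fp32 cast is applied; otherwise the cast happens after the tensor-parallel gather. A toy mirror of the condition (plain arguments instead of the model's cfg and _inference_config):

def convert_to_float(pipeline_model_parallel_size, inference_config):
    # early fp32 cast only for model.generate() (no inference config set) with PP > 1
    return pipeline_model_parallel_size > 1 and inference_config is None

print(convert_to_float(1, None))              # False -- single stage, cast after the gather
print(convert_to_float(2, None))              # True  -- model.generate() with PP > 1
print(convert_to_float(2, {"greedy": True}))  # False -- explicit inference config present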
