diff --git a/nemo/collections/nlp/parts/megatron_trainer_builder.py b/nemo/collections/nlp/parts/megatron_trainer_builder.py
index e1a780f09756..03cf5fb755bd 100644
--- a/nemo/collections/nlp/parts/megatron_trainer_builder.py
+++ b/nemo/collections/nlp/parts/megatron_trainer_builder.py
@@ -145,7 +145,7 @@ def _plugins(self) -> list:
         use_dist_ckpt = not self.cfg.model.get('fsdp', False) and (
             self.cfg.model.get('mcore_gpt', False) or self.cfg.model.get('mcore_bert', False)
         )
-        async_save = self.cfg.exp_manager.checkpoint_callback_params.get('async_save', False)
+        async_save = self.cfg.exp_manager.get('checkpoint_callback_params', {}).get('async_save', False)
         if use_dist_ckpt:
             checkpoint_io = DistributedCheckpointIO.from_config(self.cfg.model, async_save)
             if async_save:
@@ -170,7 +170,7 @@ def _callbacks(self, callbacks: Optional[list]) -> list:
         if 'enable_progress_bar' not in self.cfg.trainer or self.cfg.trainer.enable_progress_bar:
             callbacks.append(CustomProgressBar())

-        if self.cfg.exp_manager.checkpoint_callback_params.get('async_save', False):
+        if self.cfg.exp_manager.get('checkpoint_callback_params', {}).get('async_save', False):
            callbacks.append(AsyncFinalizerCallback())
         return callbacks
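
For context on the failure mode this patch guards against, here is a minimal sketch (not part of the patch; the config values are hypothetical) assuming an OmegaConf config whose exp_manager section has no checkpoint_callback_params key. The old chained attribute access errors out on such configs, while .get('checkpoint_callback_params', {}) falls back to an empty dict so async_save cleanly defaults to False:

from omegaconf import OmegaConf

# Hypothetical config: exp_manager exists, but checkpoint_callback_params was
# never set (e.g. checkpointing options were omitted from the YAML).
cfg = OmegaConf.create({'exp_manager': {'explicit_log_dir': '/tmp/logs'}})

# Old pattern: chained attribute access assumes the section exists. Depending
# on the config's struct flag, OmegaConf either raises on the missing key or
# returns None, in which case calling .get() on None raises AttributeError.
# Either way the read fails instead of defaulting.
try:
    async_save = cfg.exp_manager.checkpoint_callback_params.get('async_save', False)
except Exception as exc:
    print(f'old pattern failed: {type(exc).__name__}')

# New pattern: .get() with an empty-dict fallback degrades gracefully, so
# async_save defaults to False when the section is absent.
async_save = cfg.exp_manager.get('checkpoint_callback_params', {}).get('async_save', False)
print(async_save)  # False

The same guard is applied at both call sites in the patch: _plugins and _callbacks read async_save identically, so both now tolerate configs that omit the checkpoint_callback_params section.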