diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py
index eede54aafc2c..293b8a3f5bce 100644
--- a/nemo/collections/nlp/parts/nlp_overrides.py
+++ b/nemo/collections/nlp/parts/nlp_overrides.py
@@ -426,7 +426,7 @@ def remove_checkpoint(self, filepath: Union[str, Path]) -> None:
             and self.lightning_module.sharded_state_dict() is not None
         ):
             if self.is_global_zero:
-                shutil.rmtree(ckpt_to_dir(filepath))
+                shutil.rmtree(ckpt_to_dir(filepath), ignore_errors=True)
 
         # legacy checkpoint logic, does not use megatron core
         else:
diff --git a/nemo/utils/callbacks/nemo_model_checkpoint.py b/nemo/utils/callbacks/nemo_model_checkpoint.py
index 87fe1d0a2e06..a290152907db 100644
--- a/nemo/utils/callbacks/nemo_model_checkpoint.py
+++ b/nemo/utils/callbacks/nemo_model_checkpoint.py
@@ -227,7 +227,7 @@ def _del_model_without_trainer(self, filepath: str) -> None:
         if is_global_rank_zero():
             try:
                 dist_ckpt = ckpt_to_dir(filepath)
-                shutil.rmtree(dist_ckpt)
+                shutil.rmtree(dist_ckpt, ignore_errors=True)
                 logging.info(f"Removed distributed checkpoint: {dist_ckpt}")
             except:
                 logging.info(f"Tried to remove distributed checkpoint: {dist_ckpt} but failed.")
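
Both hunks add ignore_errors=True to the shutil.rmtree calls that delete distributed-checkpoint directories, so removal of a directory that is already gone (or partially gone) no longer raises on the global-zero rank. Below is a minimal standalone sketch of the standard-library behavior this change relies on; the temporary path used here is purely illustrative and not part of the diff.

import shutil
import tempfile
from pathlib import Path

# A directory path that does not exist, standing in for an
# already-removed checkpoint directory.
missing = Path(tempfile.mkdtemp()) / "does_not_exist"

try:
    # Default behavior: rmtree raises FileNotFoundError (an OSError)
    # when the target directory is missing.
    shutil.rmtree(missing)
except FileNotFoundError:
    print(f"rmtree raised for missing path: {missing}")

# With ignore_errors=True the same call returns silently, which is
# what makes the checkpoint-removal paths above tolerant of repeats.
shutil.rmtree(missing, ignore_errors=True)
print("rmtree with ignore_errors=True returned without raising")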