Merge branch 'main' into format_fix
tbartley94 authored Jul 26, 2024
2 parents 3c9754f + 940bdb3 commit 53b40b9
Showing 2 changed files with 1 addition and 3 deletions.
2 changes: 0 additions & 2 deletions nemo/collections/nlp/parts/mixins/nlp_adapter_mixins.py
@@ -223,8 +223,6 @@ def add_adapter(self, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]):
            peft_cfgs: One or more PEFTConfig objects that specify the PEFT method configuration
        """

        if self.cfg.get('virtual_pipeline_model_parallel_size', None):
            raise ValueError('Virtual pipeline model parallel is not supported when using PEFT')
        if self.cfg.optim.name == "distributed_fused_adam":
            raise ValueError('distributed_fused_adam is not supported for PEFT. Please use fused_adam')

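For readers skimming the hunk above, the code it touches follows a pre-flight validation pattern: add_adapter() checks the model config and rejects settings known to be incompatible with PEFT before any adapter weights are injected. The following is a minimal, self-contained sketch of that pattern only; it is not NeMo code, and PEFTConfig and ModelCfg here are simplified stand-ins assumed for illustration.

# Illustrative sketch of the guard pattern shown in the hunk above (not NeMo code).
from dataclasses import dataclass, field
from typing import Any, Dict, List, Union


@dataclass
class PEFTConfig:
    # Stand-in for nemo.collections.nlp.parts.peft_config.PEFTConfig
    peft_scheme: str = "lora"


@dataclass
class ModelCfg:
    # Stand-in for the OmegaConf model config exposed as self.cfg
    optim_name: str = "fused_adam"
    extra: Dict[str, Any] = field(default_factory=dict)

    def get(self, key: str, default: Any = None) -> Any:
        return self.extra.get(key, default)


def add_adapter(cfg: ModelCfg, peft_cfgs: Union[PEFTConfig, List[PEFTConfig]]) -> List[PEFTConfig]:
    """Reject unsupported configurations, then normalize peft_cfgs to a list."""
    if cfg.get('virtual_pipeline_model_parallel_size', None):
        raise ValueError('Virtual pipeline model parallel is not supported when using PEFT')
    if cfg.optim_name == "distributed_fused_adam":
        raise ValueError('distributed_fused_adam is not supported for PEFT. Please use fused_adam')
    return peft_cfgs if isinstance(peft_cfgs, list) else [peft_cfgs]


if __name__ == "__main__":
    print(add_adapter(ModelCfg(), PEFTConfig()))  # -> [PEFTConfig(peft_scheme='lora')]

The point of the pattern is simply that misconfigurations fail fast with an actionable error instead of surfacing later during adapter injection or training.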
2 changes: 1 addition & 1 deletion tutorials/llm/mamba/mamba.rst
@@ -75,7 +75,7 @@ Convert the Pytorch Checkpoint to a NeMo Checkpoint
--output_path <path to target .nemo model> \
--mamba_ssm_ngroups 8 \
--precision bf16 \
-   --tokenizer_path=<path to tokenizer.model>
+   --tokenizer_model_dir=<path to tokenizer.model> # Remove this line (or set it to None) for 130m, 370m, 780m, 1.3b, and 2.7b models.
* Note: the ``mamba_ssm_ngroups`` parameter should be 1 for the Mamba2 models from the `Transformers are SSMs paper <https://arxiv.org/pdf/2405.21060>`__ (130m, 370m, 780m, 1.3b, and 2.7b) and 8 for the Mamba2 and Mamba2-Hybrid models by `NVIDIA <https://arxiv.org/pdf/2406.07887>`__ (both 8b).
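To make the note above concrete, here is a small illustrative Python helper that assembles the converter flags according to that rule. It is not part of the tutorial: the helper name build_convert_args and the --input_name_or_path flag are assumptions for illustration; only --output_path, --precision, --mamba_ssm_ngroups, and --tokenizer_model_dir appear in the snippet above.

# Hypothetical helper (illustration only) encoding the mamba_ssm_ngroups / tokenizer rule.
from typing import List, Optional

# Variants from the "Transformers are SSMs" paper: mamba_ssm_ngroups=1, no tokenizer flag.
# NVIDIA's 8b Mamba2 / Mamba2-Hybrid checkpoints: mamba_ssm_ngroups=8 plus --tokenizer_model_dir.
SSM_PAPER_VARIANTS = {"130m", "370m", "780m", "1.3b", "2.7b"}


def build_convert_args(variant: str, input_path: str, output_path: str,
                       tokenizer_model_dir: Optional[str] = None) -> List[str]:
    args = [
        f"--input_name_or_path={input_path}",  # assumed flag name for the source checkpoint
        f"--output_path={output_path}",
        "--precision=bf16",
    ]
    if variant in SSM_PAPER_VARIANTS:
        args.append("--mamba_ssm_ngroups=1")  # tokenizer_model_dir intentionally omitted
    else:
        args.append("--mamba_ssm_ngroups=8")
        if tokenizer_model_dir:
            args.append(f"--tokenizer_model_dir={tokenizer_model_dir}")
    return args


print(" ".join(build_convert_args("8b", "mamba2_8b.pt", "mamba2_8b.nemo",
                                  tokenizer_model_dir="/path/to/tokenizer.model")))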
