diff --git a/nemo/collections/multimodal/modules/speechllm_perception.py b/nemo/collections/multimodal/modules/speechllm_perception.py index 3745c5971c73..a656646bac63 100644 --- a/nemo/collections/multimodal/modules/speechllm_perception.py +++ b/nemo/collections/multimodal/modules/speechllm_perception.py @@ -68,7 +68,10 @@ def __init__(self, cfg: DictConfig): # Initialize components self.preprocessor = self.from_config_dict(cfg.preprocessor) self.encoder = self.from_config_dict(cfg.encoder) - self.spec_augmentation = self.from_config_dict(cfg.spec_augment) + if 'spec_augment' in cfg and cfg.spec_augment is not None: + self.spec_augmentation = self.from_config_dict(cfg.spec_augment) + else: + self.spec_augmentation = None self.matcher = self.from_config_dict(cfg.matcher) self.proj = nn.Linear(cfg.matcher.d_model, cfg.output_dim)