Disable CTC modality adapter

Signed-off-by: andrusenkoau <andrusenkoau@gmail.com>
andrusenkoau · Sep 11, 2024 · e6c6eb4
1 parent f75a8f7
commit e6c6eb4
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 2 deletions.
diff --git a/nemo/collections/multimodal/speech_llm/models/modular_models.py b/nemo/collections/multimodal/speech_llm/models/modular_models.py
@@ -519,7 +519,8 @@ def loss_func(output_tensor):
                 cp_size = self.cfg.get('context_parallel_size', 1)
 
                 # compute ctc loss
-                ctc_encoded, ctc_encoded_len = self.perception.ctc_modality_adapter(audio_signal=audio_encoder_outputs[0], length=audio_encoder_outputs[1])
+                # ctc_encoded, ctc_encoded_len = self.perception.ctc_modality_adapter(audio_signal=audio_encoder_outputs[0], length=audio_encoder_outputs[1])
+                ctc_encoded, ctc_encoded_len = audio_encoder_outputs[0], audio_encoder_outputs[1]
                 ctc_log_probs = self.perception.ctc_decoder(encoder_output=ctc_encoded)
                 ctc_input_lengths = ctc_encoded_len
                 # ctc_log_probs = self.ctc_decoder(encoder_output=audio_encoder_outputs[0])

diff --git a/nemo/collections/multimodal/speech_llm/modules/perception_modules.py b/nemo/collections/multimodal/speech_llm/modules/perception_modules.py
@@ -102,7 +102,7 @@ def __init__(self, cfg: DictConfig):
             raise ValueError(
                 "The config need to have a section for the CTC decoder named as aux_ctc for Hybrid models."
             )
-        self.ctc_modality_adapter = self.from_config_dict(cfg.aux_ctc.modality_adapter)
+        # self.ctc_modality_adapter = self.from_config_dict(cfg.aux_ctc.modality_adapter)
 
         self.cfg.aux_ctc.decoder.vocabulary = [1]*self.cfg.aux_ctc.decoder.num_classes
         # self.cfg.aux_ctc.decoder.num_classes = len(ctc_tokenizer.vocab)