remove ipdb traces

Signed-off-by: Nithin Rao Koluguri <nithinraok>
NVIDIA · Oct 31, 2024 · 22d677f · 22d677f
1 parent 8a815be
commit 22d677f
Show file tree

Hide file tree

Showing 2 changed files with 8 additions and 14 deletions.
diff --git a/examples/asr/transcribe_speech.py b/examples/asr/transcribe_speech.py
@@ -47,14 +47,9 @@
   model_path: path to .nemo ASR checkpoint
   pretrained_name: name of pretrained ASR model (from NGC registry)
   audio_dir: path to directory with audio files
-  dataset_manifest: path to dataset JSON manifest file (in NeMo format)
-
-  timestamps: Bool to request greedy time stamp information (if the model supports it)
+  dataset_manifest: path to dataset JSON manifest file (in NeMo format)
   compute_langs: Bool to request language ID information (if the model supports it)
-
-  (Optionally: You can limit the type of timestamp computations using below overrides)
-  ctc_decoding.ctc_timestamp_type="all"  # (default all, can be [all, char, word])
-  rnnt_decoding.rnnt_timestamp_type="all"  # (default all, can be [all, char, word])
+  timestamps: Bool to request greedy time stamp information (if the model supports it); defaults to None
 
   (Optionally: You can limit the type of timestamp computations using below overrides)
   ctc_decoding.ctc_timestamp_type="all"  # (default all, can be [all, char, word, segment])
@@ -136,10 +131,10 @@ class TranscriptionConfig:
     random_seed: Optional[int] = None  # seed number going to be used in seed_everything()
 
     # Set to True to output greedy timestamp information (only supported models) and returns full alignment hypotheses
-    timestamps: bool = False
+    timestamps: Optional[bool] = None
 
-    # Set to False to return text instead of hypotheses from the transcribe function, so as to save memory
-    return_hypotheses: bool = True
+    # Set to True to return hypotheses instead of text from the transcribe function
+    return_hypotheses: bool = False
 
     # Set to True to output language ID information
     compute_langs: bool = False

diff --git a/nemo/collections/asr/models/aed_multitask_models.py b/nemo/collections/asr/models/aed_multitask_models.py
@@ -455,7 +455,7 @@ def transcribe(
         channel_selector: Optional[ChannelSelectorType] = None,
         augmentor: DictConfig = None,
         verbose: bool = True,
-        timestamps: bool = False,
+        timestamps: Optional[bool] = None,
         override_config: Optional[MultiTaskTranscriptionConfig] = None,
         **prompt,
     ) -> Union[List[str], List[Hypothesis]]:
@@ -473,7 +473,8 @@ def transcribe(
             num_workers: (int) number of workers for DataLoader
             channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`.
             augmentor: (DictConfig): Augment audio samples during transcription if augmentor is applied.
-            timestamps: Bool: whether to provide timestamps along with the transcriptions, currently its not supported for AED models.
+            timestamps: Optional(Bool): if set to True, timestamps are returned as part of the hypothesis object (output.timestep['segment']/output.timestep['word']). Refer to the `Hypothesis` class for more details. Defaults to None, which retains the previous state set via self.change_decoding_strategy().
+            Note: Currently it is not supported for AED models.
             verbose: (bool) whether to display tqdm progress bar
             override_config: (Optional[MultiTaskTranscriptionConfig]) A config to override the default config.
             **prompt: Optional input to construct the prompts for the model. Accepted formats are: 1) legacy Canary-1B API source_lang=<lang>, target_lang=<lang>, etc. 2) explicit single-turn role=<role>, slots={<slot>: <value>, ...} 3) explicit multi-turn: turns=[{"role": <role>, "slots": {<slot>: <value>, ...}}]
@@ -501,9 +502,7 @@ def transcribe(
                     f"but got {type(override_config)}"
                 )
             trcfg = override_config
-        import pdb
 
-        pdb.set_trace()
         return super().transcribe(audio=audio, override_config=trcfg)
 
     def _setup_dataloader_from_config(self, config: Optional[Dict]):