Skip to content

Commit

Permalink
remove ipdb traces
Browse files Browse the repository at this point in the history
Signed-off-by: Nithin Rao Koluguri <nithinraok>
  • Loading branch information
Nithin Rao Koluguri committed Oct 31, 2024
1 parent 8a815be commit 22d677f
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 14 deletions.
15 changes: 5 additions & 10 deletions examples/asr/transcribe_speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,9 @@
model_path: path to .nemo ASR checkpoint
pretrained_name: name of pretrained ASR model (from NGC registry)
audio_dir: path to directory with audio files
dataset_manifest: path to dataset JSON manifest file (in NeMo format)
timestamps: Bool to request greedy time stamp information (if the model supports it)
dataset_manifest: path to dataset JSON manifest file (in NeMo format)
compute_langs: Bool to request language ID information (if the model supports it)
(Optionally: You can limit the type of timestamp computations using below overrides)
ctc_decoding.ctc_timestamp_type="all" # (default all, can be [all, char, word])
rnnt_decoding.rnnt_timestamp_type="all" # (default all, can be [all, char, word])
timestamps: Bool to request greedy timestamp information (if the model supports it); defaults to None
(Optionally: You can limit the type of timestamp computations using below overrides)
ctc_decoding.ctc_timestamp_type="all" # (default all, can be [all, char, word, segment])
Expand Down Expand Up @@ -136,10 +131,10 @@ class TranscriptionConfig:
random_seed: Optional[int] = None # seed number going to be used in seed_everything()

# Set to True to output greedy timestamp information (only for supported models); this also returns full alignment hypotheses
timestamps: bool = False
timestamps: Optional[bool] = None

# Set to False to return text instead of hypotheses from the transcribe function, so as to save memory
return_hypotheses: bool = True
# Set to True to return hypotheses instead of text from the transcribe function
return_hypotheses: bool = False

# Set to True to output language ID information
compute_langs: bool = False
Expand Down
7 changes: 3 additions & 4 deletions nemo/collections/asr/models/aed_multitask_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ def transcribe(
channel_selector: Optional[ChannelSelectorType] = None,
augmentor: DictConfig = None,
verbose: bool = True,
timestamps: bool = False,
timestamps: Optional[bool] = None,
override_config: Optional[MultiTaskTranscriptionConfig] = None,
**prompt,
) -> Union[List[str], List[Hypothesis]]:
Expand All @@ -473,7 +473,8 @@ def transcribe(
num_workers: (int) number of workers for DataLoader
channel_selector (int | Iterable[int] | str): select a single channel or a subset of channels from multi-channel audio. If set to `'average'`, it performs averaging across channels. Disabled if set to `None`. Defaults to `None`.
augmentor: (DictConfig): Augment audio samples during transcription if augmentor is applied.
timestamps: Bool: whether to provide timestamps along with the transcriptions, currently its not supported for AED models.
timestamps: Optional[bool]: if set to True, timestamps are returned as part of the hypothesis object (output.timestep['segment']/output.timestep['word']). Refer to the `Hypothesis` class for more details. Defaults to None, which retains the state previously set via self.change_decoding_strategy().
Note: Currently it is not supported for AED models.
verbose: (bool) whether to display tqdm progress bar
override_config: (Optional[MultiTaskTranscriptionConfig]) A config to override the default config.
**prompt: Optional input to construct the prompts for the model. Accepted formats are: 1) legacy Canary-1B API source_lang=<lang>, target_lang=<lang>, etc. 2) explicit single-turn role=<role>, slots={<slot>: <value>, ...} 3) explicit multi-turn: turns=[{"role": <role>, "slots": {<slot>: <value>, ...}}]
Expand Down Expand Up @@ -501,9 +502,7 @@ def transcribe(
f"but got {type(override_config)}"
)
trcfg = override_config
import pdb

pdb.set_trace()
return super().transcribe(audio=audio, override_config=trcfg)

def _setup_dataloader_from_config(self, config: Optional[Dict]):
Expand Down

0 comments on commit 22d677f

Please sign in to comment.