Add lhotse fixes for rnnt model training and WER hanging issue with fuse batching (#10821)

* Add lhotse fixes for rnnt model training and WER hanging issue with fuse batching (#10787)
* Add lhotse fixes for rnnt model training and WER hanging issue with fuse batching
Signed-off-by: Nithin Rao Koluguri <nithinraok>
* Apply isort and black reformatting
Signed-off-by: nithinraok <nithinraok@users.noreply.github.com>
---------
Signed-off-by: Nithin Rao Koluguri <nithinraok>
Signed-off-by: nithinraok <nithinraok@users.noreply.github.com>
Co-authored-by: Nithin Rao Koluguri <nithinraok>
Co-authored-by: nithinraok <nithinraok@users.noreply.github.com>
* Apply isort and black reformatting
Signed-off-by: nithinraok <nithinraok@users.noreply.github.com>
* Apply isort and black reformatting
Signed-off-by: artbataev <artbataev@users.noreply.github.com>
---------
Signed-off-by: Nithin Rao Koluguri <nithinraok>
Signed-off-by: nithinraok <nithinraok@users.noreply.github.com>
Signed-off-by: artbataev <artbataev@users.noreply.github.com>
Co-authored-by: nithinraok <nithinraok@users.noreply.github.com>
Co-authored-by: artbataev <artbataev@users.noreply.github.com>
NVIDIA · Oct 20, 2024 · ade44dc · ade44dc
1 parent 153e067
commit ade44dc
Show file tree

Hide file tree

Showing 4 changed files with 15 additions and 11 deletions.
diff --git a/nemo/collections/asr/data/audio_to_text_lhotse.py b/nemo/collections/asr/data/audio_to_text_lhotse.py
@@ -51,15 +51,12 @@ def __init__(self, tokenizer):
     def __getitem__(self, cuts) -> Tuple[torch.Tensor, ...]:
         audio, audio_lens, cuts = self.load_audio(cuts)
         tokens = [
-            torch.as_tensor(
-                sum(
-                    (
-                        # Supervisions may come pre-tokenized from the dataloader.
-                        s.tokens if hasattr(s, "tokens") else self.tokenizer(s.text, s.language)
-                        for s in c.supervisions
-                    ),
-                    start=[],
-                )
+            torch.cat(
+                [
+                    torch.as_tensor(s.tokens if hasattr(s, "tokens") else self.tokenizer(s.text, s.language))
+                    for s in c.supervisions
+                ],
+                dim=0,
             )
             for c in cuts
         ]

diff --git a/nemo/collections/asr/metrics/wer.py b/nemo/collections/asr/metrics/wer.py
@@ -254,8 +254,9 @@ def __init__(
         fold_consecutive=True,
         batch_dim_index=0,
         dist_sync_on_step=False,
+        sync_on_compute=True,
     ):
-        super().__init__(dist_sync_on_step=dist_sync_on_step)
+        super().__init__(dist_sync_on_step=dist_sync_on_step, sync_on_compute=sync_on_compute)
 
         self.decoding = decoding
         self.use_cer = use_cer

diff --git a/nemo/collections/asr/modules/rnnt.py b/nemo/collections/asr/modules/rnnt.py
@@ -1457,6 +1457,10 @@ def forward(
                     sub_transcripts = sub_transcripts.detach()
 
                     # Update WER on each process without syncing
+                    if self.training:
+                        original_sync = self.wer._to_sync
+                        self.wer._to_sync = False
+
                     self.wer.update(
                         predictions=sub_enc,
                         predictions_lengths=sub_enc_lens,
@@ -1467,6 +1471,9 @@ def forward(
                     wer, wer_num, wer_denom = self.wer.compute()
                     self.wer.reset()
 
+                    if self.training:
+                        self.wer._to_sync = original_sync
+
                     wers.append(wer)
                     wer_nums.append(wer_num)
                     wer_denoms.append(wer_denom)

diff --git a/nemo/collections/common/data/lhotse/dataloader.py b/nemo/collections/common/data/lhotse/dataloader.py
@@ -317,7 +317,6 @@ def get_lhotse_dataloader_from_config(
             duration_bins=determine_bucket_duration_bins(config),
             num_cuts_for_bins_estimate=config.num_cuts_for_bins_estimate,
             buffer_size=config.bucket_buffer_size,
-            concurrent=config.concurrent_bucketing,
             rank=0 if is_tarred else global_rank,
             world_size=1 if is_tarred else world_size,
         )