
Commit

Merge branch 'main' into prompt-learning-pipeline-parallel
vadam5 committed Jun 2, 2022
2 parents 4f17803 + f6936ce commit 1790ea0
Showing 7 changed files with 53 additions and 5 deletions.
5 changes: 4 additions & 1 deletion docs/source/asr/data/benchmark_en.csv
@@ -16,10 +16,13 @@ stt_en_contextnet_1024,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_contextnet_1024
stt_en_conformer_ctc_small,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_small"
stt_en_conformer_ctc_medium,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_medium"
stt_en_conformer_ctc_large,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_large"
+stt_en_conformer_ctc_xlarge,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_xlarge"
stt_en_conformer_ctc_small_ls,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_small_ls"
stt_en_conformer_ctc_medium_ls,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_medium_ls"
stt_en_conformer_ctc_large_ls,EncDecCTCModelBPE,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_large_ls"
stt_en_conformer_transducer_large_ls,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_large_ls"
stt_en_conformer_transducer_small,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_small"
stt_en_conformer_transducer_medium,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_medium"
-stt_en_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_large"
+stt_en_conformer_transducer_large,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_large"
+stt_en_conformer_transducer_xlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xlarge"
+stt_en_conformer_transducer_xxlarge,EncDecRNNTBPEModel,"https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge"
7 changes: 6 additions & 1 deletion examples/asr/transcribe_speech.py
@@ -17,6 +17,7 @@
import json
import os
from dataclasses import dataclass, is_dataclass
+from pathlib import Path
from typing import Optional

import pytorch_lightning as pl
@@ -158,6 +159,7 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
            logging.error(f"The input dataset_manifest {cfg.dataset_manifest} is empty. Exiting!")
            return None

+        manifest_dir = Path(cfg.dataset_manifest).parent
        with open(cfg.dataset_manifest, 'r') as f:
            has_two_fields = []
            for line in f:
@@ -166,7 +168,10 @@ def main(cfg: TranscriptionConfig) -> TranscriptionConfig:
                    has_two_fields.append(True)
                else:
                    has_two_fields.append(False)
-                filepaths.append(item['audio_filepath'])
+                audio_file = Path(item['audio_filepath'])
+                if not audio_file.is_file() and not audio_file.is_absolute():
+                    audio_file = manifest_dir / audio_file
+                filepaths.append(str(audio_file.absolute()))
        partial_audio = all(has_two_fields)

    logging.info(f"\nTranscribing {len(filepaths)} files...\n")
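
As a standalone illustration (not part of the diff), the path handling added above can be sketched on its own; the manifest location and file name below are hypothetical:

from pathlib import Path

def resolve_audio_path(manifest_path: str, audio_filepath: str) -> str:
    # Anchor relative manifest entries to the directory that holds the manifest.
    manifest_dir = Path(manifest_path).parent
    audio_file = Path(audio_filepath)
    # Only rewrite paths that neither exist as given nor are absolute.
    if not audio_file.is_file() and not audio_file.is_absolute():
        audio_file = manifest_dir / audio_file
    return str(audio_file.absolute())

# A manifest at /data/librispeech/manifest.json with a relative entry
# "wavs/1089-0000.wav" resolves to /data/librispeech/wavs/1089-0000.wav.
print(resolve_audio_path("/data/librispeech/manifest.json", "wavs/1089-0000.wav"))
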
7 changes: 7 additions & 0 deletions nemo/collections/asr/models/ctc_bpe_models.py
@@ -383,6 +383,13 @@ def list_available_models(cls) -> Optional[PretrainedModelInfo]:
        )
        results.append(model)

+        model = PretrainedModelInfo(
+            pretrained_model_name="stt_en_conformer_ctc_xlarge",
+            description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_xlarge",
+            location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_conformer_ctc_xlarge/versions/1.10.0/files/stt_en_conformer_ctc_xlarge.nemo",
+        )
+        results.append(model)
+
        model = PretrainedModelInfo(
            pretrained_model_name="stt_en_conformer_ctc_small_ls",
            description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_ctc_small_ls",
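
With the new stt_en_conformer_ctc_xlarge entry registered above, the checkpoint should be loadable by name through the usual NeMo API. A hedged usage sketch (the audio path is a placeholder, and the checkpoint is a large download):

import nemo.collections.asr as nemo_asr

# Downloads and caches the .nemo file from the NGC location registered above.
asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(
    model_name="stt_en_conformer_ctc_xlarge"
)
transcripts = asr_model.transcribe(["/path/to/sample.wav"])  # placeholder path
print(transcripts[0])
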
14 changes: 14 additions & 0 deletions nemo/collections/asr/models/rnnt_bpe_models.py
@@ -113,6 +113,20 @@ def list_available_models(cls) -> List[PretrainedModelInfo]:
        )
        results.append(model)

+        model = PretrainedModelInfo(
+            pretrained_model_name="stt_en_conformer_transducer_xlarge",
+            description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xlarge",
+            location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_conformer_transducer_xlarge/versions/1.10.0/files/stt_en_conformer_transducer_xlarge.nemo",
+        )
+        results.append(model)
+
+        model = PretrainedModelInfo(
+            pretrained_model_name="stt_en_conformer_transducer_xxlarge",
+            description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xxlarge",
+            location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_conformer_transducer_xxlarge/versions/1.8.0/files/stt_en_conformer_transducer_xxlarge.nemo",
+        )
+        results.append(model)
+
        model = PretrainedModelInfo(
            pretrained_model_name="stt_de_contextnet_1024",
            description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_de_contextnet_1024",
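
Similarly, the two new transducer checkpoints should now show up in the registry and be loadable by name; a hedged sketch (both checkpoints are large downloads):

import nemo.collections.asr as nemo_asr

# The new names should appear among the registered pretrained checkpoints.
for info in nemo_asr.models.EncDecRNNTBPEModel.list_available_models():
    if "conformer_transducer_x" in info.pretrained_model_name:
        print(info.pretrained_model_name, "->", info.location)

# Loading by name fetches the .nemo file from the NGC location registered above.
rnnt_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained(
    model_name="stt_en_conformer_transducer_xlarge"
)
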
16 changes: 15 additions & 1 deletion nemo/collections/common/parts/preprocessing/manifest.py
@@ -14,6 +14,7 @@

import json
from os.path import expanduser
+from pathlib import Path
from typing import Any, Callable, Dict, Iterator, List, Optional, Union


@@ -87,7 +88,20 @@ def __parse_item(line: str, manifest_file: str) -> Dict[str, Any]:
        raise ValueError(
            f"Manifest file {manifest_file} has invalid json line structure: {line} without proper audio file key."
        )
-    item['audio_file'] = expanduser(item['audio_file'])
+
+    # If the audio path is relative, and not using a tarred dataset,
+    # attach the parent directory of the manifest to the audio path.
+    # Assume "audio_file" starts with a dir, such as "wavs/xxxxx.wav".
+    # If using a tarred dataset, the "audio_path" is like "_home_data_tarred_wavs_xxxx.wav",
+    # so we will just ignore it.
+    manifest_dir = Path(manifest_file).parent
+    audio_file = Path(item['audio_file'])
+    if not audio_file.is_file() and not audio_file.is_absolute() and audio_file.parent != Path("."):
+        # assume the wavs/ dir and the manifest are under the same parent dir
+        audio_file = manifest_dir / audio_file
+        item['audio_file'] = str(audio_file.absolute())
+    else:
+        item['audio_file'] = expanduser(item['audio_file'])

    # Duration.
    if 'duration' not in item:
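
To make the three cases in the comment concrete, here is a self-contained sketch of the added branch (the manifest path and file names are invented):

from os.path import expanduser
from pathlib import Path

def resolve_manifest_audio(manifest_file: str, audio_path: str) -> str:
    manifest_dir = Path(manifest_file).parent
    audio_file = Path(audio_path)
    # Relative path with a directory component ("wavs/utt.wav"): anchor it
    # to the manifest's directory.
    if not audio_file.is_file() and not audio_file.is_absolute() and audio_file.parent != Path("."):
        return str((manifest_dir / audio_file).absolute())
    # Absolute paths and flat tarred-style names keep the old expanduser behaviour.
    return expanduser(audio_path)

manifest = "/data/corpus/train_manifest.json"
print(resolve_manifest_audio(manifest, "wavs/utt_0001.wav"))               # -> /data/corpus/wavs/utt_0001.wav
print(resolve_manifest_audio(manifest, "_home_data_tarred_wavs_utt.wav"))  # unchanged: no directory component
print(resolve_manifest_audio(manifest, "/abs/path/utt.wav"))               # unchanged: already absolute
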
2 changes: 1 addition & 1 deletion nemo/collections/tts/models/fastpitch.py
@@ -463,7 +463,7 @@ def __setup_dataloader_from_config(self, cfg, shuffle_should_be: bool = True, na
                cfg.dataloader_params.shuffle = True
            elif not cfg.dataloader_params.shuffle:
                logging.error(f"The {name} dataloader for {self} has shuffle set to False!!!")
-        elif not shuffle_should_be and cfg.dataloader_params.shuffle:
+        elif cfg.dataloader_params.shuffle:
            logging.error(f"The {name} dataloader for {self} has shuffle set to True!!!")

        if cfg.dataset._target_ == "nemo.collections.tts.torch.data.TTSDataset":
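
The dropped `not shuffle_should_be` check is redundant: the outer `elif` is only reached when `shuffle_should_be` is already falsy, so the old and new conditions behave identically. A minimal sketch of that control flow (the values and messages are hypothetical, not the real method):

def check_shuffle(shuffle_should_be: bool, shuffle: bool) -> str:
    if shuffle_should_be:
        return "ok (training loader)"
    # Old: elif not shuffle_should_be and shuffle:  -- `not shuffle_should_be`
    # is always True in this branch, so it can be dropped.
    elif shuffle:
        return "error: shuffle set to True for a non-training loader"
    return "ok"

print(check_shuffle(False, True))  # error branch; same result under either condition
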
7 changes: 6 additions & 1 deletion
@@ -41,6 +41,7 @@
import json
import os
import pickle
+from pathlib import Path

import editdistance
import kenlm_utils
@@ -230,12 +231,16 @@ def main():
    )

    target_transcripts = []
+    manifest_dir = Path(args.input_manifest).parent
    with open(args.input_manifest, 'r') as manifest_file:
        audio_file_paths = []
        for line in tqdm(manifest_file, desc=f"Reading Manifest {args.input_manifest} ...", ncols=120):
            data = json.loads(line)
+            audio_file = Path(data['audio_filepath'])
+            if not audio_file.is_file() and not audio_file.is_absolute():
+                audio_file = manifest_dir / audio_file
            target_transcripts.append(data['text'])
-            audio_file_paths.append(data['audio_filepath'])
+            audio_file_paths.append(str(audio_file.absolute()))

    if args.probs_cache_file and os.path.exists(args.probs_cache_file):
        logging.info(f"Found a pickle file of probabilities at '{args.probs_cache_file}'.")
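
For reference, the target_transcripts collected here are typically scored against beam-search hypotheses with editdistance; a small sketch with made-up sentences (the real hypotheses come from the decoding step elsewhere in this script):

import editdistance

refs = ["the cat sat on the mat", "hello world"]  # from the manifest's "text" field
hyps = ["the cat sat on a mat", "hello word"]     # hypothetical decoder output

word_errors = sum(editdistance.eval(r.split(), h.split()) for r, h in zip(refs, hyps))
word_count = sum(len(r.split()) for r in refs)
print(f"WER = {word_errors / word_count:.2%}")    # 2 errors / 8 words = 25.00%
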
