From 5d5fedd552e74095bd5b31d9683119ac400f998c Mon Sep 17 00:00:00 2001 From: Zhilin Wang Date: Tue, 15 Feb 2022 10:18:29 -0800 Subject: [PATCH 1/5] Dialogue state tracking refactor/ SGDGEN patch 2 (#3674) * refactor dialogue state tracking for modelling/dataset interoperability Signed-off-by: Zhilin Wang * fix style changes Signed-off-by: Zhilin Wang * fix typo Signed-off-by: Zhilin Wang * fix style raised by lgtm Signed-off-by: Zhilin Wang * fix style formatting Signed-off-by: Zhilin Wang * update template to include description of intent Signed-off-by: Zhilin Wang * update Jenkinsfile Signed-off-by: Zhilin Wang * changes based on requests in review Signed-off-by: Zhilin Wang * add compatibility with assistant dataset Signed-off-by: Zhilin Wang * update Jenkins Signed-off-by: Zhilin Wang * remove dialogue_state_tracking Signed-off-by: Zhilin Wang * update huggingface utils for dialogue Signed-off-by: Zhilin Wang * rename dialogue_state_tracking_hybrid to dialogue_state_tracking_sgdqa Signed-off-by: Zhilin Wang * style fix Signed-off-by: Zhilin Wang * fix style Signed-off-by: Zhilin Wang * style fix nemo/collections/nlp/models/dialogue_state_tracking_sgdqa/__init__.py Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * update Jenkinsfile for SGDGEN Signed-off-by: Zhilin Wang * fix typo Signed-off-by: Zhilin Wang * add docstrings for assistant data processsor Signed-off-by: Zhilin Wang * update Jenkins for SGDGEN local checkpoint Signed-off-by: Zhilin Wang * update style Signed-off-by: Zhilin Wang * use local vocab file for Jenkinsfile Signed-off-by: Zhilin Wang * patch for Jenkins CI using local file Signed-off-by: Zhilin Wang Co-authored-by: Zhilin Wang Co-authored-by: Oleksii Kuchaiev Co-authored-by: Yang Zhang --- Jenkinsfile | 6 +- .../conf/dialogue_config.yaml | 6 +- .../sgd_gen.py | 15 ++- .../dialogue_gpt_model.py | 110 +++++++++++------- 4 files changed, 90 insertions(+), 47 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index a7f6cbd014d7..f5fe167b28f0 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -734,8 +734,10 @@ pipeline { trainer.val_check_interval=0.0 \ trainer.gpus=[0] \ model.dataset.use_cache=false \ - model.tokenizer.special_tokens={pad_token:"endoftext"}\ - model.language_model.pretrained_model_name=gpt2 \ + model.tokenizer.special_tokens={pad_token:"endoftext"} \ + model.tokenizer.tokenizer_name=gpt2 \ + model.tokenizer.vocab_file=/home/TestData/nlp/gpt2/vocab.json\ + model.language_model.pretrained_model_name=/home/TestData/nlp/gpt2 \ trainer.accelerator=ddp \ exp_manager=null && \ rm -rf sgd_gen_outputs' diff --git a/examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml b/examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml index 1a12803012f8..51a30eb2384e 100644 --- a/examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml +++ b/examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml @@ -29,10 +29,12 @@ trainer: resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc. num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it checkpoint_callback: False # Provided by exp_manager - logger: False # Provided by exp_manager - + logger: False # Provided by exp_manager + model: + tensor_model_parallel_size: 1 nemo_path: null # filename to save the model and associated artifacts to .nemo file + library: huggingface # huggingface or megatron tokenizer: tokenizer_name: ${model.language_model.pretrained_model_name} # or sentencepiece vocab_file: null # path to vocab file diff --git a/examples/nlp/dialogue_state_tracking_generative/sgd_gen.py b/examples/nlp/dialogue_state_tracking_generative/sgd_gen.py index dd79985d289c..2b33e7a026f8 100644 --- a/examples/nlp/dialogue_state_tracking_generative/sgd_gen.py +++ b/examples/nlp/dialogue_state_tracking_generative/sgd_gen.py @@ -106,8 +106,11 @@ from nemo.collections.nlp.models.dialogue_state_tracking_generative.dialogue_gpt_model import DialogueGPTModel from nemo.collections.nlp.models.dialogue_state_tracking_sgdqa.sgdqa_model import SGDQAModel +from nemo.collections.nlp.modules.common.megatron.megatron_utils import compute_model_parallel_rank +from nemo.collections.nlp.parts.nlp_overrides import NLPDDPPlugin from nemo.core.config import hydra_runner from nemo.utils import logging +from nemo.utils.app_state import AppState from nemo.utils.exp_manager import exp_manager @@ -115,9 +118,17 @@ def main(cfg: DictConfig) -> None: pl.seed_everything(42) logging.info(f'Config: {OmegaConf.to_yaml(cfg)}') - trainer = pl.Trainer(**cfg.trainer) + + plugin = NLPDDPPlugin() + trainer = pl.Trainer(**cfg.trainer, plugins=plugin) + exp_manager(trainer, cfg.get("exp_manager", None)) + app_state = AppState() + if cfg.model.tensor_model_parallel_size > 1: + app_state.model_parallel_size = cfg.model.tensor_model_parallel_size + app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size) + if 'bert' in cfg.model.language_model.pretrained_model_name: model_class = SGDQAModel elif 'gpt' in cfg.model.language_model.pretrained_model_name.lower(): @@ -155,7 +166,7 @@ def main(cfg: DictConfig) -> None: if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.ds_item is not None: gpu = 1 if cfg.trainer.gpus != 0 else 0 - trainer = pl.Trainer(gpus=gpu) + trainer = pl.Trainer(gpus=gpu, plugins=plugin, precision=16) model.setup_multiple_test_data(test_data_config=cfg.model.test_ds) if model.prepare_test(trainer): trainer.test(model) diff --git a/nemo/collections/nlp/models/dialogue_state_tracking_generative/dialogue_gpt_model.py b/nemo/collections/nlp/models/dialogue_state_tracking_generative/dialogue_gpt_model.py index a489e9573786..4e6b0fd3ca55 100644 --- a/nemo/collections/nlp/models/dialogue_state_tracking_generative/dialogue_gpt_model.py +++ b/nemo/collections/nlp/models/dialogue_state_tracking_generative/dialogue_gpt_model.py @@ -27,6 +27,7 @@ from omegaconf import DictConfig, OmegaConf from pytorch_lightning import Trainer from torch.utils.data import DataLoader +from transformers import AutoModelWithLMHead from nemo.collections.nlp.data.dialogue_state_tracking_generative import ( DialogueGPTDataset, @@ -37,8 +38,9 @@ DialogueAssistantDataProcessor, ) from nemo.collections.nlp.metrics.classification_report import ClassificationReport +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel from nemo.collections.nlp.models.nlp_model import NLPModel -from nemo.collections.nlp.modules.common.lm_utils import get_lm_model +from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group from nemo.core.classes.common import PretrainedModelInfo from nemo.utils import logging from nemo.utils.get_rank import is_global_rank_zero @@ -49,18 +51,17 @@ class DialogueGPTModel(NLPModel): - def __init__(self, cfg: DictConfig, trainer: Trainer = None): + def __init__( + self, cfg: DictConfig, trainer: Trainer = None, + ): self.data_prepared = False self.setup_tokenizer(cfg.tokenizer) super().__init__(cfg=cfg, trainer=trainer) - self.language_model = get_lm_model( - pretrained_model_name=cfg.language_model.pretrained_model_name, - config_file=self.register_artifact('language_model.config_file', cfg.language_model.config_file), - config_dict=OmegaConf.to_container(cfg.language_model.config) if cfg.language_model.config else None, - checkpoint_file=cfg.language_model.lm_checkpoint, - vocab_file=self.register_artifact('tokenizer.vocab_file', cfg.tokenizer.vocab_file), - ) - self.language_model.resize_token_embeddings(len(self.tokenizer.tokenizer)) + if cfg.library == "huggingface": + self.language_model = AutoModelWithLMHead.from_pretrained(cfg.language_model.pretrained_model_name) + self.language_model.resize_token_embeddings(len(self.tokenizer.tokenizer)) + elif cfg.library == "megatron": + self.language_model = MegatronGPTModel.restore_from(cfg.language_model.lm_checkpoint, trainer=trainer) all_labels = list( self._train_dl.dataset.all_possible_labels.union( @@ -79,6 +80,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None): num_classes=len(self.label_to_ids) + 1, mode='micro', label_ids=self.label_to_ids, dist_sync_on_step=True ) self.eval_mode = cfg.eval_mode + self.cfg = cfg def training_step(self, batch, batch_idx): ( @@ -107,9 +109,9 @@ def training_step(self, batch, batch_idx): attn_masks = torch.stack(new_attn_masks) labels = self.get_binary_score_labels(input_ids) - loss, logits = self.language_model(input_ids=input_ids, attention_mask=attn_masks, labels=labels) + loss = self(input_ids, attn_masks, labels) self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True) - return {'loss': loss, 'logits': logits} + return {'loss': loss} def validation_step(self, batch, batch_idx): return self.eval_step_helper(batch=batch) @@ -136,12 +138,33 @@ def test_step(self, batch, batch_idx): # for inference only def predict_step(self, batch, batch_idx, dataloader_idx=None): - # return self.model(batch) + # return self(batch) raise NotImplementedError() - def forward(self, input_ids, token_type_ids, attention_mask, labels): - loss, logits = self.language_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels) - return loss, logits + def forward(self, input_ids, attention_mask, labels): + + if self.cfg.library == "huggingface": + output = self.language_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels) + loss = output['loss'] + elif self.cfg.library == "megatron": + position_ids = torch.arange(input_ids.size(1), dtype=torch.long, device=input_ids.device) + position_ids = position_ids.unsqueeze(0).repeat(input_ids.size(0), 1) + unmasked_unreduced_loss = self.language_model( + input_ids, position_ids, attention_mask=attention_mask > 0, labels=labels + ) + + # labels_mask = torch.tensor([0 if (i == -100 or i == self.tokenizer.tokenizer.pad_token_id) else 1 for i in labels]) + filler = torch.zeros_like(labels) + labels_mask_0 = torch.where(labels != -100, labels, filler) + labels_mask_1 = torch.abs(torch.where(labels != self.tokenizer.tokenizer.pad_token_id, labels, filler)) + # labels_mask is where labels is neither -100 nor the pad token id + labels_mask_with_id = torch.minimum(labels_mask_0, labels_mask_1) + labels_mask = labels_mask_with_id > 0 + + loss = self.language_model.loss_func(labels_mask, unmasked_unreduced_loss) + loss = average_losses_across_data_parallel_group([loss]) + + return loss def decode(self, tokens): if tokens not in self.token_to_words: @@ -172,10 +195,10 @@ def binary_score_candidates( start_yes = j if j // 2 == correct_candidate[i].item() else j + 1 - cand_loss, _ = self.language_model( - input_ids=candidate_input_ids[i, start_yes : start_yes + 1, :], - attention_mask=candidate_attn_masks[i, start_yes : start_yes + 1, :], - labels=self.get_binary_score_labels(candidate_input_ids[i, start_yes : start_yes + 1, :]), + cand_loss = self( + candidate_input_ids[i, start_yes : start_yes + 1, :], + candidate_attn_masks[i, start_yes : start_yes + 1, :], + self.get_binary_score_labels(candidate_input_ids[i, start_yes : start_yes + 1, :]), ) considered_loss = cand_loss.item() @@ -183,10 +206,10 @@ def binary_score_candidates( if minus_negative: start_no = j + 1 if j // 2 == correct_candidate[i].item() else j - negative_cand_loss, _ = self.language_model( - input_ids=candidate_input_ids[i, start_no : start_no + 1, :], - attention_mask=candidate_attn_masks[i, start_no : start_no + 1, :], - labels=self.get_binary_score_labels(candidate_input_ids[i, start_no : start_no + 1, :]), + negative_cand_loss = self( + candidate_input_ids[i, start_no : start_no + 1, :], + candidate_attn_masks[i, start_no : start_no + 1, :], + self.get_binary_score_labels(candidate_input_ids[i, start_no : start_no + 1, :]), ) considered_loss -= negative_cand_loss.item() @@ -234,19 +257,19 @@ def rank_candidates( ): break - cand_loss, _ = self.language_model( - input_ids=candidate_input_ids[i, j : j + 1, :], - attention_mask=candidate_attn_masks[i, j : j + 1, :], - labels=candidate_input_ids[i, j : j + 1, :], + cand_loss = self( + candidate_input_ids[i, j : j + 1, :], + candidate_attn_masks[i, j : j + 1, :], + candidate_input_ids[i, j : j + 1, :], ) considered_loss = cand_loss.item() if minus_prior: - utterance_free_cand_loss, _ = self.language_model( - input_ids=candidate_input_ids[i, j : j + 1, utterance_end:], - attention_mask=candidate_attn_masks[i, j : j + 1, utterance_end:], - labels=candidate_input_ids[i, j : j + 1, utterance_end:], + utterance_free_cand_loss = self( + candidate_input_ids[i, j : j + 1, utterance_end:], + candidate_attn_masks[i, j : j + 1, utterance_end:], + candidate_input_ids[i, j : j + 1, utterance_end:], ) considered_loss -= utterance_free_cand_loss.item() @@ -263,14 +286,17 @@ def rank_candidates( return generated_field, ground_truth_field def generate_candidates(self, generate_input_ids, generate_attn_masks, labels): - param_dict = { - "input_ids": generate_input_ids, - "attention_masks": generate_attn_masks, - "max_length": self._cfg.dataset.max_seq_length + 32, - "pad_token_id": self.tokenizer.tokenizer.pad_token_id, - } + if self.cfg.library == "huggingface": + param_dict = { + "input_ids": generate_input_ids, + "attention_masks": generate_attn_masks, + "max_length": self._cfg.dataset.max_seq_length + 32, + "pad_token_id": self.tokenizer.tokenizer.pad_token_id, + } - generated_tokens = self.language_model.generate(**param_dict) + generated_tokens = self.language_model.generate(**param_dict) + elif self.cfg.library == "megatron": + raise NotImplementedError() generated_field, ground_truth_field = self.process_into_structured_fields(generated_tokens, labels) return generated_field, ground_truth_field @@ -289,7 +315,7 @@ def eval_step_helper(self, batch, mode='val'): correct_candidate, ) = batch - loss, logits = self.language_model(input_ids=input_ids, attention_mask=attn_masks, labels=labels) + loss = self(input_ids, attn_masks, labels) self.log("{}_loss".format(mode), loss, on_step=True, on_epoch=True, prog_bar=True, logger=True) @@ -309,7 +335,9 @@ def eval_step_helper(self, batch, mode='val'): ) else: - raise ValueError("{} is not among supported options (ranking, generation)".format(self.eval_mode)) + raise ValueError( + "{} is not among supported options (ranking, generation, binary_score)".format(self.eval_mode) + ) generated_field_ids = torch.tensor( [self.label_to_ids[label.strip()] for label in generated_field], dtype=int From 277b088c8f58face69c1cdf69923a2a085142ebb Mon Sep 17 00:00:00 2001 From: Sandeep Subramanian Date: Tue, 15 Feb 2022 12:49:35 -0800 Subject: [PATCH 2/5] Test HF online for SGD-GEN only (#3681) * Test HF online for SGD only Signed-off-by: MaximumEntropy * Fix typo Signed-off-by: MaximumEntropy Co-authored-by: Yang Zhang --- Jenkinsfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f5fe167b28f0..1223b7e1e640 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -718,7 +718,7 @@ pipeline { parallel { stage('SGD-GEN') { steps { - sh 'cd examples/nlp/dialogue_state_tracking_generative && \ + sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue_state_tracking_generative && \ python sgd_gen.py \ model.dataset.data_dir=/home/TestData/nlp/sgd_small \ model.language_model.lm_checkpoint=/home/TestData/nlp/gpt2/pytorch_model.bin\ @@ -745,7 +745,7 @@ pipeline { } stage('SGD-GEN Backward compatible with SGDQA') { steps { - sh 'cd examples/nlp/dialogue_state_tracking_generative && \ + sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue_state_tracking_generative && \ python sgd_gen.py \ model.dataset.data_dir=/home/TestData/nlp/sgd_small \ model.dataset.dialogues_example_dir=sgd_gen_bert_outputs \ @@ -762,7 +762,7 @@ pipeline { model.language_model.pretrained_model_name=bert-base-cased \ trainer.accelerator=ddp \ exp_manager=null && \ - rm -rf sgd_gen_bert_outputs' + rm -rf sgd_gen_bert_outputs && TRANSFORMERS_OFFLINE=1' } } } From b466ebc7288912eef74d2822abbaf69f66af29ff Mon Sep 17 00:00:00 2001 From: Vahid Noroozi Date: Tue, 15 Feb 2022 14:33:51 -0800 Subject: [PATCH 3/5] Fixing the bug in the stateful rnnt decoder. (#3673) * fixed the bug in the stateful rnnt decoder. Signed-off-by: Vahid * addressed comments. Signed-off-by: Vahid * addressed comments. Signed-off-by: Vahid * FIXED. Signed-off-by: Vahid --- .../parts/submodules/rnnt_greedy_decoding.py | 20 ++++++++----------- nemo/utils/distributed.py | 1 + 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py index 2ae09db182b3..3403e59f36a3 100644 --- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py +++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py @@ -281,11 +281,11 @@ def _greedy_decode( # out_len: [seq_len] # Initialize blank state and empty label set in Hypothesis - hypothesis = rnnt_utils.Hypothesis(score=0.0, y_sequence=[], dec_state=None, timestep=[]) + hypothesis = rnnt_utils.Hypothesis(score=0.0, y_sequence=[], dec_state=None, timestep=[], last_token=None) if partial_hypotheses is not None: - if len(partial_hypotheses.y_sequence) > 0: - hypothesis.y_sequence.append(partial_hypotheses.y_sequence[-1].cpu().numpy()) + hypothesis.last_token = partial_hypotheses.last_token + if partial_hypotheses.dec_state is not None: hypothesis.dec_state = self.decoder.batch_concat_states([partial_hypotheses.dec_state]) hypothesis.dec_state = _states_to_device(hypothesis.dec_state, x.device) @@ -308,11 +308,10 @@ def _greedy_decode( while not_blank and (self.max_symbols is None or symbols_added < self.max_symbols): # In the first timestep, we initialize the network with RNNT Blank # In later timesteps, we provide previous predicted label as input. - last_label = ( - self._SOS - if (hypothesis.y_sequence == [] and hypothesis.dec_state is None) - else hypothesis.y_sequence[-1] - ) + if hypothesis.last_token is None and hypothesis.dec_state is None: + last_label = self._SOS + else: + last_label = label_collate([[hypothesis.last_token]]) # Perform prediction network and joint network steps. g, hidden_prime = self._pred_step(last_label, hypothesis.dec_state) @@ -347,6 +346,7 @@ def _greedy_decode( hypothesis.score += float(v) hypothesis.timestep.append(time_idx) hypothesis.dec_state = hidden_prime + hypothesis.last_token = k # Increment token counter. symbols_added += 1 @@ -359,10 +359,6 @@ def _greedy_decode( # Unpack the hidden states hypothesis.dec_state = self.decoder.batch_select_state(hypothesis.dec_state, 0) - # Remove the original input label if partial hypothesis was provided - if partial_hypotheses is not None: - hypothesis.y_sequence = hypothesis.y_sequence[1:] - return hypothesis diff --git a/nemo/utils/distributed.py b/nemo/utils/distributed.py index 5783fd662f0e..4712a2e44c5f 100644 --- a/nemo/utils/distributed.py +++ b/nemo/utils/distributed.py @@ -13,6 +13,7 @@ # limitations under the License. import os + import torch from nemo.utils import logging From 7b1e82c40c102ca77eaf75ed741822667d9d7ea8 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Tue, 15 Feb 2022 16:08:43 -0800 Subject: [PATCH 4/5] Update Librosa support to 0.9 (#3682) Signed-off-by: smajumdar --- nemo/collections/asr/parts/preprocessing/features.py | 7 ++++--- nemo/collections/asr/parts/preprocessing/perturb.py | 4 +++- nemo/collections/asr/parts/preprocessing/segment.py | 4 ++-- nemo/collections/tts/data/datalayers.py | 4 ++-- nemo/collections/tts/models/degli.py | 2 +- nemo/collections/tts/torch/data.py | 2 +- scripts/dataset_processing/process_vad_data.py | 4 ++-- scripts/freesound_download_resample/freesound_download.py | 2 +- scripts/freesound_download_resample/freesound_resample.py | 2 +- 9 files changed, 17 insertions(+), 14 deletions(-) diff --git a/nemo/collections/asr/parts/preprocessing/features.py b/nemo/collections/asr/parts/preprocessing/features.py index 6671dcfea9f3..df5d76e96eb5 100644 --- a/nemo/collections/asr/parts/preprocessing/features.py +++ b/nemo/collections/asr/parts/preprocessing/features.py @@ -169,8 +169,8 @@ def inverse(self, magnitude, phase): if self.window is not None: window_sum = librosa.filters.window_sumsquare( - self.window, - magnitude.size(-1), + window=self.window, + n_frames=magnitude.size(-1), hop_length=self.hop_length, win_length=self.win_length, n_fft=self.filter_length, @@ -302,7 +302,8 @@ def __init__( highfreq = highfreq or sample_rate / 2 filterbanks = torch.tensor( - librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq, fmax=highfreq), dtype=torch.float + librosa.filters.mel(sr=sample_rate, n_fft=self.n_fft, n_mels=nfilt, fmin=lowfreq, fmax=highfreq), + dtype=torch.float, ).unsqueeze(0) self.register_buffer("fb", filterbanks) diff --git a/nemo/collections/asr/parts/preprocessing/perturb.py b/nemo/collections/asr/parts/preprocessing/perturb.py index b4283691eda4..b0b677a5f0d1 100644 --- a/nemo/collections/asr/parts/preprocessing/perturb.py +++ b/nemo/collections/asr/parts/preprocessing/perturb.py @@ -162,7 +162,9 @@ def perturb(self, data): return new_sr = int(self._sr * speed_rate) - data._samples = librosa.core.resample(data._samples, self._sr, new_sr, res_type=self._res_type) + data._samples = librosa.core.resample( + data._samples, orig_sr=self._sr, target_sr=new_sr, res_type=self._res_type + ) class TimeStretchPerturbation(Perturbation): diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py index 419a931b4252..76c23488cdaf 100644 --- a/nemo/collections/asr/parts/preprocessing/segment.py +++ b/nemo/collections/asr/parts/preprocessing/segment.py @@ -72,10 +72,10 @@ def __init__(self, samples, sample_rate, target_sr=None, trim=False, trim_db=60, """ samples = self._convert_samples_to_float32(samples) if target_sr is not None and target_sr != sample_rate: - samples = librosa.core.resample(samples, sample_rate, target_sr) + samples = librosa.core.resample(samples, orig_sr=sample_rate, target_sr=target_sr) sample_rate = target_sr if trim: - samples, _ = librosa.effects.trim(samples, trim_db) + samples, _ = librosa.effects.trim(samples, top_db=trim_db) self._samples = samples self._sample_rate = sample_rate if self._samples.ndim >= 2: diff --git a/nemo/collections/tts/data/datalayers.py b/nemo/collections/tts/data/datalayers.py index faffa1a79b4f..2de1c2308a2f 100644 --- a/nemo/collections/tts/data/datalayers.py +++ b/nemo/collections/tts/data/datalayers.py @@ -459,7 +459,7 @@ def setup_noise_augmented_dataset(files_list, num_snr, kwargs_stft, dest, desc): for line in list_file_pbar: audio_file = line.split('|')[0] speech = sf.read(audio_file)[0].astype(np.float32) - spec_clean = np.ascontiguousarray(librosa.stft(speech, **kwargs_stft)) + spec_clean = np.ascontiguousarray(librosa.stft(y=speech, **kwargs_stft)) mag_clean = np.ascontiguousarray(np.abs(spec_clean)[..., np.newaxis]) signal_power = np.mean(np.abs(speech) ** 2) @@ -472,7 +472,7 @@ def setup_noise_augmented_dataset(files_list, num_snr, kwargs_stft, dest, desc): snr = librosa.db_to_power(snr_db) noise_power = signal_power / snr noisy = speech + np.sqrt(noise_power) * np.random.randn(len(speech)) - spec_noisy = librosa.stft(noisy, **kwargs_stft) + spec_noisy = librosa.stft(y=noisy, **kwargs_stft) spec_noisy = np.ascontiguousarray(spec_noisy) T_x = spec_noisy.shape[1] x = spec_noisy.view(dtype=np.float32).reshape((*spec_noisy.shape, 2)) diff --git a/nemo/collections/tts/models/degli.py b/nemo/collections/tts/models/degli.py index f6227ac8f4fd..e2cb69a2d983 100755 --- a/nemo/collections/tts/models/degli.py +++ b/nemo/collections/tts/models/degli.py @@ -94,7 +94,7 @@ def reconstruct_wave(*args: ndarray, kwargs_istft, n_sample=-1) -> ndarray: if spec is None: spec = mag * np.exp(1j * phase) - wave = librosa.istft(spec, **kwargs_istft, **kwarg_len) + wave = librosa.istft(stft_matrix=spec, **kwargs_istft, **kwarg_len) return wave diff --git a/nemo/collections/tts/torch/data.py b/nemo/collections/tts/torch/data.py index 6fe6d58efb6e..d9e6a8589b92 100644 --- a/nemo/collections/tts/torch/data.py +++ b/nemo/collections/tts/torch/data.py @@ -226,7 +226,7 @@ def __init__( self.hop_len = self.hop_length or self.n_fft // 4 self.fb = torch.tensor( librosa.filters.mel( - self.sample_rate, self.n_fft, n_mels=self.n_mels, fmin=self.lowfreq, fmax=self.highfreq + sr=self.sample_rate, n_fft=self.n_fft, n_mels=self.n_mels, fmin=self.lowfreq, fmax=self.highfreq ), dtype=torch.float, ).unsqueeze(0) diff --git a/scripts/dataset_processing/process_vad_data.py b/scripts/dataset_processing/process_vad_data.py index 8f55ad073fb0..6daa8859258e 100644 --- a/scripts/dataset_processing/process_vad_data.py +++ b/scripts/dataset_processing/process_vad_data.py @@ -184,7 +184,7 @@ def write_manifest( try: x, _sr = librosa.load(file, sr=sr) - duration = librosa.get_duration(x, sr=sr) + duration = librosa.get_duration(y=x, sr=sr) except Exception: continue @@ -312,7 +312,7 @@ def generate_variety_noise(data_dir, filename, prefix): files = allfile.read().splitlines() for file in files: - y, sr = librosa.load(file, sr=sampling_rate) + y, sr = librosa.load(path=file, sr=sampling_rate) for i in range( 0, len(y) - sampling_rate, silence_stride * 100 diff --git a/scripts/freesound_download_resample/freesound_download.py b/scripts/freesound_download_resample/freesound_download.py index 076c69bfa606..37e42ea4df28 100644 --- a/scripts/freesound_download_resample/freesound_download.py +++ b/scripts/freesound_download_resample/freesound_download.py @@ -324,7 +324,7 @@ def download_song(basepath, id, name, download_url): # Delete and then re-download if os.path.exists(fp): try: - _ = librosa.load(fp) + _ = librosa.load(path=fp) except Exception: # File is currupted, delete and re-download. os.remove(fp) diff --git a/scripts/freesound_download_resample/freesound_resample.py b/scripts/freesound_download_resample/freesound_resample.py index 79a01c094614..9e48620ff7ac 100644 --- a/scripts/freesound_download_resample/freesound_resample.py +++ b/scripts/freesound_download_resample/freesound_resample.py @@ -64,7 +64,7 @@ def resample_file(resampled_dir, filepath, ext, sample_rate): try: # Check if the file is readable - librosa.load(filepath) + librosa.load(path=filepath) # if it is, force input format and try again transform.set_input_format(file_type=ext) From b5012d0859e4a1ae108f9dd4cf87de97f9dc8388 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Tue, 15 Feb 2022 17:10:59 -0800 Subject: [PATCH 5/5] Comment out numba r 22.01 release (#3685) Signed-off-by: smajumdar Co-authored-by: Eric Harper --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index b8f25ef63cb4..c424ce08d1f9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -81,9 +81,9 @@ RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]" python -c "import nemo.collections.tts as nemo_tts" && \ python -c "import nemo_text_processing.text_normalization as text_normalization" -# TODO: Try to remove once 21.07 container is the base container +# TODO: Update to newer numba 0.56.0RC1 for 22.02 container # install pinned numba version -RUN conda install -c conda-forge numba=0.54.1 +# RUN conda install -c conda-forge numba==0.54.1 # copy scripts/examples/tests into container for end user WORKDIR /workspace/nemo