diff --git a/Dockerfile b/Dockerfile
index b8f25ef63cb4..c424ce08d1f9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -81,9 +81,9 @@ RUN --mount=from=nemo-src,target=/tmp/nemo cd /tmp/nemo && pip install ".[all]"
     python -c "import nemo.collections.tts as nemo_tts" && \
     python -c "import nemo_text_processing.text_normalization as text_normalization"
 
-# TODO: Try to remove once 21.07 container is the base container
+# TODO: Update to newer numba 0.56.0RC1 for 22.02 container
 # install pinned numba version
-RUN conda install -c conda-forge numba=0.54.1
+# RUN conda install -c conda-forge numba==0.54.1
 
 # copy scripts/examples/tests into container for end user
 WORKDIR /workspace/nemo
diff --git a/Jenkinsfile b/Jenkinsfile
index a7f6cbd014d7..1223b7e1e640 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -718,7 +718,7 @@ pipeline {
       parallel {
         stage('SGD-GEN') {
           steps {
-            sh 'cd examples/nlp/dialogue_state_tracking_generative && \
+            sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue_state_tracking_generative && \
             python sgd_gen.py \
             model.dataset.data_dir=/home/TestData/nlp/sgd_small \
             model.language_model.lm_checkpoint=/home/TestData/nlp/gpt2/pytorch_model.bin\
@@ -734,8 +734,10 @@ pipeline {
             trainer.val_check_interval=0.0 \
             trainer.gpus=[0] \
             model.dataset.use_cache=false \
-            model.tokenizer.special_tokens={pad_token:"endoftext"}\
-            model.language_model.pretrained_model_name=gpt2 \
+            model.tokenizer.special_tokens={pad_token:"endoftext"} \
+            model.tokenizer.tokenizer_name=gpt2 \
+            model.tokenizer.vocab_file=/home/TestData/nlp/gpt2/vocab.json\
+            model.language_model.pretrained_model_name=/home/TestData/nlp/gpt2 \
             trainer.accelerator=ddp \
             exp_manager=null && \
             rm -rf sgd_gen_outputs'
@@ -743,7 +745,7 @@ pipeline {
           }
         }
         stage('SGD-GEN Backward compatible with SGDQA') {
           steps {
-            sh 'cd examples/nlp/dialogue_state_tracking_generative && \
+            sh 'TRANSFORMERS_OFFLINE=0 && cd examples/nlp/dialogue_state_tracking_generative && \
             python sgd_gen.py \
             model.dataset.data_dir=/home/TestData/nlp/sgd_small \
             model.dataset.dialogues_example_dir=sgd_gen_bert_outputs \
@@ -760,7 +762,7 @@ pipeline {
             model.language_model.pretrained_model_name=bert-base-cased \
             trainer.accelerator=ddp \
             exp_manager=null && \
-            rm -rf sgd_gen_bert_outputs'
+            rm -rf sgd_gen_bert_outputs && TRANSFORMERS_OFFLINE=1'
           }
         }
       }
diff --git a/examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml b/examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml
index 1a12803012f8..51a30eb2384e 100644
--- a/examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml
+++ b/examples/nlp/dialogue_state_tracking_generative/conf/dialogue_config.yaml
@@ -29,10 +29,12 @@ trainer:
   resume_from_checkpoint: null # The path to a checkpoint file to continue the training, restores the whole state including the epoch, step, LR schedulers, apex, etc.
   num_sanity_val_steps: 0 # number of steps to perform validation steps for sanity check the validation process before starting the training, setting to 0 disables it
   checkpoint_callback: False # Provided by exp_manager
-  logger: False # Provided by exp_manager
-
+  logger: False # Provided by exp_manager
+
 model:
+  tensor_model_parallel_size: 1
   nemo_path: null # filename to save the model and associated artifacts to .nemo file
+  library: huggingface # huggingface or megatron
   tokenizer:
     tokenizer_name: ${model.language_model.pretrained_model_name} # or sentencepiece
     vocab_file: null # path to vocab file
diff --git a/examples/nlp/dialogue_state_tracking_generative/sgd_gen.py b/examples/nlp/dialogue_state_tracking_generative/sgd_gen.py
index dd79985d289c..2b33e7a026f8 100644
--- a/examples/nlp/dialogue_state_tracking_generative/sgd_gen.py
+++ b/examples/nlp/dialogue_state_tracking_generative/sgd_gen.py
@@ -106,8 +106,11 @@
 from nemo.collections.nlp.models.dialogue_state_tracking_generative.dialogue_gpt_model import DialogueGPTModel
 from nemo.collections.nlp.models.dialogue_state_tracking_sgdqa.sgdqa_model import SGDQAModel
+from nemo.collections.nlp.modules.common.megatron.megatron_utils import compute_model_parallel_rank
+from nemo.collections.nlp.parts.nlp_overrides import NLPDDPPlugin
 from nemo.core.config import hydra_runner
 from nemo.utils import logging
+from nemo.utils.app_state import AppState
 from nemo.utils.exp_manager import exp_manager
@@ -115,9 +118,17 @@
 def main(cfg: DictConfig) -> None:
     pl.seed_everything(42)
     logging.info(f'Config: {OmegaConf.to_yaml(cfg)}')
-    trainer = pl.Trainer(**cfg.trainer)
+
+    plugin = NLPDDPPlugin()
+    trainer = pl.Trainer(**cfg.trainer, plugins=plugin)
+
     exp_manager(trainer, cfg.get("exp_manager", None))
 
+    app_state = AppState()
+    if cfg.model.tensor_model_parallel_size > 1:
+        app_state.model_parallel_size = cfg.model.tensor_model_parallel_size
+        app_state.model_parallel_rank = compute_model_parallel_rank(trainer.local_rank, app_state.model_parallel_size)
+
     if 'bert' in cfg.model.language_model.pretrained_model_name:
         model_class = SGDQAModel
     elif 'gpt' in cfg.model.language_model.pretrained_model_name.lower():
@@ -155,7 +166,7 @@ def main(cfg: DictConfig) -> None:
 
     if hasattr(cfg.model, 'test_ds') and cfg.model.test_ds.ds_item is not None:
         gpu = 1 if cfg.trainer.gpus != 0 else 0
-        trainer = pl.Trainer(gpus=gpu)
+        trainer = pl.Trainer(gpus=gpu, plugins=plugin, precision=16)
         model.setup_multiple_test_data(test_data_config=cfg.model.test_ds)
         if model.prepare_test(trainer):
             trainer.test(model)
diff --git a/nemo/collections/asr/parts/preprocessing/features.py b/nemo/collections/asr/parts/preprocessing/features.py
index 6671dcfea9f3..df5d76e96eb5 100644
--- a/nemo/collections/asr/parts/preprocessing/features.py
+++ b/nemo/collections/asr/parts/preprocessing/features.py
@@ -169,8 +169,8 @@ def inverse(self, magnitude, phase):
 
         if self.window is not None:
             window_sum = librosa.filters.window_sumsquare(
-                self.window,
-                magnitude.size(-1),
+                window=self.window,
+                n_frames=magnitude.size(-1),
                 hop_length=self.hop_length,
                 win_length=self.win_length,
                 n_fft=self.filter_length,
@@ -302,7 +302,8 @@ def __init__(
         highfreq = highfreq or sample_rate / 2
 
         filterbanks = torch.tensor(
-            librosa.filters.mel(sample_rate, self.n_fft, n_mels=nfilt, fmin=lowfreq, fmax=highfreq), dtype=torch.float
+            librosa.filters.mel(sr=sample_rate, n_fft=self.n_fft, n_mels=nfilt, fmin=lowfreq, fmax=highfreq),
+            dtype=torch.float,
         ).unsqueeze(0)
         self.register_buffer("fb", filterbanks)
diff --git a/nemo/collections/asr/parts/preprocessing/perturb.py b/nemo/collections/asr/parts/preprocessing/perturb.py
index b4283691eda4..b0b677a5f0d1 100644
--- a/nemo/collections/asr/parts/preprocessing/perturb.py
+++ b/nemo/collections/asr/parts/preprocessing/perturb.py
@@ -162,7 +162,9 @@ def perturb(self, data):
             return
 
         new_sr = int(self._sr * speed_rate)
-        data._samples = librosa.core.resample(data._samples, self._sr, new_sr, res_type=self._res_type)
+        data._samples = librosa.core.resample(
+            data._samples, orig_sr=self._sr, target_sr=new_sr, res_type=self._res_type
+        )
 
 
 class TimeStretchPerturbation(Perturbation):
diff --git a/nemo/collections/asr/parts/preprocessing/segment.py b/nemo/collections/asr/parts/preprocessing/segment.py
index 419a931b4252..76c23488cdaf 100644
--- a/nemo/collections/asr/parts/preprocessing/segment.py
+++ b/nemo/collections/asr/parts/preprocessing/segment.py
@@ -72,10 +72,10 @@ def __init__(self, samples, sample_rate, target_sr=None, trim=False, trim_db=60,
         """
         samples = self._convert_samples_to_float32(samples)
         if target_sr is not None and target_sr != sample_rate:
-            samples = librosa.core.resample(samples, sample_rate, target_sr)
+            samples = librosa.core.resample(samples, orig_sr=sample_rate, target_sr=target_sr)
             sample_rate = target_sr
         if trim:
-            samples, _ = librosa.effects.trim(samples, trim_db)
+            samples, _ = librosa.effects.trim(samples, top_db=trim_db)
         self._samples = samples
         self._sample_rate = sample_rate
         if self._samples.ndim >= 2:
diff --git a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py
index 2ae09db182b3..3403e59f36a3 100644
--- a/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py
+++ b/nemo/collections/asr/parts/submodules/rnnt_greedy_decoding.py
@@ -281,11 +281,11 @@ def _greedy_decode(
         # out_len: [seq_len]
 
         # Initialize blank state and empty label set in Hypothesis
-        hypothesis = rnnt_utils.Hypothesis(score=0.0, y_sequence=[], dec_state=None, timestep=[])
+        hypothesis = rnnt_utils.Hypothesis(score=0.0, y_sequence=[], dec_state=None, timestep=[], last_token=None)
 
         if partial_hypotheses is not None:
-            if len(partial_hypotheses.y_sequence) > 0:
-                hypothesis.y_sequence.append(partial_hypotheses.y_sequence[-1].cpu().numpy())
+            hypothesis.last_token = partial_hypotheses.last_token
+            if partial_hypotheses.dec_state is not None:
                 hypothesis.dec_state = self.decoder.batch_concat_states([partial_hypotheses.dec_state])
                 hypothesis.dec_state = _states_to_device(hypothesis.dec_state, x.device)
 
@@ -308,11 +308,10 @@ def _greedy_decode(
             while not_blank and (self.max_symbols is None or symbols_added < self.max_symbols):
                 # In the first timestep, we initialize the network with RNNT Blank
                 # In later timesteps, we provide previous predicted label as input.
-                last_label = (
-                    self._SOS
-                    if (hypothesis.y_sequence == [] and hypothesis.dec_state is None)
-                    else hypothesis.y_sequence[-1]
-                )
+                if hypothesis.last_token is None and hypothesis.dec_state is None:
+                    last_label = self._SOS
+                else:
+                    last_label = label_collate([[hypothesis.last_token]])
 
                 # Perform prediction network and joint network steps.
                 g, hidden_prime = self._pred_step(last_label, hypothesis.dec_state)
@@ -347,6 +346,7 @@ def _greedy_decode(
                     hypothesis.score += float(v)
                     hypothesis.timestep.append(time_idx)
                     hypothesis.dec_state = hidden_prime
+                    hypothesis.last_token = k
 
                     # Increment token counter.
                     symbols_added += 1
@@ -359,10 +359,6 @@ def _greedy_decode(
 
         # Unpack the hidden states
         hypothesis.dec_state = self.decoder.batch_select_state(hypothesis.dec_state, 0)
 
-        # Remove the original input label if partial hypothesis was provided
-        if partial_hypotheses is not None:
-            hypothesis.y_sequence = hypothesis.y_sequence[1:]
-
         return hypothesis
diff --git a/nemo/collections/nlp/models/dialogue_state_tracking_generative/dialogue_gpt_model.py b/nemo/collections/nlp/models/dialogue_state_tracking_generative/dialogue_gpt_model.py
index a489e9573786..4e6b0fd3ca55 100644
--- a/nemo/collections/nlp/models/dialogue_state_tracking_generative/dialogue_gpt_model.py
+++ b/nemo/collections/nlp/models/dialogue_state_tracking_generative/dialogue_gpt_model.py
@@ -27,6 +27,7 @@
 from omegaconf import DictConfig, OmegaConf
 from pytorch_lightning import Trainer
 from torch.utils.data import DataLoader
+from transformers import AutoModelWithLMHead
 
 from nemo.collections.nlp.data.dialogue_state_tracking_generative import (
     DialogueGPTDataset,
@@ -37,8 +38,9 @@
     DialogueAssistantDataProcessor,
 )
 from nemo.collections.nlp.metrics.classification_report import ClassificationReport
+from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
 from nemo.collections.nlp.models.nlp_model import NLPModel
-from nemo.collections.nlp.modules.common.lm_utils import get_lm_model
+from nemo.collections.nlp.modules.common.megatron.utils import average_losses_across_data_parallel_group
 from nemo.core.classes.common import PretrainedModelInfo
 from nemo.utils import logging
 from nemo.utils.get_rank import is_global_rank_zero
@@ -49,18 +51,17 @@
 class DialogueGPTModel(NLPModel):
-    def __init__(self, cfg: DictConfig, trainer: Trainer = None):
+    def __init__(
+        self, cfg: DictConfig, trainer: Trainer = None,
+    ):
         self.data_prepared = False
 
         self.setup_tokenizer(cfg.tokenizer)
         super().__init__(cfg=cfg, trainer=trainer)
-        self.language_model = get_lm_model(
-            pretrained_model_name=cfg.language_model.pretrained_model_name,
-            config_file=self.register_artifact('language_model.config_file', cfg.language_model.config_file),
-            config_dict=OmegaConf.to_container(cfg.language_model.config) if cfg.language_model.config else None,
-            checkpoint_file=cfg.language_model.lm_checkpoint,
-            vocab_file=self.register_artifact('tokenizer.vocab_file', cfg.tokenizer.vocab_file),
-        )
-        self.language_model.resize_token_embeddings(len(self.tokenizer.tokenizer))
+        if cfg.library == "huggingface":
+            self.language_model = AutoModelWithLMHead.from_pretrained(cfg.language_model.pretrained_model_name)
+            self.language_model.resize_token_embeddings(len(self.tokenizer.tokenizer))
+        elif cfg.library == "megatron":
+            self.language_model = MegatronGPTModel.restore_from(cfg.language_model.lm_checkpoint, trainer=trainer)
 
         all_labels = list(
             self._train_dl.dataset.all_possible_labels.union(
@@ -79,6 +80,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer = None):
             num_classes=len(self.label_to_ids) + 1, mode='micro', label_ids=self.label_to_ids, dist_sync_on_step=True
         )
         self.eval_mode = cfg.eval_mode
+        self.cfg = cfg
 
     def training_step(self, batch, batch_idx):
         (
@@ -107,9 +109,9 @@ def training_step(self, batch, batch_idx):
             attn_masks = torch.stack(new_attn_masks)
             labels = self.get_binary_score_labels(input_ids)
 
-        loss, logits = self.language_model(input_ids=input_ids, attention_mask=attn_masks, labels=labels)
+        loss = self(input_ids, attn_masks, labels)
         self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
-        return {'loss': loss, 'logits': logits}
+        return {'loss': loss}
 
     def validation_step(self, batch, batch_idx):
         return self.eval_step_helper(batch=batch)
@@ -136,12 +138,33 @@ def test_step(self, batch, batch_idx):
 
     # for inference only
     def predict_step(self, batch, batch_idx, dataloader_idx=None):
-        # return self.model(batch)
+        # return self(batch)
         raise NotImplementedError()
 
-    def forward(self, input_ids, token_type_ids, attention_mask, labels):
-        loss, logits = self.language_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
-        return loss, logits
+    def forward(self, input_ids, attention_mask, labels):
+
+        if self.cfg.library == "huggingface":
+            output = self.language_model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
+            loss = output['loss']
+        elif self.cfg.library == "megatron":
+            position_ids = torch.arange(input_ids.size(1), dtype=torch.long, device=input_ids.device)
+            position_ids = position_ids.unsqueeze(0).repeat(input_ids.size(0), 1)
+            unmasked_unreduced_loss = self.language_model(
+                input_ids, position_ids, attention_mask=attention_mask > 0, labels=labels
+            )
+
+            # labels_mask = torch.tensor([0 if (i == -100 or i == self.tokenizer.tokenizer.pad_token_id) else 1 for i in labels])
+            filler = torch.zeros_like(labels)
+            labels_mask_0 = torch.where(labels != -100, labels, filler)
+            labels_mask_1 = torch.abs(torch.where(labels != self.tokenizer.tokenizer.pad_token_id, labels, filler))
+            # labels_mask is where labels is neither -100 nor the pad token id
+            labels_mask_with_id = torch.minimum(labels_mask_0, labels_mask_1)
+            labels_mask = labels_mask_with_id > 0
+
+            loss = self.language_model.loss_func(labels_mask, unmasked_unreduced_loss)
+            loss = average_losses_across_data_parallel_group([loss])
+
+        return loss
 
     def decode(self, tokens):
         if tokens not in self.token_to_words:
@@ -172,10 +195,10 @@ def binary_score_candidates(
 
                 start_yes = j if j // 2 == correct_candidate[i].item() else j + 1
 
-                cand_loss, _ = self.language_model(
-                    input_ids=candidate_input_ids[i, start_yes : start_yes + 1, :],
-                    attention_mask=candidate_attn_masks[i, start_yes : start_yes + 1, :],
-                    labels=self.get_binary_score_labels(candidate_input_ids[i, start_yes : start_yes + 1, :]),
+                cand_loss = self(
+                    candidate_input_ids[i, start_yes : start_yes + 1, :],
+                    candidate_attn_masks[i, start_yes : start_yes + 1, :],
+                    self.get_binary_score_labels(candidate_input_ids[i, start_yes : start_yes + 1, :]),
                 )
                 considered_loss = cand_loss.item()
 
@@ -183,10 +206,10 @@ def binary_score_candidates(
                 if minus_negative:
                     start_no = j + 1 if j // 2 == correct_candidate[i].item() else j
 
-                    negative_cand_loss, _ = self.language_model(
-                        input_ids=candidate_input_ids[i, start_no : start_no + 1, :],
-                        attention_mask=candidate_attn_masks[i, start_no : start_no + 1, :],
-                        labels=self.get_binary_score_labels(candidate_input_ids[i, start_no : start_no + 1, :]),
+                    negative_cand_loss = self(
+                        candidate_input_ids[i, start_no : start_no + 1, :],
+                        candidate_attn_masks[i, start_no : start_no + 1, :],
+                        self.get_binary_score_labels(candidate_input_ids[i, start_no : start_no + 1, :]),
                     )
                     considered_loss -= negative_cand_loss.item()
@@ -234,19 +257,19 @@ def rank_candidates(
                 ):
                     break
 
-                cand_loss, _ = self.language_model(
-                    input_ids=candidate_input_ids[i, j : j + 1, :],
-                    attention_mask=candidate_attn_masks[i, j : j + 1, :],
-                    labels=candidate_input_ids[i, j : j + 1, :],
+                cand_loss = self(
+                    candidate_input_ids[i, j : j + 1, :],
+                    candidate_attn_masks[i, j : j + 1, :],
+                    candidate_input_ids[i, j : j + 1, :],
                 )
                 considered_loss = cand_loss.item()
 
                 if minus_prior:
-                    utterance_free_cand_loss, _ = self.language_model(
-                        input_ids=candidate_input_ids[i, j : j + 1, utterance_end:],
-                        attention_mask=candidate_attn_masks[i, j : j + 1, utterance_end:],
-                        labels=candidate_input_ids[i, j : j + 1, utterance_end:],
+                    utterance_free_cand_loss = self(
+                        candidate_input_ids[i, j : j + 1, utterance_end:],
+                        candidate_attn_masks[i, j : j + 1, utterance_end:],
+                        candidate_input_ids[i, j : j + 1, utterance_end:],
                     )
                     considered_loss -= utterance_free_cand_loss.item()
@@ -263,14 +286,17 @@ def rank_candidates(
         return generated_field, ground_truth_field
 
     def generate_candidates(self, generate_input_ids, generate_attn_masks, labels):
-        param_dict = {
-            "input_ids": generate_input_ids,
-            "attention_masks": generate_attn_masks,
-            "max_length": self._cfg.dataset.max_seq_length + 32,
-            "pad_token_id": self.tokenizer.tokenizer.pad_token_id,
-        }
+        if self.cfg.library == "huggingface":
+            param_dict = {
+                "input_ids": generate_input_ids,
+                "attention_masks": generate_attn_masks,
+                "max_length": self._cfg.dataset.max_seq_length + 32,
+                "pad_token_id": self.tokenizer.tokenizer.pad_token_id,
+            }
 
-        generated_tokens = self.language_model.generate(**param_dict)
+            generated_tokens = self.language_model.generate(**param_dict)
+        elif self.cfg.library == "megatron":
+            raise NotImplementedError()
 
         generated_field, ground_truth_field = self.process_into_structured_fields(generated_tokens, labels)
         return generated_field, ground_truth_field
@@ -289,7 +315,7 @@ def eval_step_helper(self, batch, mode='val'):
             correct_candidate,
         ) = batch
 
-        loss, logits = self.language_model(input_ids=input_ids, attention_mask=attn_masks, labels=labels)
+        loss = self(input_ids, attn_masks, labels)
 
         self.log("{}_loss".format(mode), loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
@@ -309,7 +335,9 @@ def eval_step_helper(self, batch, mode='val'):
             )
 
         else:
-            raise ValueError("{} is not among supported options (ranking, generation)".format(self.eval_mode))
+            raise ValueError(
+                "{} is not among supported options (ranking, generation, binary_score)".format(self.eval_mode)
+            )
 
         generated_field_ids = torch.tensor(
             [self.label_to_ids[label.strip()] for label in generated_field], dtype=int
diff --git a/nemo/collections/tts/data/datalayers.py b/nemo/collections/tts/data/datalayers.py
index faffa1a79b4f..2de1c2308a2f 100644
--- a/nemo/collections/tts/data/datalayers.py
+++ b/nemo/collections/tts/data/datalayers.py
@@ -459,7 +459,7 @@ def setup_noise_augmented_dataset(files_list, num_snr, kwargs_stft, dest, desc):
         for line in list_file_pbar:
             audio_file = line.split('|')[0]
             speech = sf.read(audio_file)[0].astype(np.float32)
-            spec_clean = np.ascontiguousarray(librosa.stft(speech, **kwargs_stft))
+            spec_clean = np.ascontiguousarray(librosa.stft(y=speech, **kwargs_stft))
             mag_clean = np.ascontiguousarray(np.abs(spec_clean)[..., np.newaxis])
 
             signal_power = np.mean(np.abs(speech) ** 2)
@@ -472,7 +472,7 @@ def setup_noise_augmented_dataset(files_list, num_snr, kwargs_stft, dest, desc):
                 snr = librosa.db_to_power(snr_db)
                 noise_power = signal_power / snr
                 noisy = speech + np.sqrt(noise_power) * np.random.randn(len(speech))
-                spec_noisy = librosa.stft(noisy, **kwargs_stft)
+                spec_noisy = librosa.stft(y=noisy, **kwargs_stft)
                 spec_noisy = np.ascontiguousarray(spec_noisy)
                 T_x = spec_noisy.shape[1]
                 x = spec_noisy.view(dtype=np.float32).reshape((*spec_noisy.shape, 2))
diff --git a/nemo/collections/tts/models/degli.py b/nemo/collections/tts/models/degli.py
index f6227ac8f4fd..e2cb69a2d983 100755
--- a/nemo/collections/tts/models/degli.py
+++ b/nemo/collections/tts/models/degli.py
@@ -94,7 +94,7 @@ def reconstruct_wave(*args: ndarray, kwargs_istft, n_sample=-1) -> ndarray:
         if spec is None:
             spec = mag * np.exp(1j * phase)
 
-    wave = librosa.istft(spec, **kwargs_istft, **kwarg_len)
+    wave = librosa.istft(stft_matrix=spec, **kwargs_istft, **kwarg_len)
 
     return wave
diff --git a/nemo/collections/tts/torch/data.py b/nemo/collections/tts/torch/data.py
index 6fe6d58efb6e..d9e6a8589b92 100644
--- a/nemo/collections/tts/torch/data.py
+++ b/nemo/collections/tts/torch/data.py
@@ -226,7 +226,7 @@ def __init__(
         self.hop_len = self.hop_length or self.n_fft // 4
         self.fb = torch.tensor(
             librosa.filters.mel(
-                self.sample_rate, self.n_fft, n_mels=self.n_mels, fmin=self.lowfreq, fmax=self.highfreq
+                sr=self.sample_rate, n_fft=self.n_fft, n_mels=self.n_mels, fmin=self.lowfreq, fmax=self.highfreq
             ),
             dtype=torch.float,
         ).unsqueeze(0)
diff --git a/nemo/utils/distributed.py b/nemo/utils/distributed.py
index 5783fd662f0e..4712a2e44c5f 100644
--- a/nemo/utils/distributed.py
+++ b/nemo/utils/distributed.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import os
+
 import torch
 
 from nemo.utils import logging
diff --git a/scripts/dataset_processing/process_vad_data.py b/scripts/dataset_processing/process_vad_data.py
index 8f55ad073fb0..6daa8859258e 100644
--- a/scripts/dataset_processing/process_vad_data.py
+++ b/scripts/dataset_processing/process_vad_data.py
@@ -184,7 +184,7 @@ def write_manifest(
 
         try:
             x, _sr = librosa.load(file, sr=sr)
-            duration = librosa.get_duration(x, sr=sr)
+            duration = librosa.get_duration(y=x, sr=sr)
         except Exception:
             continue
@@ -312,7 +312,7 @@ def generate_variety_noise(data_dir, filename, prefix):
         files = allfile.read().splitlines()
 
     for file in files:
-        y, sr = librosa.load(file, sr=sampling_rate)
+        y, sr = librosa.load(path=file, sr=sampling_rate)
         for i in range(
             0, len(y) - sampling_rate, silence_stride * 100
diff --git a/scripts/freesound_download_resample/freesound_download.py b/scripts/freesound_download_resample/freesound_download.py
index 076c69bfa606..37e42ea4df28 100644
--- a/scripts/freesound_download_resample/freesound_download.py
+++ b/scripts/freesound_download_resample/freesound_download.py
@@ -324,7 +324,7 @@ def download_song(basepath, id, name, download_url):
     # Delete and then re-download
     if os.path.exists(fp):
         try:
-            _ = librosa.load(fp)
+            _ = librosa.load(path=fp)
         except Exception:
             # File is currupted, delete and re-download.
             os.remove(fp)
diff --git a/scripts/freesound_download_resample/freesound_resample.py b/scripts/freesound_download_resample/freesound_resample.py
index 79a01c094614..9e48620ff7ac 100644
--- a/scripts/freesound_download_resample/freesound_resample.py
+++ b/scripts/freesound_download_resample/freesound_resample.py
@@ -64,7 +64,7 @@ def resample_file(resampled_dir, filepath, ext, sample_rate):
 
     try:
         # Check if the file is readable
-        librosa.load(filepath)
+        librosa.load(path=filepath)
 
         # if it is, force input format and try again
         transform.set_input_format(file_type=ext)