diff --git a/examples/tts/conf/fastpitch/fastpitch_22050.yaml b/examples/tts/conf/fastpitch/fastpitch_22050.yaml
new file mode 100644
index 000000000000..846d09edcfee
--- /dev/null
+++ b/examples/tts/conf/fastpitch/fastpitch_22050.yaml
@@ -0,0 +1,286 @@
+# This config contains the default values for training an English 22.05kHz FastPitch model.
+# If you want to train a model on another dataset, you can change the config values to match your dataset.
+# Most dataset-specific arguments are at the top of the config file; see below.
+
+name: FastPitch
+
+max_epochs: ???
+batch_size: 32
+weighted_sampling_steps_per_epoch: null
+
+n_speakers: ???
+speaker_path: null
+feature_stats_path: null
+
+train_ds_meta: ???
+val_ds_meta: ???
+log_ds_meta: ???
+
+phoneme_dict_path: ???
+heteronyms_path: ???
+
+log_dir: ???
+vocoder_type: ???
+vocoder_name: null
+vocoder_checkpoint_path: null
+
+# The feature config below should match the feature config (e.g. feature/feature_22050.yaml) used during preprocessing.
+sample_rate: 22050
+win_length: 1024
+hop_length: 256
+
+mel_feature:
+  _target_: nemo.collections.tts.parts.preprocessing.features.MelSpectrogramFeaturizer
+  sample_rate: ${sample_rate}
+  win_length: ${win_length}
+  hop_length: ${hop_length}
+  mel_dim: 80
+  lowfreq: 0
+  highfreq: null
+
+pitch_feature:
+  _target_: nemo.collections.tts.parts.preprocessing.features.PitchFeaturizer
+  sample_rate: ${sample_rate}
+  win_length: ${win_length}
+  hop_length: ${hop_length}
+  pitch_fmin: 60
+  pitch_fmax: 640
+
+energy_feature:
+  _target_: nemo.collections.tts.parts.preprocessing.features.EnergyFeaturizer
+  spec_featurizer: ${mel_feature}
+
+featurizers:
+  pitch: ${pitch_feature}
+  energy: ${energy_feature}
+
+
+model:
+  learn_alignment: true
+  bin_loss_warmup_epochs: 100
+
+  n_speakers: ${n_speakers}
+  n_mel_channels: ${mel_feature.mel_dim}
+  min_token_duration: 1
+  max_token_duration: 75
+  symbols_embedding_dim: 384
+  pitch_embedding_kernel_size: 3
+  energy_embedding_kernel_size: 3
+  speaker_emb_condition_prosody: true
+  speaker_emb_condition_aligner: true
+  use_log_energy: false
+  dur_loss_scale: 0.1
+  pitch_loss_scale: 0.1
+  energy_loss_scale: 0.1
+  aligner_loss_scale: 0.1
+
+  preprocessor:
+    _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
+    features: ${mel_feature.mel_dim}
+    lowfreq: ${mel_feature.lowfreq}
+    highfreq: ${mel_feature.highfreq}
+    n_fft: ${win_length}
+    n_window_size: ${win_length}
+    window_size: false
+    n_window_stride: ${hop_length}
+    window_stride: false
+    pad_to: 1
+    pad_value: 0
+    sample_rate: ${sample_rate}
+    window: hann
+    normalize: null
+    preemph: null
+    dither: 0.0
+    frame_splicing: 1
+    log: true
+    log_zero_guard_type: add
+    log_zero_guard_value: 1.0
+    mag_power: 1.0
+    mel_norm: null
+
+  text_tokenizer:
+    _target_: nemo.collections.common.tokenizers.text_to_speech.tts_tokenizers.IPATokenizer
+    punct: true
+    apostrophe: true
+    pad_with_space: true
+    g2p:
+      _target_: nemo.collections.tts.g2p.models.i18n_ipa.IpaG2p
+      phoneme_dict: ${phoneme_dict_path}
+      heteronyms: ${heteronyms_path}
+      phoneme_probability: 0.8
+      ignore_ambiguous_words: false
+      use_chars: true
+      use_stresses: true
+
+  pitch_processor:
+    _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization
+    field: pitch
+    stats_path: ${feature_stats_path}
+
+  energy_processor:
+    _target_: nemo.collections.tts.parts.preprocessing.feature_processors.MeanVarianceSpeakerNormalization
+    field: energy
+    stats_path: ${feature_stats_path}
+
+  train_ds:
+    dataset:
+      _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset
+      dataset_meta: ${train_ds_meta}
+      weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch}
+      sample_rate: ${sample_rate}
+      speaker_path: ${speaker_path}
+      align_prior_hop_length: ${hop_length}
+      featurizers: ${featurizers}
+      feature_processors:
+        pitch: ${model.pitch_processor}
+        energy: ${model.energy_processor}
+      min_duration: 0.1
+      max_duration: 10.0
+
+    dataloader_params:
+      batch_size: ${batch_size}
+      num_workers: 4
+
+  validation_ds:
+    dataset:
+      _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset
+      dataset_meta: ${val_ds_meta}
+      sample_rate: ${sample_rate}
+      speaker_path: ${speaker_path}
+      align_prior_hop_length: ${hop_length}
+      featurizers: ${featurizers}
+      feature_processors:
+        pitch: ${model.pitch_processor}
+        energy: ${model.energy_processor}
+
+    dataloader_params:
+      batch_size: ${batch_size}
+      num_workers: 2
+
+  log_config:
+    log_dir: ${log_dir}
+    log_epochs: [10, 50]
+    epoch_frequency: 100
+    log_tensorboard: false
+    log_wandb: false
+
+    generators:
+      - _target_: nemo.collections.tts.parts.utils.callbacks.FastPitchArtifactGenerator
+        log_spectrogram: true
+        log_alignment: true
+        audio_params:
+          _target_: nemo.collections.tts.parts.utils.callbacks.LogAudioParams
+          log_audio_gta: true
+          vocoder_type: ${vocoder_type}
+          vocoder_name: ${vocoder_name}
+          vocoder_checkpoint_path: ${vocoder_checkpoint_path}
+
+    dataset:
+      _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset
+      text_tokenizer: ${model.text_tokenizer}
+      sample_rate: ${sample_rate}
+      speaker_path: ${speaker_path}
+      align_prior_hop_length: ${hop_length}
+      featurizers: ${featurizers}
+
+      feature_processors:
+        pitch: ${model.pitch_processor}
+        energy: ${model.energy_processor}
+
+      dataset_meta: ${log_ds_meta}
+
+    dataloader_params:
+      batch_size: 8
+      num_workers: 2
+
+  input_fft:
+    _target_: nemo.collections.tts.modules.transformer.FFTransformerEncoder
+    n_layer: 6
+    n_head: 2
+    d_model: ${model.symbols_embedding_dim}
+    d_head: 64
+    d_inner: 1536
+    kernel_size: 3
+    dropout: 0.1
+    dropatt: 0.1
+    dropemb: 0.0
+    d_embed: ${model.symbols_embedding_dim}
+
+  output_fft:
+    _target_: nemo.collections.tts.modules.transformer.FFTransformerDecoder
+    n_layer: 6
+    n_head: 1
+    d_model: ${model.symbols_embedding_dim}
+    d_head: 64
+    d_inner: 1536
+    kernel_size: 3
+    dropout: 0.1
+    dropatt: 0.1
+    dropemb: 0.0
+
+  alignment_module:
+    _target_: nemo.collections.tts.modules.aligner.AlignmentEncoder
+    n_text_channels: ${model.symbols_embedding_dim}
+    dist_type: cosine
+    temperature: 15.0
+
+  duration_predictor:
+    _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor
+    input_size: ${model.symbols_embedding_dim}
+    kernel_size: 3
+    filter_size: 256
+    dropout: 0.1
+    n_layers: 2
+
+  pitch_predictor:
+    _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor
+    input_size: ${model.symbols_embedding_dim}
+    kernel_size: 3
+    filter_size: 256
+    dropout: 0.1
+    n_layers: 2
+
+  energy_predictor:
+    _target_: nemo.collections.tts.modules.fastpitch.TemporalPredictor
+    input_size: ${model.symbols_embedding_dim}
+    kernel_size: 3
+    filter_size: 256
+    dropout: 0.1
+    n_layers: 2
+
+  optim:
+    name: adamw
+    lr: 1e-3
+    betas: [0.9, 0.999]
+    weight_decay: 1e-6
+
+    sched:
+      name: NoamAnnealing
+      warmup_steps: 1000
+      last_epoch: -1
+      d_model: 1  # Disable scaling based on model dim
+
+trainer:
+  num_nodes: 1
+  devices: 1
+  accelerator: gpu
+  strategy: ddp
+  precision: 16
+  max_epochs: ${max_epochs}
+  accumulate_grad_batches: 1
+  gradient_clip_val: 10.0
+  enable_checkpointing: false # Provided by exp_manager
+  logger: false # Provided by exp_manager
+  log_every_n_steps: 100
+  check_val_every_n_epoch: 10
+  benchmark: false
+
+exp_manager:
+  exp_dir: null
+  name: ${name}
+  create_tensorboard_logger: true
+  create_checkpoint_callback: true
+  checkpoint_callback_params:
+    monitor: val_loss
+  resume_if_exists: false
+  resume_ignore_no_checkpoint: false
diff --git a/examples/tts/conf/fastpitch/fastpitch.yaml b/examples/tts/conf/fastpitch/fastpitch_44100.yaml
similarity index 81%
rename from examples/tts/conf/fastpitch/fastpitch.yaml
rename to examples/tts/conf/fastpitch/fastpitch_44100.yaml
index 39d5f395afbc..da9e9a29b1e7 100644
--- a/examples/tts/conf/fastpitch/fastpitch.yaml
+++ b/examples/tts/conf/fastpitch/fastpitch_44100.yaml
@@ -1,12 +1,9 @@
-# This config contains the default values for training an English FastPitch model.
+# This config contains the default values for training an English 44.1kHz FastPitch model.
 # If you want to train a model on other dataset, you can change config values according to your dataset.
 # Most dataset-specific arguments are in the head of the config file, see below.
 
 name: FastPitch
 
-defaults:
-  - feature: ???
-
 max_epochs: ???
 batch_size: 32
 weighted_sampling_steps_per_epoch: null
@@ -27,12 +24,43 @@ vocoder_type: ???
 vocoder_name: null
 vocoder_checkpoint_path: null
 
+# The feature config below should match the feature config (e.g. feature/feature_44100.yaml) used during preprocessing.
+sample_rate: 44100
+win_length: 2048
+hop_length: 512
+
+mel_feature:
+  _target_: nemo.collections.tts.parts.preprocessing.features.MelSpectrogramFeaturizer
+  sample_rate: ${sample_rate}
+  win_length: ${win_length}
+  hop_length: ${hop_length}
+  mel_dim: 80
+  lowfreq: 0
+  highfreq: null
+
+pitch_feature:
+  _target_: nemo.collections.tts.parts.preprocessing.features.PitchFeaturizer
+  sample_rate: ${sample_rate}
+  win_length: ${win_length}
+  hop_length: ${hop_length}
+  pitch_fmin: 60
+  pitch_fmax: 640
+
+energy_feature:
+  _target_: nemo.collections.tts.parts.preprocessing.features.EnergyFeaturizer
+  spec_featurizer: ${mel_feature}
+
+featurizers:
+  pitch: ${pitch_feature}
+  energy: ${energy_feature}
+
+
 model:
   learn_alignment: true
   bin_loss_warmup_epochs: 100
 
   n_speakers: ${n_speakers}
-  n_mel_channels: ${feature.mel_feature.mel_dim}
+  n_mel_channels: ${mel_feature.mel_dim}
   min_token_duration: 1
   max_token_duration: 75
   symbols_embedding_dim: 384
@@ -48,17 +76,17 @@ model:
 
   preprocessor:
     _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
-    features: ${feature.mel_feature.mel_dim}
-    lowfreq: ${feature.mel_feature.lowfreq}
-    highfreq: ${feature.mel_feature.highfreq}
-    n_fft: ${feature.win_length}
-    n_window_size: ${feature.win_length}
+    features: ${mel_feature.mel_dim}
+    lowfreq: ${mel_feature.lowfreq}
+    highfreq: ${mel_feature.highfreq}
+    n_fft: ${win_length}
+    n_window_size: ${win_length}
     window_size: false
-    n_window_stride: ${feature.hop_length}
+    n_window_stride: ${hop_length}
     window_stride: false
     pad_to: 1
     pad_value: 0
-    sample_rate: ${feature.sample_rate}
+    sample_rate: ${sample_rate}
     window: hann
     normalize: null
     preemph: null
@@ -99,10 +127,10 @@ model:
       _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset
       dataset_meta: ${train_ds_meta}
       weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch}
-      sample_rate: ${feature.sample_rate}
+      sample_rate: ${sample_rate}
       speaker_path: ${speaker_path}
-      align_prior_hop_length: ${feature.hop_length}
-      featurizers: ${feature.featurizers}
+      align_prior_hop_length: ${hop_length}
+      featurizers: ${featurizers}
       feature_processors:
         pitch: ${model.pitch_processor}
         energy: ${model.energy_processor}
@@ -117,10 +145,10 @@ model:
     dataset:
       _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset
       dataset_meta: ${val_ds_meta}
-      sample_rate: ${feature.sample_rate}
+      sample_rate: ${sample_rate}
       speaker_path: ${speaker_path}
-      align_prior_hop_length: ${feature.hop_length}
-      featurizers: ${feature.featurizers}
+      align_prior_hop_length: ${hop_length}
+      featurizers: ${featurizers}
       feature_processors:
         pitch: ${model.pitch_processor}
         energy: ${model.energy_processor}
@@ -150,10 +178,10 @@ model:
     dataset:
       _target_: nemo.collections.tts.data.text_to_speech_dataset.TextToSpeechDataset
       text_tokenizer: ${model.text_tokenizer}
-      sample_rate: ${feature.sample_rate}
+      sample_rate: ${sample_rate}
       speaker_path: ${speaker_path}
-      align_prior_hop_length: ${feature.hop_length}
-      featurizers: ${feature.featurizers}
+      align_prior_hop_length: ${hop_length}
+      featurizers: ${featurizers}
 
       feature_processors:
         pitch: ${model.pitch_processor}
diff --git a/examples/tts/conf/feature/feature_22050.yaml b/examples/tts/conf/feature/feature_22050.yaml
index 1b159bc66ddf..8071eb7933bb 100644
--- a/examples/tts/conf/feature/feature_22050.yaml
+++ b/examples/tts/conf/feature/feature_22050.yaml
@@ -4,25 +4,25 @@ hop_length: 256
 
 mel_feature:
   _target_: nemo.collections.tts.parts.preprocessing.features.MelSpectrogramFeaturizer
-  sample_rate: ${..sample_rate}
-  win_length: ${..win_length}
-  hop_length: ${..hop_length}
+  sample_rate: ${sample_rate}
+  win_length: ${win_length}
+  hop_length: ${hop_length}
   mel_dim: 80
   lowfreq: 0
-  highfreq: 8000
+  highfreq: null
 
 pitch_feature:
   _target_: nemo.collections.tts.parts.preprocessing.features.PitchFeaturizer
-  sample_rate: ${..sample_rate}
-  win_length: ${..win_length}
-  hop_length: ${..hop_length}
+  sample_rate: ${sample_rate}
+  win_length: ${win_length}
+  hop_length: ${hop_length}
   pitch_fmin: 60
   pitch_fmax: 640
 
 energy_feature:
   _target_: nemo.collections.tts.parts.preprocessing.features.EnergyFeaturizer
-  spec_featurizer: ${..mel_feature}
+  spec_featurizer: ${mel_feature}
 
 featurizers:
-  pitch: ${..pitch_feature}
-  energy: ${..energy_feature}
+  pitch: ${pitch_feature}
+  energy: ${energy_feature}
diff --git a/examples/tts/conf/feature/feature_44100.yaml b/examples/tts/conf/feature/feature_44100.yaml
index e852a93a2d6c..0cfc27f4dab3 100644
--- a/examples/tts/conf/feature/feature_44100.yaml
+++ b/examples/tts/conf/feature/feature_44100.yaml
@@ -4,25 +4,25 @@ hop_length: 512
 
 mel_feature:
   _target_: nemo.collections.tts.parts.preprocessing.features.MelSpectrogramFeaturizer
-  sample_rate: ${..sample_rate}
-  win_length: ${..win_length}
-  hop_length: ${..hop_length}
+  sample_rate: ${sample_rate}
+  win_length: ${win_length}
+  hop_length: ${hop_length}
   mel_dim: 80
   lowfreq: 0
   highfreq: null
 
 pitch_feature:
   _target_: nemo.collections.tts.parts.preprocessing.features.PitchFeaturizer
-  sample_rate: ${..sample_rate}
-  win_length: ${..win_length}
-  hop_length: ${..hop_length}
+  sample_rate: ${sample_rate}
+  win_length: ${win_length}
+  hop_length: ${hop_length}
   pitch_fmin: 60
   pitch_fmax: 640
 
 energy_feature:
   _target_: nemo.collections.tts.parts.preprocessing.features.EnergyFeaturizer
-  spec_featurizer: ${..mel_feature}
+  spec_featurizer: ${mel_feature}
 
 featurizers:
-  pitch: ${..pitch_feature}
-  energy: ${..energy_feature}
+  pitch: ${pitch_feature}
+  energy: ${energy_feature}
diff --git a/examples/tts/conf/hifigan/sample/sample_22050.yaml b/examples/tts/conf/hifigan/sample/sample_22050.yaml
deleted file mode 100644
index 18bc206e2566..000000000000
--- a/examples/tts/conf/hifigan/sample/sample_22050.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-# Audio dataset sampling config for 22.05khz sampling rate
-train_n_samples: 8192
-val_n_samples: 66048
diff --git a/examples/tts/conf/hifigan/sample/sample_44100.yaml b/examples/tts/conf/hifigan/sample/sample_44100.yaml
deleted file mode 100644
index d8315623bbbe..000000000000
--- a/examples/tts/conf/hifigan/sample/sample_44100.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-# Audio dataset sampling config for 44.1khz sampling rate
-train_n_samples: 16384
-val_n_samples: 131072
diff --git a/examples/tts/conf/hifigan/hifigan_data.yaml b/examples/tts/conf/hifigan_dataset/hifigan_22050.yaml
similarity index 71%
rename from examples/tts/conf/hifigan/hifigan_data.yaml
rename to examples/tts/conf/hifigan_dataset/hifigan_22050.yaml
index 62ce3344636e..0b72e810aa9e 100644
--- a/examples/tts/conf/hifigan/hifigan_data.yaml
+++ b/examples/tts/conf/hifigan_dataset/hifigan_22050.yaml
@@ -1,14 +1,9 @@
-# This config contains the default values for training a HiFi-GAN model.
+# This config contains the default values for training a 22.05kHz HiFi-GAN model.
 # If you want to train model on other dataset, you can change config values according to your dataset.
 # Most dataset-specific arguments are in the head of the config file, see below.
 
 name: "HifiGan"
 
-defaults:
-  - feature: ???
-  - sample: ???
-  - model/generator: ???
-
 max_epochs: ???
 batch_size: 16
 weighted_sampling_steps_per_epoch: null
@@ -19,6 +14,19 @@ log_ds_meta: ???
 
 log_dir: ???
 
+mel_dim: 80  # Passed to model.preprocessor as nfilt
+lowfreq: 0
+highfreq: null
+
+# Change these values depending on your sampling rate.
+sample_rate: 22050
+win_length: 1024  # Used for both n_fft and n_window_size in model.preprocessor
+hop_length: 256  # Used as n_window_stride in model.preprocessor
+upsample_rates: [8, 8, 2, 2]  # Consumed by model.generator; product is 256, the same as hop_length
+train_n_samples: 8192  # n_samples of the training dataset
+val_min_duration_seconds: 3.0  # min_duration of the validation dataset
+val_n_samples: 66048  # n_samples of the validation dataset
+
 model:
 
   max_epochs: ${max_epochs}
@@ -27,16 +35,16 @@ model:
 
   preprocessor:
     _target_: nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures
-    nfilt: ${feature.mel_feature.mel_dim}
-    lowfreq: ${feature.mel_feature.lowfreq}
-    highfreq: ${feature.mel_feature.highfreq}
-    n_fft: ${feature.win_length}
-    n_window_size: ${feature.win_length}
-    n_window_stride: ${feature.hop_length}
+    nfilt: ${mel_dim}
+    lowfreq: ${lowfreq}
+    highfreq: ${highfreq}
+    n_fft: ${win_length}
+    n_window_size: ${win_length}
+    n_window_stride: ${hop_length}
     pad_to: 0
     pad_value: 0
     exact_pad: true
-    sample_rate: ${feature.sample_rate}
+    sample_rate: ${sample_rate}
     window: hann
     normalize: null
     preemph: null
@@ -49,12 +57,21 @@ model:
     mel_norm: null
     use_grads: false
 
+  generator:
+    _target_: nemo.collections.tts.modules.hifigan_modules.Generator
+    resblock: 1
+    upsample_rates: ${upsample_rates}
+    upsample_kernel_sizes: [16, 16, 4, 4]
+    upsample_initial_channel: 512
+    resblock_kernel_sizes: [3, 7, 11]
+    resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
+
   train_ds:
     dataset:
       _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset
       weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch}
-      sample_rate: ${feature.sample_rate}
-      n_samples: ${sample.train_n_samples}
+      sample_rate: ${sample_rate}
+      n_samples: ${train_n_samples}
       min_duration: 0.4
       max_duration: null
       dataset_meta: ${train_ds_meta}
@@ -66,9 +83,9 @@ model:
   validation_ds:
     dataset:
       _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset
-      sample_rate: ${feature.sample_rate}
-      n_samples: ${sample.val_n_samples}
-      min_duration: 3.0
+      sample_rate: ${sample_rate}
+      n_samples: ${val_n_samples}
+      min_duration: ${val_min_duration_seconds}
       max_duration: null
       dataset_meta: ${val_ds_meta}
 
@@ -88,7 +105,7 @@ model:
 
     dataset:
       _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset
-      sample_rate: ${feature.sample_rate}
+      sample_rate: ${sample_rate}
       n_samples: null
       min_duration: null
       max_duration: null
diff --git a/examples/tts/conf/hifigan_dataset/hifigan_44100.yaml b/examples/tts/conf/hifigan_dataset/hifigan_44100.yaml
new file mode 100644
index 000000000000..537dc67cec38
--- /dev/null
+++ b/examples/tts/conf/hifigan_dataset/hifigan_44100.yaml
@@ -0,0 +1,151 @@
+# This config contains the default values for training a 44.1kHz HiFi-GAN model.
+# If you want to train a model on another dataset, you can change the config values to match your dataset.
+# Most dataset-specific arguments are at the top of the config file; see below.
+
+name: "HifiGan"
+
+max_epochs: ???
+batch_size: 16
+weighted_sampling_steps_per_epoch: null
+
+train_ds_meta: ???
+val_ds_meta: ???
+log_ds_meta: ???
+
+log_dir: ???
+
+mel_dim: 80  # Passed to model.preprocessor as nfilt
+lowfreq: 0
+highfreq: null
+
+# Change these values depending on your sampling rate.
+sample_rate: 44100
+win_length: 2048  # Used for both n_fft and n_window_size in model.preprocessor
+hop_length: 512  # Used as n_window_stride in model.preprocessor
+upsample_rates: [8, 8, 4, 2]  # Consumed by model.generator; product is 512, the same as hop_length
+train_n_samples: 16384  # n_samples of the training dataset
+val_min_duration_seconds: 3.0  # min_duration of the validation dataset
+val_n_samples: 131072  # n_samples of the validation dataset
+
+model:
+
+  max_epochs: ${max_epochs}
+  steps_per_epoch: ${weighted_sampling_steps_per_epoch}
+  l1_loss_factor: 60
+
+  preprocessor:
+    _target_: nemo.collections.asr.parts.preprocessing.features.FilterbankFeatures
+    nfilt: ${mel_dim}
+    lowfreq: ${lowfreq}
+    highfreq: ${highfreq}
+    n_fft: ${win_length}
+    n_window_size: ${win_length}
+    n_window_stride: ${hop_length}
+    pad_to: 0
+    pad_value: 0
+    exact_pad: true
+    sample_rate: ${sample_rate}
+    window: hann
+    normalize: null
+    preemph: null
+    dither: 0.0
+    frame_splicing: 1
+    log: true
+    log_zero_guard_type: add
+    log_zero_guard_value: 1.0
+    mag_power: 1.0
+    mel_norm: null
+    use_grads: false
+
+  generator:
+    _target_: nemo.collections.tts.modules.hifigan_modules.Generator
+    resblock: 1
+    upsample_rates: ${upsample_rates}
+    upsample_kernel_sizes: [16, 16, 4, 4]
+    upsample_initial_channel: 512
+    resblock_kernel_sizes: [3, 7, 11]
+    resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
+
+  train_ds:
+    dataset:
+      _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset
+      weighted_sampling_steps_per_epoch: ${weighted_sampling_steps_per_epoch}
+      sample_rate: ${sample_rate}
+      n_samples: ${train_n_samples}
+      min_duration: 0.4
+      max_duration: null
+      dataset_meta: ${train_ds_meta}
+
+    dataloader_params:
+      batch_size: ${batch_size}
+      num_workers: 4
+
+  validation_ds:
+    dataset:
+      _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset
+      sample_rate: ${sample_rate}
+      n_samples: ${val_n_samples}
+      min_duration: ${val_min_duration_seconds}
+      max_duration: null
+      dataset_meta: ${val_ds_meta}
+
+    dataloader_params:
+      batch_size: ${batch_size}
+      num_workers: 2
+
+  log_config:
+    log_dir: ${log_dir}
+    log_epochs: [10, 50]
+    epoch_frequency: 100
+    log_tensorboard: false
+    log_wandb: false
+
+    generators:
+      - _target_: nemo.collections.tts.parts.utils.callbacks.VocoderArtifactGenerator
+
+    dataset:
+      _target_: nemo.collections.tts.data.vocoder_dataset.VocoderDataset
+      sample_rate: ${sample_rate}
+      n_samples: null
+      min_duration: null
+      max_duration: null
+      trunc_duration: 15.0
+      dataset_meta: ${log_ds_meta}
+
+    dataloader_params:
+      batch_size: 4
+      num_workers: 2
+
+  optim:
+    _target_: torch.optim.AdamW
+    lr: 2e-4
+    betas: [0.8, 0.99]
+    weight_decay: 1e-6
+    sched:
+      name: ExponentialLR
+      gamma: 0.999
+
+trainer:
+  num_nodes: 1
+  devices: 1
+  accelerator: gpu
+  strategy: ddp
+  precision: 16
+  max_epochs: ${max_epochs}  # Resolved from the top-level max_epochs key
+  accumulate_grad_batches: 1
+  enable_checkpointing: false  # Provided by exp_manager
+  logger: false  # Provided by exp_manager
+  log_every_n_steps: 100
+  check_val_every_n_epoch: 10
+  benchmark: false
+
+exp_manager:
+  exp_dir: null
+  name: ${name}
+  create_tensorboard_logger: true
+  create_checkpoint_callback: true
+  create_wandb_logger: false
+  checkpoint_callback_params:
+    monitor: val_loss
+  resume_if_exists: false
+  resume_ignore_no_checkpoint: false
diff --git a/tutorials/tts/FastPitch_Data_Preparation.ipynb b/tutorials/tts/FastPitch_Data_Preparation.ipynb
index 8a3094c76a56..46778759d5cb 100644
--- a/tutorials/tts/FastPitch_Data_Preparation.ipynb
+++ b/tutorials/tts/FastPitch_Data_Preparation.ipynb
@@ -99,6 +99,7 @@
     {
       "cell_type": "code",
       "source": [
+        "\n",
         "# Download local version of NeMo scripts. If you are running locally and want to use your own local NeMo code,\n",
         "# comment out the below lines and set NEMO_ROOT_DIR to your local path.\n",
         "!git clone -b $BRANCH https://github.com/NVIDIA/NeMo.git $NEMO_ROOT_DIR"
@@ -443,7 +444,7 @@
         "# Directory with raw audio data\n",
         "input_audio_dir = DATA_DIR / \"audio\"\n",
         "# Directory to write preprocessed audio to\n",
-        "output_audio_dir = DATA_DIR / \"audio_44khz\"\n",
+        "output_audio_dir = DATA_DIR / \"audio_preprocessed\"\n",
         "# Whether to overwrite existing audio, if it exists in the output directory\n",
         "overwrite_audio = True\n",
         "# Whether to overwrite output manifest, if it exists\n",
@@ -627,9 +628,18 @@
       "source": [
         "feature_script = NEMO_SCRIPT_DIR / \"compute_features.py\"\n",
         "\n",
-        "feature_config_path = NEMO_CONFIG_DIR / \"feature\" / \"feature_44100.yaml\"\n",
-        "audio_dir = DATA_DIR / \"audio_44khz\"\n",
-        "feature_dir = DATA_DIR / \"features_44khz\"\n",
+        "sample_rate = 44100\n",
+        "\n",
+        "if sample_rate == 22050:\n",
+        "    feature_config_filename = \"feature_22050.yaml\"\n",
+        "elif sample_rate == 44100:\n",
+        "    feature_config_filename = \"feature_44100.yaml\"\n",
+        "else:\n",
+        "    raise ValueError(f\"Unsupported sampling rate {sample_rate}\")\n",
+        "\n",
+        "feature_config_path = NEMO_CONFIG_DIR / \"feature\" / feature_config_filename\n",
+        "audio_dir = DATA_DIR / \"audio_preprocessed\"\n",
+        "feature_dir = DATA_DIR / \"features\"\n",
         "num_workers = 4\n",
         "\n",
         "def compute_features(data_type):\n",
@@ -723,7 +733,7 @@
         "feature_stats_script = NEMO_SCRIPT_DIR / \"compute_feature_stats.py\"\n",
         "\n",
         "train_manifest_filepath = DATA_DIR / \"train_manifest.json\"\n",
-        "output_stats_path = DATA_DIR / \"feature_stats_44khz.json\"\n",
+        "output_stats_path = DATA_DIR / \"feature_stats.json\"\n",
         "\n",
         "args = [\n",
         "    f\"--feature_config_path={feature_config_path}\",\n",
@@ -800,7 +810,7 @@
       "cell_type": "code",
       "source": [
         "dataset_name = \"vctk\"\n",
-        "audio_dir = DATA_DIR / \"audio_44khz\"\n",
+        "audio_dir = DATA_DIR / \"audio_preprocessed\"\n",
         "train_manifest_filepath = DATA_DIR / \"train_manifest.json\"\n",
         "dev_manifest_filepath = DATA_DIR / \"dev_manifest.json\""
       ],
@@ -822,10 +832,14 @@
         "sample_rate = 44100\n",
         "\n",
         "# Config files specifying all HiFi-GAN parameters\n",
-        "hifigan_config_dir = NEMO_CONFIG_DIR / \"hifigan\"\n",
-        "hifigan_config_filename = \"hifigan_data.yaml\"\n",
-        "feature_config = f\"feature_{sample_rate}\"\n",
-        "sample_config = f\"sample_{sample_rate}\"\n",
+        "hifigan_config_dir = NEMO_CONFIG_DIR / \"hifigan_dataset\"\n",
+        "\n",
+        "if sample_rate == 22050:\n",
+        "    hifigan_config_filename = \"hifigan_22050.yaml\"\n",
+        "elif sample_rate == 44100:\n",
+        "    hifigan_config_filename = \"hifigan_44100.yaml\"\n",
+        "else:\n",
+        "    raise ValueError(f\"Unsupported sampling rate {sample_rate}\")\n",
         "\n",
         "# Name of the experiment that will determine where it is saved locally and in TensorBoard and WandB\n",
         "run_id = \"test_run\"\n",
@@ -834,13 +848,6 @@
         "# Directory where predicted audio will be stored periodically throughout training\n",
         "hifigan_log_dir = hifigan_exp_output_dir / \"logs\"\n",
         "\n",
-        "if sample_rate == 22050:\n",
-        "    generator_config = \"v1\"\n",
-        "elif sample_rate == 44100:\n",
-        "    generator_config = \"v1_44100\"\n",
-        "else:\n",
-        "    raise ValueError(f\"Unsupported sampling rate {sample_rate}\")\n",
-        "\n",
         "if torch.cuda.is_available():\n",
         "    accelerator=\"gpu\"\n",
         "    batch_size = 16\n",
@@ -850,11 +857,7 @@
         "\n",
         "args = [\n",
         "    f\"--config-path={hifigan_config_dir}\",\n",
-        "    f\"--config-dir={NEMO_CONFIG_DIR}\",\n",
         "    f\"--config-name={hifigan_config_filename}\",\n",
-        "    f\"feature={feature_config}\",\n",
-        "    f\"sample={sample_config}\",\n",
-        "    f'model/generator={generator_config}',\n",
         "    f\"max_epochs={epochs}\",\n",
         "    f\"weighted_sampling_steps_per_epoch={steps_per_epoch}\",\n",
         "    f\"batch_size={batch_size}\",\n",
@@ -986,8 +989,13 @@
         "\n",
         "# Config files specifying all FastPitch parameters\n",
         "fastpitch_config_dir = NEMO_CONFIG_DIR / \"fastpitch\"\n",
-        "fastpitch_config_filename = \"fastpitch.yaml\"\n",
-        "feature_config = f\"feature_{sample_rate}\"\n",
+        "\n",
+        "if sample_rate == 22050:\n",
+        "    fastpitch_config_filename = \"fastpitch_22050.yaml\"\n",
+        "elif sample_rate == 44100:\n",
+        "    fastpitch_config_filename = \"fastpitch_44100.yaml\"\n",
+        "else:\n",
+        "    raise ValueError(f\"Unsupported sampling rate {sample_rate}\")\n",
         "\n",
         "# Metadata files and directories\n",
         "dataset_file_dir = NEMO_DIR / \"scripts\" / \"tts_dataset_files\"\n",
@@ -995,8 +1003,8 @@
         "heteronyms_path = dataset_file_dir / \"heteronyms-052722\"\n",
         "\n",
         "speaker_path = DATA_DIR / \"speakers.json\"\n",
-        "feature_dir = DATA_DIR / \"features_44khz\"\n",
-        "stats_path = DATA_DIR / \"feature_stats_44khz.json\"\n",
+        "feature_dir = DATA_DIR / \"features\"\n",
+        "stats_path = DATA_DIR / \"feature_stats.json\"\n",
         "\n",
         "def get_latest_checkpoint(checkpoint_dir):\n",
         "    output_path = None\n",
@@ -1031,9 +1039,7 @@
         "\n",
         "args = [\n",
         "    f\"--config-path={fastpitch_config_dir}\",\n",
-        "    f\"--config-dir={NEMO_CONFIG_DIR}\",\n",
         "    f\"--config-name={fastpitch_config_filename}\",\n",
-        "    f\"feature={feature_config}\",\n",
         "    f\"n_speakers={num_speakers}\",\n",
         "    f\"speaker_path={speaker_path}\",\n",
         "    f\"max_epochs={epochs}\",\n",